def analyse_coh(args):
    """Collect database hits for a coh query, then align them and derive a logo and PSSM.

    Steps, in order:

    1. Load the query sequence from the ``<name>.A`` entry of ``all_cohs.txt``.
    2. Pairwise-align every entry of ``Coh_All_Setup-Oct2014-d-for-lizi.txt``
       against the query and keep those whose ``sm.aln_identity`` is at least
       0.4 and for which ``gap_in_essential`` is false on both aligned
       sequences.
    3. Write the query plus the kept hits to
       ``<path><name>_passed_thresholds.fasta`` and align them with ``muscle``
       into ``<path><name>_passed_thresholds.aln``.
    4. Draw ``<path><name>_passed_thresholds.eps`` with ``weblogo``, annotated
       with the aligned query sequence.
    5. Write the query sequence via ``sm.write_seq_from_seq`` and call
       ``sm.run_psi_blast_for_pssm`` to produce ``<path><name>.pssm``.

    Args:
        args: mapping providing ``'name'`` (query identifier) and ``'path'``
            (output prefix). ``'path'`` is concatenated directly with
            ``'name'``, so it must already end with a path separator.

    Returns:
        None. All results are written to files under ``args['path']``.

    Raises:
        KeyError: if ``args`` lacks a key, or the query is missing from the
            input or aligned FASTA collections.
        OSError: if the ``muscle`` executable cannot be started.
        subprocess.CalledProcessError: if ``muscle`` exits with a non-zero
            status.
    """
    import subprocess

    import seq_manager as sm

    identity_cutoff = 0.4

    name = args['name']
    prefix = args['path'] + name
    hits_fasta_path = prefix + '_passed_thresholds.fasta'
    aln_path = prefix + '_passed_thresholds.aln'
    logo_path = prefix + '_passed_thresholds.eps'

    query = sm.WorkSeq(
        name[:4], name,
        sm.read_multi_fastas('all_cohs.txt')[name + '.A']['seq'])
    # Single-argument parenthesised prints behave the same on Python 2 and 3.
    print(args)
    print(query)

    strands = get_strands_seq(query.name, query.fasta)
    all_coh_fastas = sm.read_multi_fastas(
        'Coh_All_Setup-Oct2014-d-for-lizi.txt')

    # The query itself is always the first record handed to the aligner.
    pass_threshold = [{'name': query.name, 'seq': query.fasta}]
    for _, hit_val in all_coh_fastas.items():
        pw_aln = sm.pair_wise_aln_from_seqs(query.fasta,
                                            hit_val['seq'].upper())
        # The trailing 4 is forwarded to gap_in_essential unchanged; its
        # meaning is defined by that helper.
        if (sm.aln_identity(pw_aln[0], pw_aln[1]) >= identity_cutoff
                and not gap_in_essential(strands, pw_aln[1], 4)
                and not gap_in_essential(strands, pw_aln[0], 4)):
            pass_threshold.append(hit_val)
    print('found %i seqs that passed the thresholds' % len(pass_threshold))

    # NOTE(review): analyse_doc passes a name-keyed dict to
    # write_multi_seqs_to_file, whereas a list is passed here - confirm the
    # helper accepts both shapes.
    sm.write_multi_seqs_to_file(pass_threshold, hits_fasta_path)

    # Argument lists (no shell) keep paths with spaces or shell
    # metacharacters intact. A failed alignment aborts here instead of
    # letting the next step read a missing or stale .aln file.
    subprocess.check_call(['muscle', '-in', hits_fasta_path,
                           '-out', aln_path])
    aln_fastas = sm.read_multi_fastas(aln_path)

    # One annotation label per alignment column: the aligned query residue
    # (or '-' for a gap).
    # NOTE(review): the query was stored above under query.name but is
    # looked up here as args['name'] - confirm the two are always equal.
    annotation = ','.join(aln_fastas[name]['seq'])
    try:
        # Nothing below reads the .eps file, so a weblogo failure stays
        # non-fatal, as it was with os.system.
        subprocess.call(['weblogo', '-f', aln_path, '-o', logo_path,
                         '--annotate', annotation])
    except OSError as err:
        print('weblogo could not be run: %s' % err)

    sm.write_seq_from_seq(name, args['path'], query.fasta)
    sm.run_psi_blast_for_pssm(prefix + '.fasta', aln_path, prefix + '.pssm')
def analyse_doc(args):
    """Collect database hits for a doc query, then align them and derive a logo and PSSM.

    Steps, in order:

    1. Load the query sequence from the ``<name>.B`` entry of
       ``all_docs.fasta``.
    2. Pairwise-align every entry of ``all_dockerins_May2015.txt`` against the
       query and keep those whose ``sm.aln_identity`` is at least 0.4 and for
       which ``gap_in_essential`` is false on both aligned sequences.
    3. Write the query plus the kept hits (keyed by record name) to
       ``<path><name>_passed_thresholds.fasta`` and align them with ``muscle``
       into ``<path><name>_passed_thresholds.aln``.
    4. Draw ``<path><name>_passed_thresholds.eps`` with ``weblogo``, annotated
       with the aligned query sequence.
    5. Write the query sequence via ``sm.write_seq_from_seq``, rewrite the
       alignment file through ``sm.write_multi_seqs_to_file`` with the query
       name as extra argument, and call ``sm.run_psi_blast_for_pssm`` to
       produce ``<path><name>.pssm``.

    Args:
        args: mapping providing ``'name'`` (query identifier) and ``'path'``
            (output prefix). ``'path'`` is concatenated directly with
            ``'name'``, so it must already end with a path separator.

    Returns:
        None. All results are written to files under ``args['path']``.

    Raises:
        KeyError: if ``args`` lacks a key, or the query is missing from the
            input or aligned FASTA collections.
        OSError: if the ``muscle`` executable cannot be started.
        subprocess.CalledProcessError: if ``muscle`` exits with a non-zero
            status.
    """
    import subprocess

    import seq_manager as sm

    identity_cutoff = 0.4

    name = args['name']
    prefix = args['path'] + name
    hits_fasta_path = prefix + '_passed_thresholds.fasta'
    aln_path = prefix + '_passed_thresholds.aln'
    logo_path = prefix + '_passed_thresholds.eps'

    query = sm.WorkSeq(
        name[:4], name,
        sm.read_multi_fastas('all_docs.fasta')[name + '.B']['seq'])
    helices = get_helices_seq(name[:4], query.fasta)
    all_doc_fastas = sm.read_multi_fastas('all_dockerins_May2015.txt')

    # The query itself is always part of the set handed to the aligner.
    pass_thresh = [{'name': name, 'seq': query.fasta}]
    for _, hit_val in all_doc_fastas.items():
        pw_aln = sm.pair_wise_aln_from_seqs(query.fasta,
                                            hit_val['seq'].upper())
        if (sm.aln_identity(pw_aln[0], pw_aln[1]) >= identity_cutoff
                and not gap_in_essential(helices, pw_aln[1])
                and not gap_in_essential(helices, pw_aln[0])):
            pass_thresh.append(hit_val)

    # Keyed by record name; a later record with the same name replaces an
    # earlier one.
    sm.write_multi_seqs_to_file(
        {record['name']: record for record in pass_thresh}, hits_fasta_path)

    # Argument lists (no shell) keep paths with spaces or shell
    # metacharacters intact. A failed alignment aborts here instead of
    # letting the next step read a missing or stale .aln file.
    subprocess.check_call(['muscle', '-in', hits_fasta_path,
                           '-out', aln_path])
    aln_fastas = sm.read_multi_fastas(aln_path)

    # One annotation label per alignment column: the aligned query residue
    # (or '-' for a gap).
    annotation = ','.join(aln_fastas[name]['seq'])
    try:
        # Nothing below reads the .eps file, so a weblogo failure stays
        # non-fatal, as it was with os.system.
        subprocess.call(['weblogo', '-f', aln_path, '-o', logo_path,
                         '--annotate', annotation])
    except OSError as err:
        print('weblogo could not be run: %s' % err)

    # Formatted as one string so the output matches the Python 2 statement
    # "print 'aln_fastas', aln_fastas" on either interpreter.
    print('aln_fastas %s' % (aln_fastas,))

    sm.write_seq_from_seq(name, args['path'], query.fasta)
    # Rewrites the muscle output in place; the third argument is the query
    # name (presumably to control record ordering - verify in seq_manager).
    sm.write_multi_seqs_to_file(aln_fastas, aln_path, name)
    sm.run_psi_blast_for_pssm(prefix + '.fasta', aln_path, prefix + '.pssm')
def analyse_coh(args):
    """Collect database hits for a coh query, then align them and derive a logo and PSSM.

    Steps, in order:

    1. Load the query sequence from the ``<name>.A`` entry of ``all_cohs.txt``.
    2. Pairwise-align every entry of ``Coh_All_Setup-Oct2014-d-for-lizi.txt``
       against the query and keep those whose ``sm.aln_identity`` is at least
       0.4 and for which ``gap_in_essential`` is false on both aligned
       sequences.
    3. Write the query plus the kept hits to
       ``<path><name>_passed_thresholds.fasta`` and align them with ``muscle``
       into ``<path><name>_passed_thresholds.aln``.
    4. Draw ``<path><name>_passed_thresholds.eps`` with ``weblogo``, annotated
       with the aligned query sequence.
    5. Write the query sequence via ``sm.write_seq_from_seq`` and call
       ``sm.run_psi_blast_for_pssm`` to produce ``<path><name>.pssm``.

    Args:
        args: mapping providing ``'name'`` (query identifier) and ``'path'``
            (output prefix). ``'path'`` is concatenated directly with
            ``'name'``, so it must already end with a path separator.

    Returns:
        None. All results are written to files under ``args['path']``.

    Raises:
        KeyError: if ``args`` lacks a key, or the query is missing from the
            input or aligned FASTA collections.
        OSError: if the ``muscle`` executable cannot be started.
        subprocess.CalledProcessError: if ``muscle`` exits with a non-zero
            status.
    """
    import subprocess

    import seq_manager as sm

    identity_cutoff = 0.4

    name = args['name']
    prefix = args['path'] + name
    hits_fasta_path = prefix + '_passed_thresholds.fasta'
    aln_path = prefix + '_passed_thresholds.aln'
    logo_path = prefix + '_passed_thresholds.eps'

    query = sm.WorkSeq(
        name[:4], name,
        sm.read_multi_fastas('all_cohs.txt')[name + '.A']['seq'])
    # Single-argument parenthesised prints behave the same on Python 2 and 3.
    print(args)
    print(query)

    strands = get_strands_seq(query.name, query.fasta)
    all_coh_fastas = sm.read_multi_fastas(
        'Coh_All_Setup-Oct2014-d-for-lizi.txt')

    # The query itself is always the first record handed to the aligner.
    pass_threshold = [{'name': query.name, 'seq': query.fasta}]
    for _, hit_val in all_coh_fastas.items():
        pw_aln = sm.pair_wise_aln_from_seqs(query.fasta,
                                            hit_val['seq'].upper())
        # The trailing 4 is forwarded to gap_in_essential unchanged; its
        # meaning is defined by that helper.
        if (sm.aln_identity(pw_aln[0], pw_aln[1]) >= identity_cutoff
                and not gap_in_essential(strands, pw_aln[1], 4)
                and not gap_in_essential(strands, pw_aln[0], 4)):
            pass_threshold.append(hit_val)
    print('found %i seqs that passed the thresholds' % len(pass_threshold))

    # NOTE(review): analyse_doc passes a name-keyed dict to
    # write_multi_seqs_to_file, whereas a list is passed here - confirm the
    # helper accepts both shapes.
    sm.write_multi_seqs_to_file(pass_threshold, hits_fasta_path)

    # Argument lists (no shell) keep paths with spaces or shell
    # metacharacters intact. A failed alignment aborts here instead of
    # letting the next step read a missing or stale .aln file.
    subprocess.check_call(['muscle', '-in', hits_fasta_path,
                           '-out', aln_path])
    aln_fastas = sm.read_multi_fastas(aln_path)

    # One annotation label per alignment column: the aligned query residue
    # (or '-' for a gap).
    # NOTE(review): the query was stored above under query.name but is
    # looked up here as args['name'] - confirm the two are always equal.
    annotation = ','.join(aln_fastas[name]['seq'])
    try:
        # Nothing below reads the .eps file, so a weblogo failure stays
        # non-fatal, as it was with os.system.
        subprocess.call(['weblogo', '-f', aln_path, '-o', logo_path,
                         '--annotate', annotation])
    except OSError as err:
        print('weblogo could not be run: %s' % err)

    sm.write_seq_from_seq(name, args['path'], query.fasta)
    sm.run_psi_blast_for_pssm(prefix + '.fasta', aln_path, prefix + '.pssm')
def analyse_doc(args):
    """Collect database hits for a doc query, then align them and derive a logo and PSSM.

    Steps, in order:

    1. Load the query sequence from the ``<name>.B`` entry of
       ``all_docs.fasta``.
    2. Pairwise-align every entry of ``all_dockerins_May2015.txt`` against the
       query and keep those whose ``sm.aln_identity`` is at least 0.4 and for
       which ``gap_in_essential`` is false on both aligned sequences.
    3. Write the query plus the kept hits (keyed by record name) to
       ``<path><name>_passed_thresholds.fasta`` and align them with ``muscle``
       into ``<path><name>_passed_thresholds.aln``.
    4. Draw ``<path><name>_passed_thresholds.eps`` with ``weblogo``, annotated
       with the aligned query sequence.
    5. Write the query sequence via ``sm.write_seq_from_seq``, rewrite the
       alignment file through ``sm.write_multi_seqs_to_file`` with the query
       name as extra argument, and call ``sm.run_psi_blast_for_pssm`` to
       produce ``<path><name>.pssm``.

    Args:
        args: mapping providing ``'name'`` (query identifier) and ``'path'``
            (output prefix). ``'path'`` is concatenated directly with
            ``'name'``, so it must already end with a path separator.

    Returns:
        None. All results are written to files under ``args['path']``.

    Raises:
        KeyError: if ``args`` lacks a key, or the query is missing from the
            input or aligned FASTA collections.
        OSError: if the ``muscle`` executable cannot be started.
        subprocess.CalledProcessError: if ``muscle`` exits with a non-zero
            status.
    """
    import subprocess

    import seq_manager as sm

    identity_cutoff = 0.4

    name = args['name']
    prefix = args['path'] + name
    hits_fasta_path = prefix + '_passed_thresholds.fasta'
    aln_path = prefix + '_passed_thresholds.aln'
    logo_path = prefix + '_passed_thresholds.eps'

    query = sm.WorkSeq(
        name[:4], name,
        sm.read_multi_fastas('all_docs.fasta')[name + '.B']['seq'])
    helices = get_helices_seq(name[:4], query.fasta)
    all_doc_fastas = sm.read_multi_fastas('all_dockerins_May2015.txt')

    # The query itself is always part of the set handed to the aligner.
    pass_thresh = [{'name': name, 'seq': query.fasta}]
    for _, hit_val in all_doc_fastas.items():
        pw_aln = sm.pair_wise_aln_from_seqs(query.fasta,
                                            hit_val['seq'].upper())
        if (sm.aln_identity(pw_aln[0], pw_aln[1]) >= identity_cutoff
                and not gap_in_essential(helices, pw_aln[1])
                and not gap_in_essential(helices, pw_aln[0])):
            pass_thresh.append(hit_val)

    # Keyed by record name; a later record with the same name replaces an
    # earlier one.
    sm.write_multi_seqs_to_file(
        {record['name']: record for record in pass_thresh}, hits_fasta_path)

    # Argument lists (no shell) keep paths with spaces or shell
    # metacharacters intact. A failed alignment aborts here instead of
    # letting the next step read a missing or stale .aln file.
    subprocess.check_call(['muscle', '-in', hits_fasta_path,
                           '-out', aln_path])
    aln_fastas = sm.read_multi_fastas(aln_path)

    # One annotation label per alignment column: the aligned query residue
    # (or '-' for a gap).
    annotation = ','.join(aln_fastas[name]['seq'])
    try:
        # Nothing below reads the .eps file, so a weblogo failure stays
        # non-fatal, as it was with os.system.
        subprocess.call(['weblogo', '-f', aln_path, '-o', logo_path,
                         '--annotate', annotation])
    except OSError as err:
        print('weblogo could not be run: %s' % err)

    # Formatted as one string so the output matches the Python 2 statement
    # "print 'aln_fastas', aln_fastas" on either interpreter.
    print('aln_fastas %s' % (aln_fastas,))

    sm.write_seq_from_seq(name, args['path'], query.fasta)
    # Rewrites the muscle output in place; the third argument is the query
    # name (presumably to control record ordering - verify in seq_manager).
    sm.write_multi_seqs_to_file(aln_fastas, aln_path, name)
    sm.run_psi_blast_for_pssm(prefix + '.fasta', aln_path, prefix + '.pssm')