def main():
    """
    main function

    Drives the whole run from the options returned by get_options():
    parses the input fasta, writes the sequences into a temporary directory,
    optionally runs the aligners, transfers the resulting alignment onto the
    sequences, optionally trims sequences/columns, and finally writes the
    result to opts.outfile (plus a '.map' file when opts.printmap is set).
    """
    import os  # local import: only needed here to create the tmp directory

    opts = get_options()
    # the genetic code is only looked up when opts.aa is false
    genetic_code = None if opts.aa else get_genetic_code(opts.code)
    sequences = parse_fasta(opts.fastafile, genetic_code)

    # 'tmp' directory located beside the output file
    tmp_dir = dirname(opts.outfile) + (
        '/tmp' if '/' in opts.outfile else 'tmp')
    # created through the os module instead of a shell 'mkdir -p' string, so
    # paths holding spaces or shell metacharacters are handled literally
    if not os.path.isdir(tmp_dir):
        os.makedirs(tmp_dir)

    ### if we need to align:
    # write sense and anti-sense translated sequences
    this = 'seq' if opts.aa else 'prot'
    write_rfasta(sequences, tmp_dir + '/prot.fasta', what=this)
    if opts.align == 2:
        write_rfasta(sequences, tmp_dir + '/torp.fasta', what=this, rev=True)

    # run alignment
    if opts.align:
        run_alignments(tmp_dir, opts.cpus, opts.quiet, opts.align)

    # merge all in one, keep only sites with score better than m_coffee cut
    aligners = [ali for ali in BINARIES if 'fun' in BINARIES[ali]]
    if len(aligners) > 1 or opts.align == 2:
        merge_mcoffee(tmp_dir, opts.mcoffee_cut, sequences, aa=opts.aa)
    else:
        # single aligner, single orientation: use its output directly
        aa_ali = parse_fasta(tmp_dir + '/prot.fasta_' + aligners[0])
        for seq in sequences:
            aligned = aa_ali[seq]['seq']
            sequences[seq]['aa_ali'] = aligned
            codons = sequences[seq]['codon']
            # put a gap codon at every gapped position of the alignment
            for pos, residue in enumerate(aligned):
                if residue == '-':
                    codons.insert(pos, '---')

    # trimal
    if opts.trimseq:
        # NOTE(review): opts.outfile is given to trim_sequences as its input
        # file, but it is only written further down in this function --
        # confirm the file is expected to exist at this point.
        trim_sequences(tmp_dir, opts.outfile, sequences, opts.trimseq,
                       quiet=opts.quiet)
    # the option is compared with the string 'None', not the None object
    if opts.trimcol != 'None':
        trim_columns(sequences, opts, tmp_dir)

    # write codon sequences
    if opts.aa:
        write_fasta(sequences, opts.outfile, what='aa_ali')
    else:
        write_fasta(sequences, opts.outfile, what='codon')

    # print map
    if opts.printmap:
        printmap(sequences, opts.outfile + '.map', opts.pymap)
def _wait_for_alignments(procs):
    """
    Wait for every queued aligner process, emptying the queue in place.

    :param procs: list of objects exposing communicate(), consumed from the
       front

    :raises Exception: as soon as one process reports 'ERROR' on the error
       stream returned by communicate()
    """
    while procs:
        out, err = procs.pop(0).communicate()
        # err is None when the process stderr was not piped
        if err and 'ERROR' in err:
            stderr.write('%s %s\n' % (out, err))
            raise Exception('\nERROR: running alignments')


def run_alignments(path, cpus=1, quiet=False, tries=2):
    """
    Launch every aligner of BINARIES that has a 'fun' entry on the fasta
    files found in path, then put the anti-sense results back in sense order.

    :param path: directory holding 'prot.fasta' (and 'torp.fasta'); results
       are expected there as '<sense>.fasta_<aligner>'
    :param 1 cpus: number of processes started before waiting for them
    :param False quiet: when False, progress is printed on stdout
    :param 2 tries: how many orientations are aligned: 1 for 'prot' only,
       2 for 'prot' and 'torp'

    :raises Exception: when an aligner reports 'ERROR' on its error stream
    """
    # fixed launch order; an aligner missing from this list makes index()
    # raise ValueError
    order = ['probcons', 'dialign', 'muscle', 'mafft']
    aligners = [ali for ali in BINARIES if 'fun' in BINARIES[ali]]
    procs = []
    files = []
    for ali in sorted(aligners, key=order.index):
        if not quiet:
            print('Aligning with: ' + ali)
        for sense in ['prot', 'torp'][:tries]:
            if not quiet:
                print(' -> ' + ('sense' if sense == 'prot' else 'anti-sense'))
            files.append('%s.fasta_%s' % (sense, ali))
            procs.append(BINARIES[ali]['fun'](path + '/%s.fasta' % (sense)))
            # keep launching until 'cpus' processes are pending, then wait
            # for the whole batch
            if len(procs) >= cpus:
                _wait_for_alignments(procs)
    # processes left over from an incomplete last batch
    _wait_for_alignments(procs)
    # results of the 'torp' runs are reversed again and rewritten in place
    for fil in files:
        if not fil.startswith('torp'):
            continue
        seqs = parse_fasta(path + '/' + fil)
        for seq in seqs:
            seqs[seq]['seq'] = seqs[seq]['seq'][::-1]
        write_rfasta(seqs, path + '/' + fil)
def trim_sequences(path, seq_path, seqs, trimseq, quiet=True):
    """
    Run trimal on seq_path and remove from seqs every sequence that is
    missing from its output.

    :param path: directory in which 'seq_trimmed.fasta' is written
    :param seq_path: fasta file given to trimal through '-in'
    :param seqs: dictionary of sequences, modified in place: entries found
       in the trimal output get an 'ali' key, the others are deleted
    :param trimseq: indexable; items 1 and 2 are passed to trimal as
       '-resoverlap' and '-seqoverlap'
    :param True quiet: when False, names of removed sequences are also
       written to stderr

    :returns: path to the trimmed fasta file
    """
    trimsq_path = path + '/seq_trimmed.fasta'
    command = [
        BINARIES['trimal']['bin'],
        '-in', seq_path,
        '-out', trimsq_path,
        '-resoverlap', str(trimseq[1]),
        '-seqoverlap', str(trimseq[2]),
        '-cons', '100',
    ]
    proc = Popen(command, stdout=PIPE)
    # communicate() is called exactly once. stderr is not piped, so its
    # second element is always None and cannot signal a failure: the exit
    # status is checked instead.
    out = proc.communicate()[0]
    if proc.returncode != 0:
        stderr.write('%s\n' % out)
        exit('\nERROR: trimming sequences')

    kept = parse_fasta(trimsq_path)
    for name in kept:
        seqs[name]['ali'] = kept[name]['seq']
    # sequences without 'ali' were not found in the trimal output
    dropped = [name for name in seqs if 'ali' not in seqs[name]]
    if dropped:
        listing = '\n\t'.join(dropped)
        if not quiet:
            stderr.write('WARNING: trimmed sequences: \n\t' + listing + '\n')
        report = '->trimmed sequences: \n\t' + listing + '\n'
    else:
        report = '->no trimmed sequences\n'
    LOG.append(report)
    for name in dropped:
        del seqs[name]
    return trimsq_path