def _wait_for_alignments(procs):
    """
    Wait for every pending aligner process, emptying *procs* in place.

    :param procs: list of started process objects; each must expose a
       ``communicate()`` method returning an ``(out, err)`` pair
       (presumably subprocess.Popen instances -- verify against BINARIES)

    :raises Exception: as soon as the error stream of a process contains
       'ERROR'; both streams are echoed to stderr before raising
    """
    while procs:
        out, err = procs.pop(0).communicate()
        if 'ERROR' in err:
            stderr.write('%s %s\n' % (out, err))
            raise Exception('\nERROR: running alignments')


def run_alignments(path, cpus=1, quiet=False, tries=2):
    """
    Run each available aligner on the FASTA files stored in *path*.

    An aligner is any entry of BINARIES that defines a 'fun' launcher. Each
    one is run on '<path>/prot.fasta' and, when *tries* is 2, also on
    '<path>/torp.fasta' (reported to the user as the anti-sense run). The
    results are expected in '<path>/<sense>.fasta_<aligner>'. Once all runs
    are finished, the sequences of every 'torp' result are reversed and the
    file is rewritten, so that they read in the same direction as 'prot'.

    :param path: directory holding the input FASTA files
    :param 1 cpus: processes are started until at least this many are
       pending, then all of them are waited for before starting more
    :param False quiet: do not print progress messages
    :param 2 tries: number of senses to align (1: 'prot' only, 2: 'prot'
       and 'torp')

    :raises ValueError: if an aligner found in BINARIES is not one of
       probcons, dialign, muscle or mafft
    :raises Exception: if one of the aligners reports an error
    """
    # fixed running order of the aligners
    order = ['probcons', 'dialign', 'muscle', 'mafft']
    procs = []
    files = []
    aligners = [ali for ali in BINARIES if 'fun' in BINARIES[ali]]
    for ali in sorted(aligners, key=order.index):
        if not quiet:
            print('Aligning with: ' + ali)
        for sense in ['prot', 'torp'][:tries]:
            if not quiet:
                print(' -> ' + ('sense' if sense == 'prot' else 'anti-sense'))
            files.append('%s.fasta_%s' % (sense, ali))
            procs.append(BINARIES[ali]['fun'](path + '/%s.fasta' % (sense)))
            # keep launching until the batch is full, then wait for all
            if len(procs) < cpus:
                continue
            _wait_for_alignments(procs)
    # wait for the last, possibly incomplete, batch
    _wait_for_alignments(procs)
    # put the anti-sense alignments back in the sense orientation
    for fil in files:
        if fil.startswith('torp'):
            seqs = parse_fasta(path + '/' + fil)
            for seq in seqs:
                seqs[seq]['seq'] = seqs[seq]['seq'][::-1]
            write_rfasta(seqs, path + '/' + fil)
def _wait_for_alignments(procs):
    """
    Wait for every pending aligner process, emptying *procs* in place.

    :param procs: list of started process objects; each must expose a
       ``communicate()`` method returning an ``(out, err)`` pair
       (presumably subprocess.Popen instances -- verify against BINARIES)

    :raises Exception: as soon as the error stream of a process contains
       'ERROR'; both streams are echoed to stderr before raising
    """
    while procs:
        out, err = procs.pop(0).communicate()
        if 'ERROR' in err:
            stderr.write('%s %s\n' % (out, err))
            raise Exception('\nERROR: running alignments')


def run_alignments(path, cpus=1, quiet=False, tries=2):
    """
    Launch the aligners registered in BINARIES and wait for their results.

    Only entries of BINARIES providing a 'fun' launcher are used, always in
    the order probcons, dialign, muscle, mafft. For each of them the
    launcher is called on '<path>/prot.fasta' (sense) and, if *tries* is 2,
    on '<path>/torp.fasta' (anti-sense); output is expected in
    '<path>/<sense>.fasta_<aligner>'. When everything has finished, each
    'torp' output is parsed, its sequences are reversed, and it is written
    back over itself.

    :param path: directory holding the input FASTA files
    :param 1 cpus: minimum number of pending processes before waiting for
       the whole batch to finish
    :param False quiet: do not print progress messages
    :param 2 tries: 1 to align only 'prot', 2 to align 'prot' and 'torp'

    :raises ValueError: if an aligner found in BINARIES is not one of
       probcons, dialign, muscle or mafft
    :raises Exception: if one of the aligners reports an error
    """
    running_order = ['probcons', 'dialign', 'muscle', 'mafft']
    senses = ['prot', 'torp'][:tries]
    procs = []
    files = []
    aligners = [ali for ali in BINARIES if 'fun' in BINARIES[ali]]
    for ali in sorted(aligners, key=running_order.index):
        if not quiet:
            print('Aligning with: ' + ali)
        for sense in senses:
            if not quiet:
                print(' -> ' + ('sense' if sense == 'prot' else 'anti-sense'))
            files.append('%s.fasta_%s' % (sense, ali))
            procs.append(BINARIES[ali]['fun'](path + '/%s.fasta' % (sense)))
            # a full batch is pending: wait for all of it before going on
            if len(procs) >= cpus:
                _wait_for_alignments(procs)
    # remaining processes of the last, possibly incomplete, batch
    _wait_for_alignments(procs)
    # anti-sense alignments are reversed back to the sense orientation
    for fil in files:
        if not fil.startswith('torp'):
            continue
        seqs = parse_fasta(path + '/' + fil)
        for seq in seqs:
            seqs[seq]['seq'] = seqs[seq]['seq'][::-1]
        write_rfasta(seqs, path + '/' + fil)
def trim_columns(sequences, opts, tmp_dir):
    """
    Mask, or remove, the alignment columns that trimal does not keep.

    The 'aa_ali' alignment is written to '<tmp_dir>/aligned.fasta' and the
    trimal binary registered in BINARIES is run on it with '-colnumbering'.
    Its standard output is read as a ', '-separated list of the column
    numbers to keep (NOTE(review): confirm this matches the output format
    of the installed trimal version). Every other column is either emptied
    (opts.nogap) or has each run of three capital letters replaced by
    'NNN'.

    :param sequences: dictionary of sequences, modified in place: the
       resulting rows are stored under the 'codon' key of each sequence
       (NOTE(review): this is also the case when opts.aa is set and the
       alignment was built from 'aa_ali' -- confirm this is intended)
    :param opts: options; reads trimcol, gaptreshold, similarity, aa and
       nogap
    :param tmp_dir: directory where trimal input and output are written
    """
    aali_path = tmp_dir + '/aligned.fasta'
    write_rfasta(sequences, aali_path, what='aa_ali')
    trimcl_path = tmp_dir + '/trimmed.fasta'
    # both modes share everything but the trimming criterion
    cmds = [BINARIES['trimal']['bin'], '-in', aali_path, '-out', trimcl_path]
    if opts.trimcol == 'specific':
        cmds += ['-gt', str(opts.gaptreshold), '-st', str(opts.similarity)]
    else:
        cmds.append('-' + opts.trimcol)
    cmds.append('-colnumbering')
    proc = Popen(cmds, stdout=PIPE, stderr=PIPE)
    keeplist, err = proc.communicate()
    LOG.append('')
    if 'ERROR' in err:
        exit('ERROR: trimming columns:\n' + err)
    # set: one membership test per column of the alignment
    kept = set(str(keeplist).strip().split(', '))
    algt = get_alignment(sequences, typ=('aa_ali' if opts.aa else 'codon'))
    nnn = compil('[A-Z]{3}')
    nogap = opts.nogap
    for num, col in enumerate(algt):
        if str(num) in kept:
            continue
        if nogap:
            # drop the column: both its codons and its gap triplets
            algt[num] = [nnn.sub('', x).replace('---', '') for x in col]
        else:
            # keep the column in place but mask its codons
            algt[num] = [nnn.sub('NNN', x) for x in col]
    # columns back to rows; rows are matched to the sorted sequence names
    for key, seq in zip(sorted(sequences.keys()), zip(*algt)):
        sequences[key]['codon'] = seq
def main():
    """
    main function

    Reads the options and the input FASTA file, writes the sequences to a
    temporary directory, runs the aligners, merges or loads the resulting
    alignment, optionally trims sequences and columns, and writes the
    result (plus an optional map) to opts.outfile.
    """
    opts = get_options()
    genetic_code = None if opts.aa else get_genetic_code(opts.code)
    sequences = parse_fasta(opts.fastafile, genetic_code)
    # temporary directory: 'tmp' next to the output file
    tmp_dir = dirname(opts.outfile) + ('/tmp' if '/' in opts.outfile else 'tmp')
    # argument list rather than a shell string, so that a path containing
    # spaces or shell metacharacters is passed to mkdir untouched
    Popen(['mkdir', '-p', tmp_dir]).communicate()
    ### if we need to align:
    # write sense and anti-sense translated sequences
    this = 'seq' if opts.aa else 'prot'
    write_rfasta(sequences, tmp_dir + '/prot.fasta', what=this)
    if opts.align == 2:
        write_rfasta(sequences, tmp_dir + '/torp.fasta', what=this, rev=True)
    # run alignment
    if opts.align:
        run_alignments(tmp_dir, opts.cpus, opts.quiet, opts.align)
    # merge all in one, keep only sites with score better than m_coffee cut
    aligners = [ali for ali in BINARIES if 'fun' in BINARIES[ali]]
    if len(aligners) > 1 or opts.align == 2:
        merge_mcoffee(tmp_dir, opts.mcoffee_cut, sequences, aa=opts.aa)
    else:
        # single alignment: use it directly
        # NOTE(review): raises IndexError when no aligner is available
        aa_ali = parse_fasta(tmp_dir + '/prot.fasta_' + aligners[0])
        for seq in sequences:
            ali_seq = aa_ali[seq]['seq']
            sequences[seq]['aa_ali'] = ali_seq
            # insert a gap codon wherever the alignment has a gap
            for elt, residue in enumerate(ali_seq):
                if residue == '-':
                    sequences[seq]['codon'].insert(elt, '---')
    # trimal
    if opts.trimseq:
        trim_sequences(tmp_dir, opts.outfile, sequences, opts.trimseq,
                       quiet=opts.quiet)
    if opts.trimcol != 'None':
        trim_columns(sequences, opts, tmp_dir)
    # write codon sequences
    if opts.aa:
        write_fasta(sequences, opts.outfile, what='aa_ali')
    else:
        write_fasta(sequences, opts.outfile, what='codon')
    # print map
    if opts.printmap:
        printmap(sequences, opts.outfile + '.map', opts.pymap)
def main():
    """
    main function

    Steps, all driven by the command-line options:
      1. parse the input FASTA file (translated unless opts.aa is set)
      2. write the sequences, and their reverse if opts.align is 2, to a
         'tmp' directory next to the output file
      3. run the aligners if opts.align is set
      4. merge the alignments, or load the only one available
      5. optionally trim sequences and columns
      6. write the final alignment and, if requested, its map
    """
    opts = get_options()
    genetic_code = None if opts.aa else get_genetic_code(opts.code)
    sequences = parse_fasta(opts.fastafile, genetic_code)
    tmp_dir = dirname(opts.outfile) + ('/tmp' if '/' in opts.outfile else 'tmp')
    # no shell involved: the path reaches mkdir as a single argument even
    # if it holds spaces or shell metacharacters
    Popen(['mkdir', '-p', tmp_dir]).communicate()
    ### if we need to align:
    # write sense and anti-sense translated sequences
    this = 'seq' if opts.aa else 'prot'
    write_rfasta(sequences, tmp_dir + '/prot.fasta', what=this)
    if opts.align == 2:
        write_rfasta(sequences, tmp_dir + '/torp.fasta', what=this, rev=True)
    # run alignment
    if opts.align:
        run_alignments(tmp_dir, opts.cpus, opts.quiet, opts.align)
    # merge all in one, keep only sites with score better than m_coffee cut
    aligners = [ali for ali in BINARIES if 'fun' in BINARIES[ali]]
    if len(aligners) > 1 or opts.align == 2:
        merge_mcoffee(tmp_dir, opts.mcoffee_cut, sequences, aa=opts.aa)
    else:
        # NOTE(review): raises IndexError when no aligner is available
        aa_ali = parse_fasta(tmp_dir + '/prot.fasta_' + aligners[0])
        for seq in sequences:
            aligned = aa_ali[seq]['seq']
            sequences[seq]['aa_ali'] = aligned
            # each gap of the alignment becomes a gap codon at that position
            for elt, residue in enumerate(aligned):
                if residue == '-':
                    sequences[seq]['codon'].insert(elt, '---')
    # trimal
    if opts.trimseq:
        trim_sequences(tmp_dir, opts.outfile, sequences, opts.trimseq,
                       quiet=opts.quiet)
    if opts.trimcol != 'None':
        trim_columns(sequences, opts, tmp_dir)
    # write codon sequences
    what = 'aa_ali' if opts.aa else 'codon'
    write_fasta(sequences, opts.outfile, what=what)
    # print map
    if opts.printmap:
        printmap(sequences, opts.outfile + '.map', opts.pymap)
def trim_columns(sequences, opts, tmp_dir):
    """
    Mask, or remove, the alignment columns that trimal does not keep.

    The 'aa_ali' alignment is written to '<tmp_dir>/aligned.fasta' and the
    trimal binary registered in BINARIES is run on it with '-colnumbering'.
    Its standard output is read as a ', '-separated list of the column
    numbers to keep (NOTE(review): confirm this matches the output format
    of the installed trimal version). In the alignment returned by
    get_alignment, every other column is either emptied (opts.nogap) or has
    each run of three capital letters replaced by 'NNN'.

    :param sequences: dictionary of sequences, modified in place: the
       resulting rows are stored under the 'codon' key of each sequence
    :param opts: options; reads trimcol, gaptreshold, similarity and nogap
    :param tmp_dir: directory where trimal input and output are written
    """
    aali_path = tmp_dir + '/aligned.fasta'
    write_rfasta(sequences, aali_path, what='aa_ali')
    trimcl_path = tmp_dir + '/trimmed.fasta'
    # both modes share everything but the trimming criterion
    cmds = [BINARIES['trimal']['bin'], '-in', aali_path, '-out', trimcl_path]
    if opts.trimcol == 'specific':
        cmds += ['-gt', str(opts.gaptreshold), '-st', str(opts.similarity)]
    else:
        cmds.append('-' + opts.trimcol)
    cmds.append('-colnumbering')
    proc = Popen(cmds, stdout=PIPE, stderr=PIPE)
    keeplist, err = proc.communicate()
    LOG.append('')
    if 'ERROR' in err:
        exit('ERROR: trimming columns:\n' + err)
    # set: one membership test per column of the alignment
    kept = set(str(keeplist).strip().split(', '))
    algt = get_alignment(sequences)
    nnn = compil('[A-Z]{3}')
    nogap = opts.nogap
    for num, col in enumerate(algt):
        if str(num) in kept:
            continue
        if nogap:
            # drop the column: both its codons and its gap triplets
            algt[num] = [nnn.sub('', x).replace('---', '') for x in col]
        else:
            # keep the column in place but mask its codons
            algt[num] = [nnn.sub('NNN', x) for x in col]
    # columns back to rows; rows are matched to the sorted sequence names
    for key, seq in zip(sorted(sequences.keys()), zip(*algt)):
        sequences[key]['codon'] = seq