Exemple #1
0
def run_alignments(path, cpus=1, quiet=False, tries=2):
    """
    Launch every available aligner on the FASTA files found in *path*.

    An aligner is any key of BINARIES whose entry has a 'fun' launcher.
    Aligners are run in the fixed order probcons, dialign, muscle, mafft;
    each one is started on 'prot.fasta' and then on 'torp.fasta', limited to
    the first `tries` of these two inputs. Launched jobs are queued until
    `cpus` of them are pending, at which point all of them are waited for.
    When everything has finished, every result file whose name starts with
    'torp' is parsed, each of its sequences is reversed, and the file is
    written back.

    :param path: directory holding the input FASTA files; result files named
       '<sense>.fasta_<aligner>' are read back from this same directory
    :param 1 cpus: number of pending jobs that triggers a wait
    :param False quiet: when True, no progress message is printed
    :param 2 tries: how many of the inputs ('prot', then 'torp') are aligned

    :raises Exception: if the standard error of a job contains 'ERROR'
    :raises ValueError: if an aligner is not one of the four names above
    """
    known_order = ['probcons', 'dialign', 'muscle', 'mafft']
    pending = []
    outputs = []

    def _wait_all():
        # Jobs are consumed in launch order; the first reported error aborts.
        # NOTE(review): the launchers presumably return subprocess.Popen
        # objects with piped stdout/stderr - confirm against BINARIES.
        while pending:
            out, err = pending.pop(0).communicate()
            if 'ERROR' in err:
                # Python 2 "print chevron" form, kept as in the original
                print >> stderr, out, err
                raise Exception('\nERROR: running alignments')

    launchers = [name for name in BINARIES if 'fun' in BINARIES[name]]
    for name in sorted(launchers, key=known_order.index):
        if not quiet:
            print('Aligning with: ' + name)
        for sense in ['prot', 'torp'][:tries]:
            if not quiet:
                label = 'sense' if sense == 'prot' else 'anti-sense'
                print('  -> ' + label)
            outputs.append('%s.fasta_%s' % (sense, name))
            pending.append(BINARIES[name]['fun'](path + '/%s.fasta' % sense))
            if len(pending) >= cpus:
                _wait_all()
    # jobs left over from an incomplete last batch
    _wait_all()

    # put the anti-sense results back in their original reading direction
    for fname in outputs:
        if not fname.startswith('torp'):
            continue
        fpath = path + '/' + fname
        seqs = parse_fasta(fpath)
        for seq in seqs:
            seqs[seq]['seq'] = seqs[seq]['seq'][::-1]
        write_rfasta(seqs, fpath)
Exemple #2
0
def _wait_for_alignments(jobs):
    """
    Wait, in launch order, for every job of *jobs* and empty the list.

    :raises Exception: if the standard error of a job contains 'ERROR'
       (its output and error streams are first printed to stderr)
    """
    while len(jobs) > 0:
        job = jobs.pop(0)
        out, err = job.communicate()
        if 'ERROR' not in err:
            continue
        # Python 2 "print chevron" form, kept as in the original
        print >> stderr, out, err
        raise Exception('\nERROR: running alignments')


def run_alignments(path, cpus=1, quiet=False, tries=2):
    """
    Run the aligners declared in BINARIES on the FASTA files in *path*.

    Only BINARIES entries providing a 'fun' launcher are used, sorted as
    probcons, dialign, muscle, mafft. For each aligner the inputs
    'prot.fasta' and 'torp.fasta' (only the first `tries` of them) are
    submitted; as soon as `cpus` jobs are pending they are all waited for.
    Afterwards, each result file starting with 'torp' is re-read, its
    sequences are reversed and the file is rewritten.

    :param path: directory containing the inputs and receiving the results
       (results are expected under the name '<sense>.fasta_<aligner>')
    :param 1 cpus: size of a batch of jobs launched before waiting
    :param False quiet: silence the progress messages
    :param 2 tries: number of inputs to align, taken from ['prot', 'torp']

    :raises Exception: if the standard error of a job contains 'ERROR'
    :raises ValueError: if an aligner name is not in the sorting list
    """
    ranking = ['probcons', 'dialign', 'muscle', 'mafft']
    sense_names = {'prot': 'sense'}
    running = []
    produced = []
    available = [key for key in BINARIES if 'fun' in BINARIES[key]]
    available.sort(key=lambda key: ranking.index(key))
    for ali in available:
        if not quiet:
            print('Aligning with: ' + ali)
        for sense in ['prot', 'torp'][:tries]:
            if not quiet:
                print('  -> ' + sense_names.get(sense, 'anti-sense'))
            produced.append('%s.fasta_%s' % (sense, ali))
            source = path + '/%s.fasta' % (sense)
            running.append(BINARIES[ali]['fun'](source))
            if len(running) < cpus:
                continue
            _wait_for_alignments(running)
    # whatever is still running after the last aligner was submitted
    _wait_for_alignments(running)

    # results computed on reversed input are reversed back
    for fil in [name for name in produced if name.startswith('torp')]:
        target = path + '/' + fil
        seqs = parse_fasta(target)
        for seq in seqs:
            seqs[seq]['seq'] = seqs[seq]['seq'][::-1]
        write_rfasta(seqs, target)
Exemple #3
0
def trim_columns(sequences, opts, tmp_dir):
    """
    Mask or remove the alignment columns that trimAl does not keep.

    The 'aa_ali' alignment is written to '<tmp_dir>/aligned.fasta' and trimAl
    is run on it with '-colnumbering'; its standard output, split on ', ',
    gives the numbers of the columns to keep. In every other column of the
    alignment returned by get_alignment, each run of three capital letters
    is replaced by 'NNN' or, when opts.nogap is set, removed together with
    every '---'. The columns are finally transposed back to one tuple per
    sequence, stored under sequences[key]['codon'] with keys taken in sorted
    order.

    :param sequences: dictionary of sequences, modified in place
    :param opts: options object; trimcol, gaptreshold, similarity, aa and
       nogap are read from it
    :param tmp_dir: directory used for the trimAl input and output files

    The program exits if the standard error of trimAl contains 'ERROR'.
    """
    aligned_fasta = tmp_dir + '/aligned.fasta'
    write_rfasta(sequences, aligned_fasta, what='aa_ali')
    trimmed_fasta = tmp_dir + '/trimmed.fasta'

    # 'specific' uses explicit gap/similarity thresholds; any other value is
    # handed to trimAl as the name of one of its own flags
    use_thresholds = opts.trimcol == 'specific'
    command = [BINARIES['trimal']['bin'], '-in', aligned_fasta,
               '-out', trimmed_fasta]
    if use_thresholds:
        command += ['-gt', str(opts.gaptreshold),
                    '-st', str(opts.similarity)]
    else:
        command.append('-' + opts.trimcol)
    command.append('-colnumbering')

    trimal = Popen(command, stdout=PIPE, stderr=PIPE)
    (numbering, err) = trimal.communicate()
    LOG.append('')
    if 'ERROR' in err:
        exit('ERROR: trimming columns:\n' + err)

    # NOTE(review): this assumes the output is a bare "0, 1, 2" list; a
    # leading label on the first item would make that column look trimmed -
    # confirm against the installed trimAl version.
    kept = set(str(numbering).strip().split(', '))

    column_type = 'aa_ali' if opts.aa else 'codon'
    algt = get_alignment(sequences, typ=column_type)
    triplet = compil('[A-Z]{3}')
    if opts.nogap:
        dashes = compil('---')
        for num, col in enumerate(algt):
            if str(num) in kept:
                continue
            algt[num] = [dashes.sub('', triplet.sub('', x)) for x in col]
    else:
        for num, col in enumerate(algt):
            if str(num) in kept:
                continue
            algt[num] = [triplet.sub('NNN', x) for x in col]
    # NOTE(review): the result is stored under 'codon' even when opts.aa
    # selected the 'aa_ali' alignment - confirm this is intended.
    for key, row in zip(sorted(sequences.keys()), zip(*algt)):
        sequences[key]['codon'] = row
Exemple #4
0
def main():
    """
    main function

    Reads the options and the input FASTA file, writes the translated
    sequences to a temporary directory, optionally aligns them, maps the
    amino-acid alignment back onto the sequences, optionally trims sequences
    and columns, and writes the resulting alignment (plus an optional map).
    """
    opts = get_options()
    genetic_code = None if opts.aa else get_genetic_code(opts.code)
    sequences = parse_fasta(opts.fastafile, genetic_code)
    # temporary files go to a 'tmp' directory next to the output file
    tmp_dir = dirname(
        opts.outfile) + ('/tmp' if '/' in opts.outfile else 'tmp')

    # Argument list instead of a shell string: the path reaches mkdir
    # verbatim, so spaces or shell metacharacters in the output path can
    # neither split the argument nor be executed. '--' stops a path starting
    # with '-' from being read as an option.
    Popen(['mkdir', '-p', '--', tmp_dir]).communicate()

    ### if we need to align:
    # write sense and anti-sense translated sequences
    this = 'seq' if opts.aa else 'prot'
    write_rfasta(sequences, tmp_dir + '/prot.fasta', what=this)
    if opts.align == 2:
        write_rfasta(sequences, tmp_dir + '/torp.fasta', what=this, rev=True)
    # run alignment
    if opts.align:
        run_alignments(tmp_dir, opts.cpus, opts.quiet, opts.align)
    # merge all in one, keep only sites with score better than m_coffee cut
    aligners = [ali for ali in BINARIES if 'fun' in BINARIES[ali]]
    if len(aligners) > 1 or opts.align == 2:
        merge_mcoffee(tmp_dir, opts.mcoffee_cut, sequences, aa=opts.aa)
    else:
        # single aligner, sense only: use its result directly
        aa_ali = parse_fasta(tmp_dir + '/prot.fasta_' + aligners[0])
        for seq in sequences:
            sequences[seq]['aa_ali'] = aa_ali[seq]['seq']
            # open a '---' gap in the codon list at each gapped position
            for elt in xrange(len(sequences[seq]['aa_ali'])):
                if sequences[seq]['aa_ali'][elt] == '-':
                    sequences[seq]['codon'].insert(elt, '---')

    # trimal
    if opts.trimseq:
        trim_sequences(tmp_dir,
                       opts.outfile,
                       sequences,
                       opts.trimseq,
                       quiet=opts.quiet)
    # the option holds the string 'None', not the None object, when unset
    if opts.trimcol != 'None':
        trim_columns(sequences, opts, tmp_dir)

    # write codon sequences
    if opts.aa:
        write_fasta(sequences, opts.outfile, what='aa_ali')
    else:
        write_fasta(sequences, opts.outfile, what='codon')

    # print map
    if opts.printmap:
        printmap(sequences, opts.outfile + '.map', opts.pymap)
Exemple #5
0
def main():
    """
    main function

    Workflow: parse options and input FASTA, write the translated sequences
    into a temporary directory, run the aligners if requested, transfer the
    amino-acid alignment onto the sequences (merging when several alignments
    exist), apply the optional sequence/column trimming, then write the
    output alignment and, if asked, its map.
    """
    opts         = get_options()
    genetic_code = None if opts.aa else get_genetic_code (opts.code)
    sequences    = parse_fasta (opts.fastafile, genetic_code)
    # working directory: 'tmp' beside the output file
    tmp_dir      = dirname(opts.outfile) + ('/tmp' if '/' in opts.outfile else 'tmp')

    # mkdir is called with an argument vector and no shell, so a path that
    # contains spaces, quotes or ';' is created as given instead of being
    # interpreted by the shell; '--' protects paths starting with '-'.
    Popen(['mkdir', '-p', '--', tmp_dir]).communicate()

    ### if we need to align:
    # write sense and anti-sense translated sequences
    this = 'seq' if opts.aa else 'prot'
    write_rfasta(sequences, tmp_dir + '/prot.fasta', what=this)
    if opts.align == 2:
        write_rfasta(sequences, tmp_dir + '/torp.fasta', what=this, rev=True)
    # run alignment
    if opts.align:
        run_alignments(tmp_dir, opts.cpus, opts.quiet, opts.align)
    # merge all in one, keep only sites with score better than m_coffee cut
    aligners = [ali for ali in BINARIES if 'fun' in BINARIES[ali]]
    if len(aligners) > 1 or opts.align == 2:
        merge_mcoffee(tmp_dir, opts.mcoffee_cut, sequences, aa=opts.aa)
    else:
        # only one alignment available: take it as is
        aa_ali = parse_fasta(tmp_dir + '/prot.fasta_' + aligners[0])
        for seq in sequences:
            aligned = aa_ali[seq]['seq']
            sequences[seq]['aa_ali'] = aligned
            # every gapped amino-acid position gets a '---' codon
            for pos, residue in enumerate(aligned):
                if residue == '-':
                    sequences[seq]['codon'].insert(pos, '---')

    # trimal
    if opts.trimseq:
        trim_sequences(tmp_dir, opts.outfile, sequences,
                       opts.trimseq, quiet=opts.quiet)
    # unset column trimming is the string 'None', not the None object
    if opts.trimcol != 'None':
        trim_columns(sequences, opts, tmp_dir)

    # write codon sequences
    if opts.aa:
        write_fasta(sequences, opts.outfile, what='aa_ali')
    else:
        write_fasta(sequences, opts.outfile, what='codon')

    # print map
    if opts.printmap:
        printmap(sequences, opts.outfile + '.map', opts.pymap)
Exemple #6
0
def trim_columns(sequences, opts, tmp_dir):
    """
    Apply the column selection computed by trimAl to the alignment.

    Writes the 'aa_ali' alignment to '<tmp_dir>/aligned.fasta', runs trimAl
    with '-colnumbering' and reads from its standard output the ', '
    separated numbers of the columns to keep. Columns of get_alignment's
    result that are not listed get every run of three capital letters
    replaced by 'NNN'; with opts.nogap those runs and every '---' are
    removed instead. The transposed columns are stored, one tuple per
    sequence, in sequences[key]['codon'], following the sorted keys.

    :param sequences: dictionary of sequences, modified in place
    :param opts: options object; trimcol, gaptreshold, similarity and nogap
       are read from it
    :param tmp_dir: directory receiving the trimAl input and output files

    The program exits if the standard error of trimAl contains 'ERROR'.
    """
    infile = tmp_dir + '/aligned.fasta'
    write_rfasta(sequences, infile, what='aa_ali')
    outfile = tmp_dir + '/trimmed.fasta'

    if opts.trimcol == 'specific':
        arguments = [BINARIES['trimal']['bin'],
                     '-in', infile,
                     '-out', outfile,
                     '-gt', str(opts.gaptreshold),
                     '-st', str(opts.similarity),
                     '-colnumbering']
    else:
        # any other value is passed to trimAl as one of its own flags
        arguments = [BINARIES['trimal']['bin'],
                     '-in', infile,
                     '-out', outfile,
                     '-' + opts.trimcol,
                     '-colnumbering']
    out, err = Popen(arguments, stdout=PIPE, stderr=PIPE).communicate()
    LOG.append('')
    if 'ERROR' in err:
        exit('ERROR: trimming columns:\n' + err)

    # NOTE(review): assumes a bare "0, 1, 2" list on stdout; a leading label
    # on the first item would make that column look trimmed - confirm
    # against the installed trimAl version.
    keep = set(str(out).strip().split(', '))

    algt = get_alignment(sequences)
    codon = compil('[A-Z]{3}')
    # choose once what happens to a cell of a rejected column
    if opts.nogap:
        gap = compil('---')
        clean = lambda cell: gap.sub('', codon.sub('', cell))
    else:
        clean = lambda cell: codon.sub('NNN', cell)
    for num, col in enumerate(algt):
        if str(num) not in keep:
            algt[num] = [clean(cell) for cell in col]

    # back from columns to one row per sequence
    rows = zip(*algt)
    for key, row in zip(sorted(sequences.keys()), rows):
        sequences[key]['codon'] = row