Exemple #1
0
def main():
    """
    Command-line entry point: align the input sequences, optionally trim
    them, and write the resulting alignment.

    Steps, in order:
      - read the options and parse the input FASTA (a genetic code is
        passed to the parser unless ``opts.aa`` is set);
      - create a ``tmp`` directory next to the output file and write the
        sequences there as ``prot.fasta`` (plus the reversed ``torp.fasta``
        when ``opts.align == 2``);
      - run the aligners when ``opts.align`` is set;
      - merge the alignments with ``merge_mcoffee`` when several aligners
        are available or ``opts.align == 2``; otherwise load the single
        aligner output and propagate its gaps to the codon lists;
      - apply sequence and column trimming when requested;
      - write the output FASTA and, optionally, the map file.
    """
    opts = get_options()
    genetic_code = None if opts.aa else get_genetic_code(opts.code)
    sequences = parse_fasta(opts.fastafile, genetic_code)
    tmp_dir = dirname(
        opts.outfile) + ('/tmp' if '/' in opts.outfile else 'tmp')

    # argv list rather than a shell string, so that spaces or shell
    # metacharacters in the output path are taken literally
    Popen(['mkdir', '-p', tmp_dir]).communicate()

    ### if we need to align:
    # write sense and anti-sense translated sequences
    this = 'seq' if opts.aa else 'prot'
    write_rfasta(sequences, tmp_dir + '/prot.fasta', what=this)
    if opts.align == 2:
        write_rfasta(sequences, tmp_dir + '/torp.fasta', what=this, rev=True)
    # run alignment
    if opts.align:
        run_alignments(tmp_dir, opts.cpus, opts.quiet, opts.align)
    # merge all in one, keep only sites with score better than m_coffee cut
    aligners = [ali for ali in BINARIES if 'fun' in BINARIES[ali]]
    if len(aligners) > 1 or opts.align == 2:
        merge_mcoffee(tmp_dir, opts.mcoffee_cut, sequences, aa=opts.aa)
    else:
        # single aligner, single orientation: use its output directly
        aa_ali = parse_fasta(tmp_dir + '/prot.fasta_' + aligners[0])
        for seq in sequences:
            aligned = aa_ali[seq]['seq']
            sequences[seq]['aa_ali'] = aligned
            # NOTE(review): this assumes every entry has a 'codon' list,
            # also when opts.aa is set -- confirm against parse_fasta
            codons = sequences[seq]['codon']
            # each gap of the alignment becomes a gap codon at the same
            # position of the codon list
            for pos, residue in enumerate(aligned):
                if residue == '-':
                    codons.insert(pos, '---')

    # trimal
    if opts.trimseq:
        trim_sequences(tmp_dir,
                       opts.outfile,
                       sequences,
                       opts.trimseq,
                       quiet=opts.quiet)
    # the "no column trimming" value is the string 'None', not the object
    if opts.trimcol != 'None':
        trim_columns(sequences, opts, tmp_dir)

    # write codon sequences
    if opts.aa:
        write_fasta(sequences, opts.outfile, what='aa_ali')
    else:
        write_fasta(sequences, opts.outfile, what='codon')

    # print map
    if opts.printmap:
        printmap(sequences, opts.outfile + '.map', opts.pymap)
Exemple #2
0
def _wait_for_alignments(procs):
    """
    Wait for every process in *procs* and abort if one reports an error.

    :param procs: list of started processes exposing ``communicate()``;
       the list is consumed in place, first in first out

    :raises Exception: when the text 'ERROR' is found in the standard
       error of one of the processes
    """
    while procs:
        out, err = procs.pop(0).communicate()
        # err is None when the process was started without a stderr pipe
        if err and 'ERROR' in err:
            stderr.write('%s %s\n' % (out, err))
            raise Exception('\nERROR: running alignments')


def run_alignments(path, cpus=1, quiet=False, tries=2):
    """
    Run every aligner of BINARIES that defines a 'fun' launcher on the
    FASTA files found in *path*.

    Each launcher is called with the path of the input file
    (``<path>/prot.fasta`` or ``<path>/torp.fasta``) and has to return a
    process object with a ``communicate()`` method. The result of each run
    is expected in ``<path>/<sense>.fasta_<aligner>``.

    :param path: directory holding the input FASTA files
    :param 1 cpus: number of processes started before waiting for all of
       them to finish
    :param False quiet: do not print progress messages
    :param 2 tries: number of orientations to align: 1 for 'prot' only,
       2 for 'prot' and 'torp'

    :raises Exception: when an aligner writes 'ERROR' to its standard error
    """
    preferred = ['probcons', 'dialign', 'muscle', 'mafft']

    def launch_rank(name):
        # known aligners keep their fixed order, any other one runs last
        if name in preferred:
            return (preferred.index(name), name)
        return (len(preferred), name)

    aligners = [ali for ali in BINARIES if 'fun' in BINARIES[ali]]
    procs = []
    files = []
    for ali in sorted(aligners, key=launch_rank):
        if not quiet:
            print('Aligning with: ' + ali)
        for sense in ['prot', 'torp'][:tries]:
            if not quiet:
                print('  -> ' + ('sense' if sense == 'prot' else 'anti-sense'))
            files.append('%s.fasta_%s' % (sense, ali))
            procs.append(BINARIES[ali]['fun'](path + '/%s.fasta' % (sense)))
            # once `cpus` jobs are started, wait for the whole batch
            if len(procs) >= cpus:
                _wait_for_alignments(procs)
    # jobs left over from an incomplete last batch
    _wait_for_alignments(procs)
    # 'torp' results are read back, each sequence is reversed, and the
    # file is rewritten in place
    for fil in files:
        if fil.startswith('torp'):
            seqs = parse_fasta(path + '/' + fil)
            for seq in seqs:
                seqs[seq]['seq'] = seqs[seq]['seq'][::-1]
            write_rfasta(seqs, path + '/' + fil)
Exemple #3
0
def trim_sequences(path, seq_path, seqs, trimseq, quiet=True):
    """
    Run trimal on an alignment and drop from *seqs* the sequences that
    trimal did not keep.

    trimal is called with ``-resoverlap trimseq[1] -seqoverlap trimseq[2]
    -cons 100`` and writes ``<path>/seq_trimmed.fasta``. The sequences
    found in that file are stored under the 'ali' key of their entry in
    *seqs*; entries left without an 'ali' key are deleted from *seqs* and
    listed in LOG.

    :param path: directory where the trimmed FASTA file is written
    :param seq_path: alignment file given to trimal as input
    :param seqs: dictionary of sequences, modified in place
    :param trimseq: indexable holding the residue overlap at index 1 and
       the sequence overlap at index 2 (index 0 is not used here)
    :param True quiet: do not print the names of the removed sequences

    :returns: path to the trimmed FASTA file
    """
    trimsq_path = path + '/seq_trimmed.fasta'
    proc = Popen([
        BINARIES['trimal']['bin'], '-in', seq_path, '-out', trimsq_path,
        '-resoverlap', str(trimseq[1]),
        '-seqoverlap', str(trimseq[2]),
        '-cons', '100'
    ], stdout=PIPE)
    # only stdout is piped, so the second item returned by communicate()
    # is always None: rely on the exit status to detect a failure
    out = proc.communicate()[0]
    if proc.returncode != 0:
        stderr.write('%s\n' % (out))
        exit('\nERROR: trimming sequences')

    kept = parse_fasta(trimsq_path)
    for seq in kept:
        seqs[seq]['ali'] = kept[seq]['seq']

    # sequences without 'ali' are absent from trimal output
    dropped = [name for name in seqs if 'ali' not in seqs[name]]
    if dropped:
        listing = '\n\t'.join(dropped)
        if not quiet:
            stderr.write('WARNING: trimmed sequences: \n\t' + listing + '\n')
        LOG.append('->trimmed sequences: \n\t' + listing + '\n')
    else:
        LOG.append('->no trimmed sequences\n')

    for name in dropped:
        del seqs[name]
    return trimsq_path