Ejemplo n.º 1
0
def BLAST(seq, seqs, evalue=10):
    """Search ``seq`` against ``seqs`` with NCBI BLAST+ and return the hits.

    A BLAST database is built from ``seqs`` with ``makeblastdb`` (unless the
    existence check below finds one), the matching BLAST program is run with
    ``seq`` as the query, and the subject sequences it reports are pulled
    back out of the database with ``blastdbcmd``.

    Args:
        seq: Query. Must provide ``get_file('fa')``. Treated as nucleotide
            when it is a ``NASeq`` instance, otherwise as protein.
        seqs: Subject collection. Must support ``seqs[0]`` and
            ``get_file('fa')``; its first element decides the database type.
        evalue: Value passed to BLAST's ``-evalue`` option.

    Returns:
        A ``SeqList`` constructed from the open FASTA file of hit sequences.
    """
    lib_dir = temp('blast-' + str(id(seqs)))
    res_file = temp('blast-' + str(id(seqs)), str(id(seq)))

    dbtype = 'nucl' if isinstance(seqs[0], NASeq) else 'prot'
    seqtype = 'nucl' if isinstance(seq, NASeq) else 'prot'

    db_fasta = seqs.get_file('fa')

    # NOTE(review): makeblastdb is given `-out lib_dir`, so its index files
    # are presumably written next to that prefix; this check looks under
    # lib_dir joined with the FASTA path instead -- confirm it can ever find
    # an existing database (otherwise the database is rebuilt on every call).
    have_db = op.exists(op.join(lib_dir, db_fasta + '.phr')) \
        or op.exists(op.join(lib_dir, db_fasta + '.nhr'))
    if have_db:
        c = []
    else:
        c = [[app('BLAST','makeblastdb'),'-in',db_fasta,'-out',lib_dir,'-parse_seqids','-dbtype',dbtype]]

    # Keyed by (database type, query type).  The translated searches are
    # named after the side that gets translated: blastx translates a
    # nucleotide QUERY to search a protein database, tblastn searches a
    # translated nucleotide DATABASE with a protein query.
    programs = {
        ('nucl', 'nucl'): 'blastn',   # TBLASTX
        ('nucl', 'prot'): 'tblastn',
        ('prot', 'nucl'): 'blastx',
        ('prot', 'prot'): 'blastp',   # PSIBLAST, PHIBLAST
    }
    blprog = programs[(dbtype, seqtype)]

    # First command writes the subject ids of the hits (outfmt 6, sseqid
    # column only); second command feeds that id list to blastdbcmd to
    # extract the corresponding sequences as FASTA.
    c += [[app('BLAST',blprog),'-db',lib_dir,'-query',seq.get_file('fa'),'-outfmt','"6 sseqid"','-out',res_file+'-out','-evalue',str(evalue)]]
    c += [[app('BLAST','blastdbcmd'),'-db',lib_dir,'-entry_batch',res_file+'-out','-out',res_file+'.fa']]
    run(c)

    # NOTE(review): the handle is handed to SeqList and never closed here;
    # confirm SeqList consumes or closes it.
    return SeqList(open(res_file + '.fa', 'r'))
Ejemplo n.º 2
0
def Trimmomatic(seqs, headcrop=None, leading=None, trailing=None, slidingwin=None, minlen=None, illuminaclip=None):
    """Trim reads with Trimmomatic 0.22 and return the trimmed reads.

    Paired-end mode is used for a ``PairedFileSeqList`` and single-end mode
    for any other ``SeqList``.  Each optional argument that is not ``None``
    adds one trimming step to the command line, always in the order
    ILLUMINACLIP, LEADING, TRAILING, SLIDINGWINDOW, HEADCROP, MINLEN.

    Args:
        seqs: reads to trim; must provide FASTQ files via ``get_file('fq')``
            (on ``_f1``/``_f2`` for paired input).
        headcrop: value for the ``HEADCROP`` step.
        leading: value for the ``LEADING`` step.
        trailing: value for the ``TRAILING`` step.
        slidingwin: two-item sequence; items 0 and 1 become the two
            ``SLIDINGWINDOW`` fields.
        minlen: value for the ``MINLEN`` step.
        illuminaclip: object whose ``get_file('fa')`` is used as the
            ``ILLUMINACLIP`` file, with fixed ``2:40:15`` settings.

    Returns:
        ``PairedFileSeqList`` of the two ``.trim.prd.fq`` outputs in
        paired-end mode, otherwise ``SeqList`` of the ``.trim.fq`` output.

    Raises:
        TypeError: if ``seqs`` is neither kind of sequence list.
    """
    paired = isinstance(seqs, PairedFileSeqList)
    if paired:
        fwd = seqs._f1.get_file('fq')
        rev = seqs._f2.get_file('fq')
        # Two inputs, then for each input its paired and unpaired output.
        io_files = [fwd, rev]
        for src in (fwd, rev):
            io_files.extend([src + '.trim.prd.fq', src + '.trim.uprd.fq'])
    elif isinstance(seqs, SeqList):
        fwd = seqs.get_file('fq')
        io_files = [fwd, fwd + '.trim.fq']
    else:
        raise TypeError('Need a PairedFileSeqList, FileSeqList or SeqList.')

    mode = 'PE' if paired else 'SE'

    cmd = [
        app('JAVA', 'java'),
        '-classpath',
        app('TRIMMOMATIC', 'trimmomatic-0.22.jar'),
        'org.usadellab.trimmomatic.Trimmomatic' + mode,
        '-phred33',  # TODO: determine this from the data
    ]
    cmd.extend(io_files)

    # (step name, colon-separated fields) for every requested step; the
    # order in which steps are appended is the order on the command line.
    steps = []
    if illuminaclip is not None:
        # sane defaults?
        steps.append(('ILLUMINACLIP', [illuminaclip.get_file('fa'), 2, 40, 15]))
    if leading is not None:
        steps.append(('LEADING', [leading]))
    if trailing is not None:
        steps.append(('TRAILING', [trailing]))
    if slidingwin is not None:
        steps.append(('SLIDINGWINDOW', [slidingwin[0], slidingwin[1]]))
    if headcrop is not None:
        steps.append(('HEADCROP', [headcrop]))
    if minlen is not None:
        steps.append(('MINLEN', [minlen]))

    for step_name, fields in steps:
        cmd.append(':'.join([step_name] + [str(field) for field in fields]))

    run([cmd])

    if not paired:
        return SeqList(fwd + '.trim.fq')
    return PairedFileSeqList(fwd + '.trim.prd.fq', rev + '.trim.prd.fq')