Esempio n. 1
0
def blastx_results(ofile, srcDB, destDB, xformSrc=True, xformDest=False,
                   **kwargs):
    """Parse blastx / tblastx output into a list with one entry per hit.

    ofile: blast output handed to BlastHitParser.parse_file();
    srcDB, destDB: passed to read_aligned_coords() together with the
    attribute-name mapping for the parsed hits;
    xformSrc, xformDest: forwarded unchanged to generate_tblastn_ivals();
    **kwargs: accepted for interface compatibility; not used here.

    Returns a list in which every element is the slice (NLMSASlice view)
    of a separate in-memory pairwise NLMSA built for that hit, indexed by
    the path of the hit's most recently added source interval.  Use the
    slice's edges() method to get src,dest,edgeInfo tuples.
    """
    parser = BlastHitParser()
    # names of the attributes that hold each coordinate field on a hit
    coordAttrs = {
        'id': 'src_id',
        'start': 'src_start',
        'stop': 'src_end',
        'ori': 'src_ori',
        'idDest': 'dest_id',
        'startDest': 'dest_start',
        'stopDest': 'dest_end',
        'oriDest': 'dest_ori',
    }
    coords = read_aligned_coords(parser.parse_file(ofile), srcDB, destDB,
                                 coordAttrs)
    hits = []
    for item in generate_tblastn_ivals(coords, xformSrc, xformDest):
        if isinstance(item, CoordsGroupStart):
            # a new hit begins: give it its own in-memory alignment
            hitAlignment = cnestedlist.NLMSA('blasthits', 'memory',
                                             pairwiseMode=True)
            continue
        if isinstance(item, CoordsGroupEnd):
            # all intervals of this hit have been seen: index and save it
            hitAlignment.build()
            hits.append(hitAlignment[queryORF])
            continue
        # ordinary item: an aligned (source, destination) interval pair
        srcIval = item[0]
        hitAlignment += srcIval
        hitAlignment[srcIval][item[1]] = None  # record the alignment
        queryORF = srcIval.path  # key used to slice once the hit ends
    return hits
Esempio n. 2
0
def read_blast_alignment(ofile, srcDB, destDB, al=None, pipeline=None,
                         translateSrc=False, translateDest=False):
    """Read blast output from 'ofile' through a chain of transforms.

    The input is parsed by BlastHitParser, converted by CoordsToIntervals,
    and then either stored via save_interval_alignment() or fed through
    the caller's pipeline.

    srcDB: database for finding query sequences from the blast input;

    destDB: database for finding subject sequences from the blast input;

    al: if not None, must be a writeable alignment object in which to
    store the alignment intervals (only used when pipeline is None);

    pipeline: if not None, must be a list of filter functions, each taking
    a single argument and returning an iterator or iterable result object.
    The filters are applied in order, each one consuming the previous
    result;

    translateSrc=True replaces srcDB with
    translationDB.get_translation_db(srcDB), a TranslationDB representing
    the possible 6-frames of srcDB (for blastx, tblastx);

    translateDest=True does the same for destDB (for tblastn, tblastx).

    Returns the result of save_interval_alignment() when pipeline is None,
    otherwise the output of the last filter in the pipeline.
    """
    parser = BlastHitParser()
    # names of the attributes that hold each coordinate field on a hit
    attrNames = {
        'id': 'src_id',
        'start': 'src_start',
        'stop': 'src_end',
        'ori': 'src_ori',
        'idDest': 'dest_id',
        'startDest': 'dest_start',
        'stopDest': 'dest_end',
        'oriDest': 'dest_ori',
    }
    if translateSrc:
        srcDB = translationDB.get_translation_db(srcDB)
    if translateDest:
        destDB = translationDB.get_translation_db(destDB)
    toIntervals = CoordsToIntervals(srcDB, destDB, attrNames)
    stream = toIntervals(parser.parse_file(ofile))
    if pipeline is None:
        # default behaviour: store the intervals in an alignment
        return save_interval_alignment(stream, al)
    for stage in pipeline:
        stream = stage(stream)
    return stream
Esempio n. 3
0
def read_interval_alignment(ofile, srcDB, destDB, al=None, **kwargs):
    """Load the interval mapping parsed from 'ofile' into an alignment.

    ofile: input handed to BlastHitParser.parse_file();
    srcDB, destDB: the two sequence sets, passed to
    al.add_aligned_intervals();
    al: alignment object to add the intervals to.  If None, a new
    in-memory pairwise NLMSA is created here and built before returning;
    an alignment supplied by the caller is NOT built by this function;
    **kwargs: forwarded to al.add_aligned_intervals().

    Returns the alignment object.
    Raises IOError if the parser's nline is 0 after the input was added.
    """
    createdHere = al is None
    if createdHere:
        import cnestedlist
        al = cnestedlist.NLMSA('blasthits', 'memory', pairwiseMode=True)
    parser = BlastHitParser()
    # names of the attributes that hold each coordinate field on a hit
    attrNames = {
        'id': 'src_id',
        'start': 'src_start',
        'stop': 'src_end',
        'ori': 'src_ori',
        'idDest': 'dest_id',
        'startDest': 'dest_start',
        'stopDest': 'dest_end',
        'oriDest': 'dest_ori',
    }
    al.add_aligned_intervals(parser.parse_file(ofile), srcDB, destDB,
                             alignedIvalsAttrs=attrNames, **kwargs)
    if parser.nline == 0:  # parser saw no input at all
        raise IOError('no BLAST output.  Check that blastall is in your PATH')
    if createdHere:
        # only alignments created in this call are built here
        al.build()
    return al
Esempio n. 4
0
def read_blast_alignment(ofile, srcDB, destDB, al=None, pipeline=None):
    """Read blast output from 'ofile' through a chain of transforms.

    Processing order: BlastHitParser; CoordsToIntervals; then either
    save_interval_alignment (when pipeline is None) or the given pipeline.

    srcDB, destDB: passed to CoordsToIntervals along with the
    attribute-name mapping for the parsed hits;
    al: passed to save_interval_alignment(); only used when pipeline is
    None;
    pipeline: if not None, must be a list of filter functions, each taking
    a single argument and returning an iterator or iterable result object.
    The filters are applied in order, each one consuming the previous
    result.

    Returns the result of save_interval_alignment() when pipeline is None,
    otherwise the output of the last filter in the pipeline.
    """
    parser = BlastHitParser()
    # names of the attributes that hold each coordinate field on a hit
    attrNames = {
        'id': 'src_id',
        'start': 'src_start',
        'stop': 'src_end',
        'ori': 'src_ori',
        'idDest': 'dest_id',
        'startDest': 'dest_start',
        'stopDest': 'dest_end',
        'oriDest': 'dest_ori',
    }
    toIntervals = CoordsToIntervals(srcDB, destDB, attrNames)
    stream = toIntervals(parser.parse_file(ofile))
    if pipeline is None:
        # default behaviour: store the intervals in an alignment
        return save_interval_alignment(stream, al)
    for stage in pipeline:
        stream = stage(stream)
    return stream
Esempio n. 5
0
 def __init__(self, ofile, srcDB, destDB, xformSrc=True, xformDest=False,
              **kwargs):
     """Parse the blast output in 'ofile' and store its hits on self.hits.

     ofile: blast output handed to BlastHitParser.parse_file();
     srcDB, destDB: passed to read_aligned_coords() together with the
     attribute-name mapping for the parsed hits;
     xformSrc, xformDest: forwarded unchanged to generate_tblastn_ivals();
     **kwargs: accepted for interface compatibility; not used here.

     self.hits is set to a list with one element per hit: the slice of a
     separate in-memory pairwise NLMSA built for that hit, indexed by the
     path of the hit's most recently added source interval.
     """
     import cnestedlist
     parser = BlastHitParser()
     # names of the attributes that hold each coordinate field on a hit
     coordAttrs = {
         'id': 'src_id',
         'start': 'src_start',
         'stop': 'src_end',
         'ori': 'src_ori',
         'idDest': 'dest_id',
         'startDest': 'dest_start',
         'stopDest': 'dest_end',
         'oriDest': 'dest_ori',
     }
     coords = read_aligned_coords(parser.parse_file(ofile), srcDB, destDB,
                                  coordAttrs)
     hits = []
     for item in generate_tblastn_ivals(coords, xformSrc, xformDest):
         if isinstance(item, CoordsGroupStart):
             # a new hit begins: give it its own in-memory alignment
             hitAlignment = cnestedlist.NLMSA('blasthits', 'memory',
                                              pairwiseMode=True)
             continue
         if isinstance(item, CoordsGroupEnd):
             # all intervals of this hit have been seen: index and save it
             hitAlignment.build()
             hits.append(hitAlignment[queryORF])
             continue
         # ordinary item: an aligned (source, destination) interval pair
         srcIval = item[0]
         hitAlignment += srcIval
         hitAlignment[srcIval][item[1]] = None  # record the alignment
         queryORF = srcIval.path  # key used to slice once the hit ends
     self.hits = hits