Ejemplo n.º 1
0
def read_blast_alignment(ofile, srcDB, destDB, al=None, pipeline=None,
                         translateSrc=False, translateDest=False):
    """Parse blast output from 'ofile' and turn its hits into aligned intervals.

    ofile: input handed to BlastHitParser.parse_file();

    srcDB: database used to look up the query sequences of the blast hits;

    destDB: database used to look up the subject sequences of the blast hits;

    al: alignment object passed on to save_interval_alignment(); only used
    when no pipeline is given.  If not None it must be writeable, since the
    alignment intervals get stored in it;

    pipeline: optional list of filter functions.  Each one is called with a
    single argument (the output of the previous stage, starting with the
    aligned intervals) and must return an iterator or iterable;

    translateSrc=True replaces srcDB by its TranslationDB, i.e. the possible
    6-frame translations (for blastx, tblastx);

    translateDest=True replaces destDB by its TranslationDB, i.e. the
    possible 6-frame translations (for tblastn, tblastx).

    Returns the result of save_interval_alignment() when pipeline is None,
    otherwise whatever the last filter in the pipeline returned (or the raw
    aligned intervals if the pipeline is empty).
    """
    parser = BlastHitParser()
    # attribute-name mapping handed to CoordsToIntervals: interval attribute
    # on the left, name of the corresponding parsed hit field on the right
    attr_map = {
        'id': 'src_id',
        'start': 'src_start',
        'stop': 'src_end',
        'ori': 'src_ori',
        'idDest': 'dest_id',
        'startDest': 'dest_start',
        'stopDest': 'dest_end',
        'oriDest': 'dest_ori',
    }
    if translateSrc:
        srcDB = translationDB.get_translation_db(srcDB)
    if translateDest:
        destDB = translationDB.get_translation_db(destDB)
    to_intervals = CoordsToIntervals(srcDB, destDB, attr_map)
    stream = to_intervals(parser.parse_file(ofile))

    # no custom pipeline: just save the intervals into the alignment
    if pipeline is None:
        return save_interval_alignment(stream, al)

    # otherwise chain the filters, feeding each the previous stage's output
    for stage in pipeline:
        stream = stage(stream)
    return stream
Ejemplo n.º 2
0
    def __getitem__(self, query):
        """Collect the alignment slices for every translated frame of query.

        Calls self(query) to obtain the alignment, then looks up each frame
        yielded by the translation database entry for query.id.  Frames
        that raise KeyError or yield an EmptySlice are skipped.

        Returns a list of the remaining slices (possibly empty).
        """
        # build the alignment for this one sequence first
        alignment = self(query)
        # translation database giving access to the frames of the query
        frames_db = translationDB.get_translation_db(query.db)

        found = []
        for frame in frames_db[query.id].iter_frames():
            try:
                hit = alignment[frame]
            except KeyError:
                # this frame has no entry in the alignment
                continue
            if isinstance(hit, EmptySlice):
                continue
            found.append(hit)

        return found