def blastx_results(ofile, srcDB, destDB, xformSrc=True, xformDest=False,
                   **kwargs):
    '''Collect blastx / tblastx results as a list of individual hits.

    ofile is parsed with BlastHitParser and the resulting coordinates are
    resolved against srcDB and destDB via read_aligned_coords().  The
    stream produced by generate_tblastn_ivals() is then consumed one item
    at a time: a CoordsGroupStart opens a fresh in-memory pairwise NLMSA,
    every ordinary item adds one (source, destination) interval pair to
    it, and a CoordsGroupEnd builds the NLMSA and records its slice for
    the query ORF.

    Returns a list with one entry per hit; each entry is the usual
    NLMSASlice interface (e.g. use its edges() method to get
    src, dest, edgeInfo tuples).

    Extra keyword arguments are accepted but not used by this function.
    '''
    # Attribute names under which the parsed hit records expose the
    # source / destination coordinates.
    attr_map = {
        'id': 'src_id',
        'start': 'src_start',
        'stop': 'src_end',
        'ori': 'src_ori',
        'idDest': 'dest_id',
        'startDest': 'dest_start',
        'stopDest': 'dest_end',
        'oriDest': 'dest_ori',
    }
    parser = BlastHitParser()
    coords = read_aligned_coords(parser.parse_file(ofile), srcDB, destDB,
                                 attr_map)
    hits = []
    for item in generate_tblastn_ivals(coords, xformSrc, xformDest):
        if isinstance(item, CoordsGroupStart):
            # a new hit begins: start an empty alignment for it
            hit_msa = cnestedlist.NLMSA('blasthits', 'memory',
                                        pairwiseMode=True)
            continue
        if isinstance(item, CoordsGroupEnd):
            # the hit is complete: index it and keep its slice view
            hit_msa.build()
            hits.append(hit_msa[query_orf])
            continue
        # ordinary item: one more interval pair belonging to this hit
        src_ival = item[0]
        hit_msa += src_ival
        hit_msa[src_ival][item[1]] = None  # record the aligned pair
        query_orf = src_ival.path  # key used to slice the finished hit
    return hits
def read_blast_alignment(ofile, srcDB, destDB, al=None, pipeline=None,
                         translateSrc=False, translateDest=False):
    """Read BLAST output from 'ofile' through a chain of transforms.

    The chain is: BlastHitParser -> CoordsToIntervals -> either
    save_interval_alignment() or the caller-supplied pipeline.

    srcDB: database for finding query sequences from the blast input;
    destDB: database for finding subject sequences from the blast input;
    al: if not None, must be a writeable alignment object in which to
    store the alignment intervals (only used when pipeline is None);
    pipeline: if not None, must be a list of filter functions, each
    taking a single argument and returning an iterator or iterable
    result object; the filters are applied in list order;
    translateSrc=True replaces srcDB with the TranslationDB obtained
    from translationDB.get_translation_db(), representing the possible
    6-frames of srcDB (for blastx, tblastx);
    translateDest=True does the same for destDB (for tblastn, tblastx).

    Returns the value of save_interval_alignment() when no pipeline is
    given, otherwise the output of the last filter.
    """
    parser = BlastHitParser()
    # Attribute names under which the parsed hit records expose the
    # source / destination coordinates.
    attr_map = {
        'id': 'src_id',
        'start': 'src_start',
        'stop': 'src_end',
        'ori': 'src_ori',
        'idDest': 'dest_id',
        'startDest': 'dest_start',
        'stopDest': 'dest_end',
        'oriDest': 'dest_ori',
    }
    if translateSrc:
        srcDB = translationDB.get_translation_db(srcDB)
    if translateDest:
        destDB = translationDB.get_translation_db(destDB)
    to_intervals = CoordsToIntervals(srcDB, destDB, attr_map)
    intervals = to_intervals(parser.parse_file(ofile))

    if pipeline is None:
        return save_interval_alignment(intervals, al)

    # feed the output of each filter into the next one
    stage_output = intervals
    for stage in pipeline:
        stage_output = stage(stage_output)
    return stage_output
def read_interval_alignment(ofile, srcDB, destDB, al=None, **kwargs):
    """Read an interval mapping between seqs from the 2 sets of seqs.

    ofile is parsed with BlastHitParser and the parsed records are handed
    to al.add_aligned_intervals() together with srcDB, destDB and any
    extra keyword arguments.

    al: alignment object to add the intervals to.  If None, a new
    in-memory pairwise NLMSA is created here and built before returning;
    an alignment passed in by the caller is NOT built by this function.

    Returns the alignment object.
    Raises IOError if the parser's line counter is still zero after the
    input has been consumed.
    """
    created_here = al is None
    if created_here:
        import cnestedlist
        al = cnestedlist.NLMSA('blasthits', 'memory', pairwiseMode=True)

    # Attribute names under which the parsed hit records expose the
    # source / destination coordinates.
    attr_map = {
        'id': 'src_id',
        'start': 'src_start',
        'stop': 'src_end',
        'ori': 'src_ori',
        'idDest': 'dest_id',
        'startDest': 'dest_start',
        'stopDest': 'dest_end',
        'oriDest': 'dest_ori',
    }
    parser = BlastHitParser()
    al.add_aligned_intervals(parser.parse_file(ofile), srcDB, destDB,
                             alignedIvalsAttrs=attr_map, **kwargs)
    if parser.nline == 0:  # NO BLAST OUTPUT??
        raise IOError('no BLAST output. Check that blastall is in your PATH')
    if created_here:
        al.build()
    return al
def read_blast_alignment(ofile, srcDB, destDB, al=None, pipeline=None):
    """Read BLAST output from 'ofile' through a chain of transforms.

    The chain is: BlastHitParser -> CoordsToIntervals -> either
    save_interval_alignment() OR [pipeline].

    al is passed to save_interval_alignment() and is only used when
    pipeline is None.
    If pipeline is not None, it must be a list of filter functions, each
    taking a single argument and returning an iterator or iterable
    result object; the filters are applied in list order.

    Returns the value of save_interval_alignment() when no pipeline is
    given, otherwise the output of the last filter.
    """
    parser = BlastHitParser()
    # Attribute names under which the parsed hit records expose the
    # source / destination coordinates.
    attr_map = {
        'id': 'src_id',
        'start': 'src_start',
        'stop': 'src_end',
        'ori': 'src_ori',
        'idDest': 'dest_id',
        'startDest': 'dest_start',
        'stopDest': 'dest_end',
        'oriDest': 'dest_ori',
    }
    to_intervals = CoordsToIntervals(srcDB, destDB, attr_map)
    intervals = to_intervals(parser.parse_file(ofile))

    if pipeline is None:
        return save_interval_alignment(intervals, al)

    # feed the output of each filter into the next one
    stage_output = intervals
    for stage in pipeline:
        stage_output = stage(stage_output)
    return stage_output
def __init__(self, ofile, srcDB, destDB, xformSrc=True, xformDest=False,
             **kwargs):
    """Parse BLAST output from ofile and store its hits on self.hits.

    ofile is parsed with BlastHitParser and the resulting coordinates are
    resolved against srcDB and destDB via read_aligned_coords().  The
    stream produced by generate_tblastn_ivals() is then consumed one item
    at a time: a CoordsGroupStart opens a fresh in-memory pairwise NLMSA,
    every ordinary item adds one (source, destination) interval pair to
    it, and a CoordsGroupEnd builds the NLMSA and records its slice for
    the query ORF.

    self.hits ends up as a list with one such slice per hit.
    Extra keyword arguments are accepted but not used here.
    """
    import cnestedlist
    parser = BlastHitParser()
    # Attribute names under which the parsed hit records expose the
    # source / destination coordinates.
    attr_map = {
        'id': 'src_id',
        'start': 'src_start',
        'stop': 'src_end',
        'ori': 'src_ori',
        'idDest': 'dest_id',
        'startDest': 'dest_start',
        'stopDest': 'dest_end',
        'oriDest': 'dest_ori',
    }
    coords = read_aligned_coords(parser.parse_file(ofile), srcDB, destDB,
                                 attr_map)
    hits = []
    for item in generate_tblastn_ivals(coords, xformSrc, xformDest):
        if isinstance(item, CoordsGroupStart):
            # a new hit begins: start an empty alignment for it
            hit_msa = cnestedlist.NLMSA('blasthits', 'memory',
                                        pairwiseMode=True)
            continue
        if isinstance(item, CoordsGroupEnd):
            # the hit is complete: index it and keep its slice view
            hit_msa.build()
            hits.append(hit_msa[query_orf])
            continue
        # ordinary item: one more interval pair belonging to this hit
        src_ival = item[0]
        hit_msa += src_ival
        hit_msa[src_ival][item[1]] = None  # record the aligned pair
        query_orf = src_ival.path  # key used to slice the finished hit
    self.hits = hits