def parseM8FileIter(inhandle,
                    hitStringMap,
                    format,
                    scorePct,
                    parsingStyle,
                    countMethod,
                    taxonomy=None,
                    rank=None,
                    ignoreEmptyHits=True,
                    sortReads=False):
    """
    Chain filterM8Stream, processHits and applyCountMethod over a hit
    table and return the resulting iterator of (read, hits) tuples.

    Stages, each one wrapping the iterator produced by the previous:
     1. filterM8Stream parses inhandle using format and parsingStyle
        and, when scorePct >= 0, keeps hits scoring within scorePct
        percent of the best
     2. processHits translates the hits using hitStringMap,
        parsingStyle, taxonomy and rank
     3. applyCountMethod applies countMethod and ignoreEmptyHits

    Notes:
     * countMethod == 'first' turns the score cutoff off (scorePct is
       replaced by -1)
     * sorting by score is requested whenever a score cutoff is active
       or sortReads is true
     * if taxonomy is not None, hits will be TaxNode objects
     * countMethod can only be LCA if a taxonomy is given
    """
    # With the 'first' count method the score cutoff is never applied:
    # -1 fails every "scorePct >= 0" check below.
    if countMethod == 'first':
        scorePct = -1

    logger.info("Parsing hits")

    # Collect the settings for the filtering/parsing stage
    filterOpts = FilterParams()
    filterOpts.format = format

    useScoreCutoff = scorePct >= 0
    if useScoreCutoff:
        logger.info("Filtering for scores within %s pct of best" % scorePct)
        filterOpts.topPct = scorePct
    if useScoreCutoff or sortReads:
        filterOpts.sort = 'score'
        filterOpts.sortReads = sortReads
    filterOpts.parseStyle = parsingStyle

    # Stage 1: filter and parse the raw hit table
    parsedHits = filterM8Stream(inhandle, filterOpts, returnLines=False)

    # Stage 2: org/acc translation, hit name mapping, taxon node lookup
    translatedHits = processHits(parsedHits,
                                 hitStringMap=hitStringMap,
                                 parseStyle=parsingStyle,
                                 taxonomy=taxonomy,
                                 rank=rank)

    # Stage 3: apply the count method
    return applyCountMethod(translatedHits, countMethod, ignoreEmptyHits)
def parseM8FileIter(inhandle,
                    hitStringMap,
                    format,
                    scorePct,
                    parsingStyle,
                    countMethod,
                    taxonomy=None,
                    rank=None,
                    ignoreEmptyHits=True,
                    sortReads=False):
    """
    Wrapper method that combines filterM8Stream, processHits, and
    applyCountMethod to:
        filter and parse hits using format, scorePct and parsingStyle
        translate hits using processHits
        apply countMethod to the translated hits

    Parameters:
        inhandle: hit table input, handed to filterM8Stream
        hitStringMap: map of hit names, handed to processHits
        format: hit table format, stored on the filter options
        scorePct: if >= 0, only hits with scores within this percent of
            the best are kept; ignored when countMethod is 'first'
        parsingStyle: used both as the filter parse style and as the
            parseStyle of processHits
        countMethod: handed to applyCountMethod; 'first' disables
            score filtering
        taxonomy, rank: handed to processHits
        ignoreEmptyHits: handed to applyCountMethod
        sortReads: if true, sorting by score is requested and the flag
            is stored on the filter options

    If taxonomy is not None, hits will be TaxNode objects
    countMethod can only be LCA if taxonomy given

    Return an iterator over (read,hits) tuples.
    """
    # check filtering options: no score cutoff with the 'first' method
    if countMethod == 'first':
        scorePct = -1

    # set up the options for filtering and parsing
    logger.info("Parsing hits")
    options = FilterParams()
    options.format = format
    if scorePct >= 0 or sortReads:
        # filter hits on score if requested
        if scorePct >= 0:
            logger.info(
                "Filtering for scores within %s pct of best" % scorePct)
            options.topPct = scorePct
        options.sort = 'score'
        options.sortReads = sortReads

    # filters and parses
    options.parseStyle = parsingStyle
    hitIter = filterM8Stream(inhandle, options, returnLines=False)

    # apply org or acc translation
    # apply map of hit names if given
    # look up taxon node
    hitIter = processHits(hitIter,
                          hitStringMap=hitStringMap,
                          parseStyle=parsingStyle,
                          taxonomy=taxonomy,
                          rank=rank)

    # apply count method
    hitIter = applyCountMethod(hitIter, countMethod, ignoreEmptyHits)
    return hitIter