Beispiel #1
0
def parseM8FileIter(
        inhandle,
        hitStringMap,
        format,
        scorePct,
        parsingStyle,
        countMethod,
        taxonomy=None,
        rank=None,
        ignoreEmptyHits=True,
        sortReads=False):
    """
    Chain filterM8Stream, processHits and applyCountMethod into one iterator.

    The stages, each wrapping the output of the previous one:
        1. filterM8Stream   - reads inhandle using a FilterParams object
                              built from format, scorePct, sortReads and
                              parsingStyle
        2. processHits      - translates the hits using hitStringMap,
                              parsingStyle, taxonomy and rank
        3. applyCountMethod - applies countMethod, honoring ignoreEmptyHits

    A countMethod of 'first' disables score filtering (scorePct is forced
    to -1). A negative scorePct leaves the top-percent filter and the score
    sort unset on the filter parameters.

    If taxonomy is not None, hits will be TaxNode objects.
    countMethod can only be LCA if a taxonomy is given.

    Return an iterator over (read, hits) tuples.
    """

    # the 'first' count method does not use score filtering
    if countMethod == 'first':
        scorePct = -1

    logger.info("Parsing hits")
    filterOpts = FilterParams()
    filterOpts.format = format

    filterByScore = scorePct >= 0
    if filterByScore:
        # keep only hits scoring within scorePct of the best hit
        logger.info(
            "Filtering for scores within %s pct of best" %
            scorePct)
        filterOpts.topPct = scorePct
        filterOpts.sort = 'score'
    if filterByScore or sortReads:
        # sortReads is only written when one of the two options is active;
        # otherwise the FilterParams value is left untouched
        filterOpts.sortReads = sortReads
    filterOpts.parseStyle = parsingStyle

    # stage 1: filter and parse the raw table
    parsedHits = filterM8Stream(inhandle, filterOpts, returnLines=False)

    # stage 2: org/acc translation, hit name mapping, taxon node lookup
    translatedHits = processHits(
        parsedHits,
        hitStringMap=hitStringMap,
        parseStyle=parsingStyle,
        taxonomy=taxonomy,
        rank=rank)

    # stage 3: apply the count method
    return applyCountMethod(translatedHits, countMethod, ignoreEmptyHits)
Beispiel #2
0
def parseM8FileIter(
        inhandle,
        hitStringMap,
        format,
        scorePct,
        parsingStyle,
        countMethod,
        taxonomy=None,
        rank=None,
        ignoreEmptyHits=True,
        sortReads=False):
    """
    Wrapper that combines filterM8Stream, processHits, and applyCountMethod to:
        filter and parse hits using format, scorePct and parsingStyle
        translate hits using processHits
        reduce each read's hits using countMethod

    Parameters:
        inhandle: input stream of hits, passed to filterM8Stream
        hitStringMap: map of hit names, passed to processHits
        format: hit table format, stored on the filter parameters
        scorePct: if >= 0, keep hits scoring within this percent of the
            best hit; ignored (forced to -1) when countMethod is 'first'
        parsingStyle: parse style used by both the filter and processHits
        countMethod: count method handed to applyCountMethod
        taxonomy: optional taxonomy, passed to processHits
        rank: optional rank, passed to processHits
        ignoreEmptyHits: passed through to applyCountMethod
        sortReads: passed to the filter parameters when it is truthy or
            score filtering is active

    If taxonomy is not None, hits will be TaxNode objects.
    countMethod can only be LCA if a taxonomy is given.

    Return an iterator over (read, hits) tuples.
    """

    # the 'first' count method does not use score filtering
    if countMethod == 'first':
        scorePct = -1

    # build the filter options for the hit table
    logger.info("Parsing hits")
    options = FilterParams()
    options.format = format
    if scorePct >= 0 or sortReads:
        # filter hits on score if requested
        if scorePct >= 0:
            logger.info(
                "Filtering for scores within %s pct of best", scorePct)
            options.topPct = scorePct
            options.sort = 'score'
        options.sortReads = sortReads
    options.parseStyle = parsingStyle

    # filters and parses
    hitIter = filterM8Stream(inhandle, options, returnLines=False)

    # apply org or acc translation
    # apply map of hit names if given
    # look up taxon node
    hitIter = processHits(
        hitIter,
        hitStringMap=hitStringMap,
        parseStyle=parsingStyle,
        taxonomy=taxonomy,
        rank=rank)

    # apply count method
    hitIter = applyCountMethod(hitIter, countMethod, ignoreEmptyHits)

    return hitIter