Ejemplo n.º 1
0
def run_real_quiver(cmpH5, quiverConfig, interval, depthLimit, refSeq, refWindow, seedConsensus):
    
    intStart, intEnd = interval
    subWin = subWindow(refWindow, interval)
    
    windowRefSeq = refSeq[intStart:intEnd]
    rows = readsInWindow(cmpH5, subWin,
                           depthLimit = depthLimit,
                           minMapQV = quiverConfig.minMapQV,
                           strategy = "longest",
                           stratum = None,
                           barcode = None)
    
    spanningRows = [row for row in rows if cmpH5[row].spansReferenceRange(intStart, intEnd) ]
    
    alns = cmpH5[spanningRows]
    clippedAlns_ = [ aln.clippedTo(*interval) for aln in alns ]
    clippedAlns__ = [ aln for aln in clippedAlns_ if aln.alignedLength <= 120]
    clippedAlns = filterAlns(subWin, clippedAlns__, quiverConfig)
    
    consensus = consensusForAlignmentsDisregardPOA(subWin, windowRefSeq, clippedAlns, quiverConfig, "A"*100)
    print(str(consensus.sequence))
Ejemplo n.º 2
0
def consensusAndVariantsForWindow(cmpH5, refWindow, referenceContig,
                                  depthLimit, quiverConfig):
    """
    High-level routine for calling the consensus for a
    window of the genome given an alignment file.

    Identifies the coverage contours of the window in order to
    identify subintervals where a good consensus can be called.
    Creates the desired "no evidence consensus" where there is
    inadequate coverage.
    """
    winId, winStart, winEnd = refWindow
    logging.info("Quiver operating on %s" %
                 reference.windowToString(refWindow))

    if options.fancyChunking:
        # 1) identify the intervals with adequate coverage for quiver
        #    consensus; restrict to intervals of length > 10
        alnHits = U.readsInWindow(cmpH5, refWindow,
                                  depthLimit=20000,
                                  minMapQV=quiverConfig.minMapQV,
                                  strategy="long-and-strand-balanced",
                                  stratum=options.readStratum,
                                  barcode=options.barcode)
        starts = np.fromiter((hit.tStart for hit in alnHits), np.int)
        ends   = np.fromiter((hit.tEnd   for hit in alnHits), np.int)
        intervals = kSpannedIntervals(refWindow, quiverConfig.minPoaCoverage,
                                      starts, ends, minLength=10)
        coverageGaps = holes(refWindow, intervals)
        allIntervals = sorted(intervals + coverageGaps)
        if len(allIntervals) > 1:
            logging.info("Usable coverage in %s: %r" %
                         (reference.windowToString(refWindow), intervals))

    else:
        allIntervals = [ (winStart, winEnd) ]

    # 2) pull out the reads we will use for each interval
    # 3) call consensusForAlignments on the interval
    subConsensi = []
    variants = []

    for interval in allIntervals:
        intStart, intEnd = interval
        intRefSeq = referenceContig[intStart:intEnd]
        subWin = subWindow(refWindow, interval)

        windowRefSeq = referenceContig[intStart:intEnd]
        alns = U.readsInWindow(cmpH5, subWin,
                               depthLimit=depthLimit,
                               minMapQV=quiverConfig.minMapQV,
                               strategy="long-and-strand-balanced",
                               stratum=options.readStratum,
                               barcode=options.barcode)
        clippedAlns_ = [ aln.clippedTo(*interval) for aln in alns ]
        clippedAlns = U.filterAlns(subWin, clippedAlns_, quiverConfig)

        if len([ a for a in clippedAlns
                 if a.spansReferenceRange(*interval) ]) >= quiverConfig.minPoaCoverage:

            logging.debug("%s: Reads being used: %s" %
                          (reference.windowToString(subWin),
                           " ".join([str(hit.readName) for hit in alns])))

            css = U.consensusForAlignments(subWin,
                                           intRefSeq,
                                           clippedAlns,
                                           quiverConfig)

            siteCoverage = U.coverageInWindow(subWin, alns)

            if options.diploid:
                variants_ = diploid.variantsFromConsensus(subWin, windowRefSeq,
                                                          css.sequence, css.confidence, siteCoverage,
                                                          options.aligner,
                                                          css.mms)
            else:
                variants_ = U.variantsFromConsensus(subWin, windowRefSeq,
                                                    css.sequence, css.confidence, siteCoverage,
                                                    options.aligner,
                                                    mms=None)

            filteredVars =  filterVariants(options.minCoverage,
                                           options.minConfidence,
                                           variants_)
            # Annotate?
            if options.annotateGFF:
                annotateVariants(filteredVars, clippedAlns)

            variants += filteredVars

            # Dump?
            shouldDumpEvidence = \
                ((options.dumpEvidence == "all") or
                 (options.dumpEvidence == "variants") and (len(variants) > 0))
            if shouldDumpEvidence:
                dumpEvidence(options.evidenceDirectory,
                             subWin, windowRefSeq,
                             clippedAlns, css)
        else:
            css = QuiverConsensus.noCallConsensus(quiverConfig.noEvidenceConsensus,
                                                  subWin, intRefSeq)
        subConsensi.append(css)

    # 4) glue the subwindow consensus objects together to form the
    #    full window consensus
    css = join(subConsensi)

    # 5) Return
    return css, variants
Ejemplo n.º 3
0
def consensusAndVariantsForWindow(cmpH5, refWindow, referenceContig,
                                  depthLimit, quiverConfig):
    """
    High-level routine for calling the consensus for a
    window of the genome given an alignment file.

    Identifies the coverage contours of the window in order to
    identify subintervals where a good consensus can be called.
    Creates the desired "no evidence consensus" where there is
    inadequate coverage.
    """
    winId, winStart, winEnd = refWindow
    logging.info("Quiver operating on %s" %
                 reference.windowToString(refWindow))

    if options.fancyChunking:
        # 1) identify the intervals with adequate coverage for quiver
        #    consensus; restrict to intervals of length > 10
        alnHits = U.readsInWindow(cmpH5, refWindow,
                                  depthLimit=20000,
                                  minMapQV=quiverConfig.minMapQV,
                                  strategy="long-and-strand-balanced",
                                  stratum=options.readStratum,
                                  barcode=options.barcode)
        starts = np.fromiter((hit.tStart for hit in alnHits), np.int)
        ends   = np.fromiter((hit.tEnd   for hit in alnHits), np.int)
        intervals = kSpannedIntervals(refWindow, quiverConfig.minPoaCoverage,
                                      starts, ends, minLength=10)
        coverageGaps = holes(refWindow, intervals)
        allIntervals = sorted(intervals + coverageGaps)
        if len(allIntervals) > 1:
            logging.info("Usable coverage in %s: %r" %
                         (reference.windowToString(refWindow), intervals))

    else:
        allIntervals = [ (winStart, winEnd) ]

    # 2) pull out the reads we will use for each interval
    # 3) call consensusForAlignments on the interval
    subConsensi = []
    variants = []

    for interval in allIntervals:
        intStart, intEnd = interval
        intRefSeq = referenceContig[intStart:intEnd]
        subWin = subWindow(refWindow, interval)

        windowRefSeq = referenceContig[intStart:intEnd]
        alns = U.readsInWindow(cmpH5, subWin,
                               depthLimit=depthLimit,
                               minMapQV=quiverConfig.minMapQV,
                               strategy="long-and-strand-balanced",
                               stratum=options.readStratum,
                               barcode=options.barcode)
        clippedAlns_ = [ aln.clippedTo(*interval) for aln in alns ]
        clippedAlns = U.filterAlns(subWin, clippedAlns_, quiverConfig)

        if len([ a for a in clippedAlns
                 if a.spansReferenceRange(*interval) ]) >= quiverConfig.minPoaCoverage:

            logging.debug("%s: Reads being used: %s" %
                          (reference.windowToString(subWin),
                           " ".join([str(hit.readName) for hit in alns])))

            css = U.consensusForAlignments(subWin,
                                           intRefSeq,
                                           clippedAlns,
                                           quiverConfig)

            siteCoverage = U.coverageInWindow(subWin, alns)

            if options.diploid:
                variants_ = diploid.variantsFromConsensus(subWin, windowRefSeq,
                                                          css.sequence, css.confidence, siteCoverage,
                                                          options.aligner,
                                                          css.mms)
            else:
                variants_ = U.variantsFromConsensus(subWin, windowRefSeq,
                                                    css.sequence, css.confidence, siteCoverage,
                                                    options.aligner,
                                                    mms=None)

            filteredVars =  filterVariants(options.minCoverage,
                                           options.minConfidence,
                                           variants_)
            # Annotate?
            if options.annotateGFF:
                annotateVariants(filteredVars, clippedAlns)

            variants += filteredVars

            # Dump?
            shouldDumpEvidence = \
                ((options.dumpEvidence == "all") or
                 (options.dumpEvidence == "variants") and (len(variants) > 0))
            if shouldDumpEvidence:
                dumpEvidence(options.evidenceDirectory,
                             subWin, windowRefSeq,
                             clippedAlns, css)
        else:
            css = QuiverConsensus.noCallConsensus(quiverConfig.noEvidenceConsensus,
                                                  subWin, intRefSeq)
        subConsensi.append(css)

    # 4) glue the subwindow consensus objects together to form the
    #    full window consensus
    css = join(subConsensi)

    # 5) Return
    return css, variants