Python kSpannedIntervals Beispiele, GenomicConsensus.windows.kSpannedIntervals Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: test_coverage_intervals.py Projekt: xxz19900/GenomicConsensus

def test_intervals_5():
    """
    Ten unit-length reads plus one read as long as the window: the
    2-spanned result is the ten unit intervals, the 3-spanned result
    is empty.
    """
    window = (0, 0, 10)
    unitReads = [(pos, pos + 1) for pos in xrange(0, 10)]
    allReads = unitReads + [(0, 10)]
    # zip(*...) transposes the (start, end) pairs into two sequences.
    start, end = map(np.array, zip(*allReads))
    twoSpanned = kSpannedIntervals(window, 2, start, end)
    assert_equals([(pos, pos + 1) for pos in xrange(0, 10)], twoSpanned)
    threeSpanned = kSpannedIntervals(window, 3, start, end)
    assert_equals([], threeSpanned)

Beispiel #2

0

Datei anzeigen

Datei: test_coverage_intervals.py Projekt: Mozhgann/GenomicConsensus

def test_intervals_5():
    """
    No part of the window has 3-spanning coverage; with k=2 the answer
    is the ten unit intervals.
    """
    window = (0, 0, 10)
    unitIntervals = [(lo, lo + 1) for lo in xrange(0, 10)]
    # Reads are the unit intervals plus one read over the whole window;
    # zip(*...) splits the pairs into start and end coordinates.
    starts, ends = map(np.array, zip(*(unitIntervals + [(0, 10)])))
    assert_equals(unitIntervals, kSpannedIntervals(window, 2, starts, ends))
    assert_equals([], kSpannedIntervals(window, 3, starts, ends))

Beispiel #3

0

Datei anzeigen

Datei: test_coverage_intervals.py Projekt: xxz19900/GenomicConsensus

def test_intervals_3():
    """
    "Dromedary": every read covers the same stretch in the middle of
    the window.
    """
    window = (0, 0, 10)
    nReads = 10
    readStarts = np.array([3] * nReads, dtype=int)
    readEnds = np.array([7] * nReads, dtype=int)
    found = kSpannedIntervals(window, 3, readStarts, readEnds)
    assert_equals([(3, 7)], found)

Beispiel #4

0

Datei anzeigen

Datei: test_coverage_intervals.py Projekt: xxz19900/GenomicConsensus

def test_intervals_2():
    """
    Reads that end where the window begins, or begin where it ends,
    contribute no spanned interval.
    """
    window = (0, 1, 10)
    # Five reads at [0, 1) followed by five reads at [10, 15).
    readStarts = np.array([0, 10], dtype=int).repeat(5)
    readEnds = np.array([1, 15], dtype=int).repeat(5)
    found = kSpannedIntervals(window, 3, readStarts, readEnds)
    assert_equals([], found)

Beispiel #5

0

Datei anzeigen

Datei: test_coverage_intervals.py Projekt: xxz19900/GenomicConsensus

def test_intervals_1():
    """
    Ten identical reads over [100, 110) yield that single interval.
    """
    # NOTE(review): the window ends at 1010 while the reads end at 110;
    # possibly a typo for 110 -- kept exactly as in the original test.
    window = (0, 100, 1010)
    nReads = 10
    readStarts = np.array([100], dtype=int).repeat(nReads)
    readEnds = np.array([110], dtype=int).repeat(nReads)
    found = kSpannedIntervals(window, 3, readStarts, readEnds)
    assert_equals([(100, 110)], found)

Beispiel #6

0

Datei anzeigen

Datei: test_coverage_intervals.py Projekt: xxz19900/GenomicConsensus

def test_intervals_underflow():
    """
    Regression test for a case that once gave wrong results because of
    an underflow.  Note that the coordinates are unsigned (uint32).
    """
    window = (0, 5, 10)
    readStarts = np.arange(10, dtype=np.uint32)
    readEnds = readStarts + 10
    found = kSpannedIntervals(window, 3, readStarts, readEnds)
    assert_equals([(5, 10)], found)

Beispiel #7

0

Datei anzeigen

Datei: test_coverage_intervals.py Projekt: xxz19900/GenomicConsensus

def test_intervals_4():
    """
    "Camel": two covered stretches inside the window, separated by an
    uncovered hole.
    """
    window = (0, 100, 110)
    humps = [(103, 105), (107, 109)]
    # Five identical reads per hump.
    readStarts = np.array([lo for lo, _ in humps], dtype=int).repeat(5)
    readEnds = np.array([hi for _, hi in humps], dtype=int).repeat(5)
    found = kSpannedIntervals(window, 3, readStarts, readEnds)
    assert_equals([(103, 105), (107, 109)], found)

Beispiel #8

0

Datei anzeigen

Datei: test_coverage_intervals.py Projekt: Mozhgann/GenomicConsensus

def test_intervals_underflow():
    """
    Guards against a previously observed underflow that produced wrong
    results; the read coordinates here are uint32 arrays.
    """
    window = (0, 5, 10)
    starts = np.arange(10, dtype=np.uint32)
    ends = starts + 10
    expected = [(5, 10)]
    assert_equals(expected, kSpannedIntervals(window, 3, starts, ends))

Beispiel #9

0

Datei anzeigen

Datei: test_coverage_intervals.py Projekt: Mozhgann/GenomicConsensus

def test_intervals_4():
    """
    Two separate covered stretches with a hole between them ("camel").
    """
    window = (0, 100, 110)
    readsPerHump = 5
    starts = np.array([103, 107], dtype=int).repeat(readsPerHump)
    ends = np.array([105, 109], dtype=int).repeat(readsPerHump)
    expected = [(103, 105), (107, 109)]
    assert_equals(expected, kSpannedIntervals(window, 3, starts, ends))

Beispiel #10

0

Datei anzeigen

Datei: test_coverage_intervals.py Projekt: Mozhgann/GenomicConsensus

def test_intervals_3():
    """
    A single covered stretch in the middle of the window ("dromedary").
    """
    window = (0, 0, 10)
    # Ten identical reads spanning [3, 7).
    starts = np.array([3], dtype=int).repeat(10)
    ends = np.array([7], dtype=int).repeat(10)
    expected = [(3, 7)]
    assert_equals(expected, kSpannedIntervals(window, 3, starts, ends))

Beispiel #11

0

Datei anzeigen

Datei: test_coverage_intervals.py Projekt: Mozhgann/GenomicConsensus

def test_intervals_2():
    """
    Reads lying entirely before or entirely after the window give an
    empty result.
    """
    window = (0, 1, 10)
    before = [(0, 1)] * 5     # end exactly at the window start
    after = [(10, 15)] * 5    # start exactly at the window end
    reads = before + after
    starts = np.array([lo for lo, _ in reads], dtype=int)
    ends = np.array([hi for _, hi in reads], dtype=int)
    assert_equals([], kSpannedIntervals(window, 3, starts, ends))

Beispiel #12

0

Datei anzeigen

Datei: test_coverage_intervals.py Projekt: Mozhgann/GenomicConsensus

def test_intervals_1():
    """
    All reads share the extent [100, 110); that interval is the only
    one reported.
    """
    # NOTE(review): window end is 1010, not 110 -- possibly a typo in the
    # original test; value kept unchanged.
    window = (0, 100, 1010)
    starts = np.array([100] * 10, dtype=int)
    ends = np.array([110] * 10, dtype=int)
    expected = [(100, 110)]
    assert_equals(expected, kSpannedIntervals(window, 3, starts, ends))

Beispiel #13

0

Datei anzeigen

Datei: consensus.py Projekt: dalexander/DumbView

def consensus(alnReader, refWindow, referenceTable, alns):
    """
    Build a Consensus object for refWindow.

    The work is done on an enlarged copy of the window.  Intervals of
    the enlarged window that are K-spanned by `alns` get a real
    consensus when at least K alignments survive filtering; every other
    interval keeps the `Consensus.nAsConsensus` placeholder.  The joined
    result is aligned back to the reference and clipped to refWindow.
    """
    # Enlarged window and the reference sequence underneath it;
    # `overlap` is a module-level setting.
    refName = alnReader.referenceInfo(refWindow.refId).FullName
    contigSeq = referenceTable[refName].sequence
    eWindow = enlargedReferenceWindow(refWindow, len(contigSeq), overlap)
    offset = eWindow.start
    enlargedRefSeq = contigSeq[offset:eWindow.end]

    # K-spanned intervals of the enlarged window, plus the holes
    # between them.
    spanned = w.kSpannedIntervals(eWindow, K,
                                  [a.tStart for a in alns],
                                  [a.tEnd for a in alns])
    gaps = w.holes(eWindow, spanned)

    pieces = []
    for interval in sorted(spanned + gaps):
        subWin = subWindow(eWindow, interval)
        lo, hi = interval
        # Interval coordinates are reference-based; shift them into the
        # enlarged-window sequence.
        pieceRefSeq = enlargedRefSeq[lo - offset:hi - offset]
        # Placeholder, replaced below when a real consensus is possible.
        piece = Consensus.nAsConsensus(subWin, pieceRefSeq)
        if interval in spanned:
            windowAlns = readsInWindow(alnReader, subWin,
                                       depthLimit=100,
                                       minMapQV=quiverConfig.minMapQV,
                                       strategy="longest")
            clipped = [aln.clippedTo(*interval) for aln in windowAlns]
            usable = q.utils.filterAlns(subWin, clipped, quiverConfig)
            if len(usable) >= K:
                piece = q.utils.consensusForAlignments(subWin,
                                                       pieceRefSeq,
                                                       usable,
                                                       quiverConfig)
        pieces.append(piece)

    # Stitch the per-interval results together.
    css = join(pieces)

    # Align the joined consensus against the enlarged reference so the
    # original window boundaries can be located in consensus coordinates.
    ga = cc.Align(enlargedRefSeq, css.sequence)
    targetPositions = cc.TargetToQueryPositions(ga)
    cssStart = targetPositions[refWindow.start - offset]
    cssEnd = targetPositions[refWindow.end - offset]

    return Consensus(refWindow,
                     css.sequence[cssStart:cssEnd],
                     css.confidence[cssStart:cssEnd])

Beispiel #14

0

Datei anzeigen

Datei: arrow.py Projekt: wqhf/GenomicConsensus

def consensusAndVariantsForWindow(alnFile, refWindow, referenceContig,
                                  depthLimit, arrowConfig):
    """
    High-level routine for calling the consensus for a
    window of the genome given a BAM file.

    Identifies the coverage contours of the window in order to
    identify subintervals where a good consensus can be called.
    Creates the desired "no evidence consensus" where there is
    inadequate coverage.

    Args:
        alnFile: alignment source handed to ``U.readsInWindow``.
        refWindow: ``(id, start, end)`` triple describing the window.
        referenceContig: reference sequence, sliced with reference
            coordinates (``referenceContig[start:end]``).
        depthLimit: forwarded as ``depthLimit`` to the per-interval
            ``U.readsInWindow`` call.
        arrowConfig: configuration object; ``minMapQV``,
            ``minPoaCoverage``, ``noEvidenceConsensus`` and
            ``polishDiploid`` are read from it.

    Returns:
        A ``(css, variants)`` pair: the join of the per-interval
        consensus objects, and the list of variants collected over all
        intervals.
    """
    _, winStart, winEnd = refWindow
    logging.info("Arrow operating on %s" % reference.windowToString(refWindow))

    if options.fancyChunking:
        # 1) identify the intervals with adequate coverage for arrow
        #    consensus; restrict to intervals of length > 10
        alnHits = U.readsInWindow(alnFile,
                                  refWindow,
                                  depthLimit=20000,
                                  minMapQV=arrowConfig.minMapQV,
                                  strategy="long-and-strand-balanced",
                                  stratum=options.readStratum,
                                  barcode=options.barcode)
        # ``np.int`` was only an alias for the builtin ``int``; it was
        # deprecated in NumPy 1.20 and removed in 1.24, so use ``int``.
        starts = np.fromiter((hit.tStart for hit in alnHits), int)
        ends = np.fromiter((hit.tEnd for hit in alnHits), int)
        intervals = kSpannedIntervals(refWindow,
                                      arrowConfig.minPoaCoverage,
                                      starts,
                                      ends,
                                      minLength=10)
        coverageGaps = holes(refWindow, intervals)
        allIntervals = sorted(intervals + coverageGaps)
        if len(allIntervals) > 1:
            logging.info("Usable coverage in %s: %r" %
                         (reference.windowToString(refWindow), intervals))

    else:
        allIntervals = [(winStart, winEnd)]

    # 2) pull out the reads we will use for each interval
    # 3) call consensusForAlignments on the interval
    subConsensi = []
    variants = []

    for interval in allIntervals:
        intStart, intEnd = interval
        # One reference slice serves both consensus and variant calling.
        intRefSeq = referenceContig[intStart:intEnd]
        subWin = subWindow(refWindow, interval)

        alns = U.readsInWindow(alnFile,
                               subWin,
                               depthLimit=depthLimit,
                               minMapQV=arrowConfig.minMapQV,
                               strategy="long-and-strand-balanced",
                               stratum=options.readStratum,
                               barcode=options.barcode)
        clippedAlns_ = [aln.clippedTo(*interval) for aln in alns]
        clippedAlns = U.filterAlns(subWin, clippedAlns_, arrowConfig)

        # Only reads spanning the whole interval count towards the
        # coverage requirement.
        numSpanning = sum(1 for a in clippedAlns
                          if a.spansReferenceRange(*interval))

        if numSpanning >= arrowConfig.minPoaCoverage:

            logging.debug("%s: Reads being used: %s" %
                          (reference.windowToString(subWin), " ".join(
                              [str(hit.readName) for hit in alns])))

            # consensusForAlignments receives this list via ``alnsUsed``;
            # it is used below for the "effective" coverage.
            alnsUsed = [] if options.reportEffectiveCoverage else None
            css = U.consensusForAlignments(subWin,
                                           intRefSeq,
                                           clippedAlns,
                                           arrowConfig,
                                           alnsUsed=alnsUsed)

            # Tabulate the coverage implied by these alignments, as
            # well as the post-filtering ("effective") coverage
            siteCoverage = U.coverageInWindow(subWin, alns)
            effectiveSiteCoverage = U.coverageInWindow(
                subWin, alnsUsed) if options.reportEffectiveCoverage else None

            variants_, newPureCss = U.variantsFromConsensus(
                subWin,
                intRefSeq,
                css.sequence,
                css.confidence,
                siteCoverage,
                effectiveSiteCoverage,
                options.aligner,
                ai=None,
                diploid=arrowConfig.polishDiploid)

            # Annotate?
            if options.annotateGFF:
                annotateVariants(variants_, clippedAlns)

            variants += variants_

            # The nascent consensus sequence might contain ambiguous bases, these
            # need to be removed as software in the wild cannot deal with such
            # characters and we only use IUPAC for *internal* bookkeeping.
            if arrowConfig.polishDiploid:
                css.sequence = newPureCss
        else:
            css = ArrowConsensus.noCallConsensus(
                arrowConfig.noEvidenceConsensus, subWin, intRefSeq)
        subConsensi.append(css)

    # 4) glue the subwindow consensus objects together to form the
    #    full window consensus
    css = join(subConsensi)

    # 5) Return
    return css, variants

Beispiel #15

0

Datei anzeigen

Datei: arrow.py Projekt: lpp1985/lpp_Script

def consensusAndVariantsForWindow(alnFile, refWindow, referenceContig,
                                  depthLimit, arrowConfig):
    """
    High-level routine for calling the consensus for a
    window of the genome given a cmp.h5.

    Identifies the coverage contours of the window in order to
    identify subintervals where a good consensus can be called.
    Creates the desired "no evidence consensus" where there is
    inadequate coverage.

    Args:
        alnFile: alignment source handed to ``U.readsInWindow``.
        refWindow: ``(id, start, end)`` triple describing the window.
        referenceContig: reference sequence, sliced with reference
            coordinates (``referenceContig[start:end]``).
        depthLimit: forwarded as ``depthLimit`` to the per-interval
            ``U.readsInWindow`` call.
        arrowConfig: configuration object; ``minMapQV``,
            ``minPoaCoverage`` and ``noEvidenceConsensus`` are read
            from it.

    Returns:
        A ``(css, variants)`` pair: the join of the per-interval
        consensus objects, and the list of filtered variants collected
        over all intervals.
    """
    _, winStart, winEnd = refWindow
    logging.info("Arrow operating on %s" %
                 reference.windowToString(refWindow))

    if options.fancyChunking:
        # 1) identify the intervals with adequate coverage for arrow
        #    consensus; restrict to intervals of length > 10
        alnHits = U.readsInWindow(alnFile, refWindow,
                                  depthLimit=20000,
                                  minMapQV=arrowConfig.minMapQV,
                                  strategy="long-and-strand-balanced",
                                  stratum=options.readStratum,
                                  barcode=options.barcode)
        # ``np.int`` was only an alias for the builtin ``int``; it was
        # deprecated in NumPy 1.20 and removed in 1.24, so use ``int``.
        starts = np.fromiter((hit.tStart for hit in alnHits), int)
        ends = np.fromiter((hit.tEnd for hit in alnHits), int)
        intervals = kSpannedIntervals(refWindow, arrowConfig.minPoaCoverage,
                                      starts, ends, minLength=10)
        coverageGaps = holes(refWindow, intervals)
        allIntervals = sorted(intervals + coverageGaps)
        if len(allIntervals) > 1:
            logging.info("Usable coverage in %s: %r" %
                         (reference.windowToString(refWindow), intervals))

    else:
        allIntervals = [(winStart, winEnd)]

    # 2) pull out the reads we will use for each interval
    # 3) call consensusForAlignments on the interval
    subConsensi = []
    variants = []

    for interval in allIntervals:
        intStart, intEnd = interval
        # One reference slice serves both consensus and variant calling.
        intRefSeq = referenceContig[intStart:intEnd]
        subWin = subWindow(refWindow, interval)

        alns = U.readsInWindow(alnFile, subWin,
                               depthLimit=depthLimit,
                               minMapQV=arrowConfig.minMapQV,
                               strategy="long-and-strand-balanced",
                               stratum=options.readStratum,
                               barcode=options.barcode)
        clippedAlns_ = [aln.clippedTo(*interval) for aln in alns]
        clippedAlns = U.filterAlns(subWin, clippedAlns_, arrowConfig)

        # Only reads spanning the whole interval count towards the
        # coverage requirement.
        numSpanning = sum(1 for a in clippedAlns
                          if a.spansReferenceRange(*interval))

        if numSpanning >= arrowConfig.minPoaCoverage:

            logging.debug("%s: Reads being used: %s" %
                          (reference.windowToString(subWin),
                           " ".join([str(hit.readName) for hit in alns])))

            # consensusForAlignments receives this list via ``alnsUsed``;
            # it is used below for the "effective" coverage.
            alnsUsed = [] if options.reportEffectiveCoverage else None
            css = U.consensusForAlignments(subWin,
                                           intRefSeq,
                                           clippedAlns,
                                           arrowConfig,
                                           alnsUsed=alnsUsed)

            # Tabulate the coverage implied by these alignments, as
            # well as the post-filtering ("effective") coverage
            siteCoverage = U.coverageInWindow(subWin, alns)
            if options.reportEffectiveCoverage:
                effectiveSiteCoverage = U.coverageInWindow(subWin, alnsUsed)
            else:
                effectiveSiteCoverage = None

            variants_ = U.variantsFromConsensus(subWin, intRefSeq,
                                                css.sequence, css.confidence,
                                                siteCoverage,
                                                effectiveSiteCoverage,
                                                options.aligner,
                                                ai=None)

            filteredVars = filterVariants(options.minCoverage,
                                          options.minConfidence,
                                          variants_)
            # Annotate?
            if options.annotateGFF:
                annotateVariants(filteredVars, clippedAlns)

            variants += filteredVars

            # Dump?  ``and`` binds tighter than ``or``, so the variant
            # check applies only to the "variants" mode; the parentheses
            # make that explicit.
            # NOTE(review): this tests the variants accumulated over all
            # intervals so far, not just this interval's -- confirm intent.
            maybeDumpEvidence = (
                (options.dumpEvidence == "all") or
                (options.dumpEvidence == "outliers") or
                ((options.dumpEvidence == "variants") and
                 (len(variants) > 0)))
            if maybeDumpEvidence:
                refId, refStart, refEnd = subWin
                refName = reference.idToName(refId)
                windowDirectory = os.path.join(
                    options.evidenceDirectory,
                    refName,
                    "%d-%d" % (refStart, refEnd))
                ev = ArrowEvidence.fromConsensus(css)
                if options.dumpEvidence != "outliers":
                    ev.save(windowDirectory)
                elif (np.max(ev.delta) > 20):
                    # Mathematically I don't think we should be seeing
                    # deltas > 6 in magnitude, but let's just restrict
                    # attention to truly bonkers outliers.
                    ev.save(windowDirectory)

        else:
            css = ArrowConsensus.noCallConsensus(arrowConfig.noEvidenceConsensus,
                                                 subWin, intRefSeq)
        subConsensi.append(css)

    # 4) glue the subwindow consensus objects together to form the
    #    full window consensus
    css = join(subConsensi)

    # 5) Return
    return css, variants

Beispiel #16

0

Datei anzeigen

Datei: arrow.py Projekt: lpp1985/lpp_Script

def consensusAndVariantsForWindow(alnFile, refWindow, referenceContig,
                                  depthLimit, arrowConfig):
    """
    High-level routine for calling the consensus for a
    window of the genome given a cmp.h5.

    Identifies the coverage contours of the window in order to
    identify subintervals where a good consensus can be called.
    Creates the desired "no evidence consensus" where there is
    inadequate coverage.

    Args:
        alnFile: alignment source handed to ``U.readsInWindow``.
        refWindow: ``(id, start, end)`` triple describing the window.
        referenceContig: reference sequence, sliced with reference
            coordinates (``referenceContig[start:end]``).
        depthLimit: forwarded as ``depthLimit`` to the per-interval
            ``U.readsInWindow`` call.
        arrowConfig: configuration object; ``minMapQV``,
            ``minPoaCoverage`` and ``noEvidenceConsensus`` are read
            from it.

    Returns:
        A ``(css, variants)`` pair: the join of the per-interval
        consensus objects, and the list of filtered variants collected
        over all intervals.
    """
    _, winStart, winEnd = refWindow
    logging.info("Arrow operating on %s" % reference.windowToString(refWindow))

    if options.fancyChunking:
        # 1) identify the intervals with adequate coverage for arrow
        #    consensus; restrict to intervals of length > 10
        alnHits = U.readsInWindow(alnFile,
                                  refWindow,
                                  depthLimit=20000,
                                  minMapQV=arrowConfig.minMapQV,
                                  strategy="long-and-strand-balanced",
                                  stratum=options.readStratum,
                                  barcode=options.barcode)
        # ``np.int`` was only an alias for the builtin ``int``; it was
        # deprecated in NumPy 1.20 and removed in 1.24, so use ``int``.
        starts = np.fromiter((hit.tStart for hit in alnHits), int)
        ends = np.fromiter((hit.tEnd for hit in alnHits), int)
        intervals = kSpannedIntervals(refWindow,
                                      arrowConfig.minPoaCoverage,
                                      starts,
                                      ends,
                                      minLength=10)
        coverageGaps = holes(refWindow, intervals)
        allIntervals = sorted(intervals + coverageGaps)
        if len(allIntervals) > 1:
            logging.info("Usable coverage in %s: %r" %
                         (reference.windowToString(refWindow), intervals))

    else:
        allIntervals = [(winStart, winEnd)]

    # 2) pull out the reads we will use for each interval
    # 3) call consensusForAlignments on the interval
    subConsensi = []
    variants = []

    for interval in allIntervals:
        intStart, intEnd = interval
        # One reference slice serves both consensus and variant calling.
        intRefSeq = referenceContig[intStart:intEnd]
        subWin = subWindow(refWindow, interval)

        alns = U.readsInWindow(alnFile,
                               subWin,
                               depthLimit=depthLimit,
                               minMapQV=arrowConfig.minMapQV,
                               strategy="long-and-strand-balanced",
                               stratum=options.readStratum,
                               barcode=options.barcode)
        clippedAlns_ = [aln.clippedTo(*interval) for aln in alns]
        clippedAlns = U.filterAlns(subWin, clippedAlns_, arrowConfig)

        # Only reads spanning the whole interval count towards the
        # coverage requirement.
        numSpanning = sum(1 for a in clippedAlns
                          if a.spansReferenceRange(*interval))

        if numSpanning >= arrowConfig.minPoaCoverage:

            logging.debug("%s: Reads being used: %s" %
                          (reference.windowToString(subWin), " ".join(
                              [str(hit.readName) for hit in alns])))

            # consensusForAlignments receives this list via ``alnsUsed``;
            # it is used below for the "effective" coverage.
            alnsUsed = [] if options.reportEffectiveCoverage else None
            css = U.consensusForAlignments(subWin,
                                           intRefSeq,
                                           clippedAlns,
                                           arrowConfig,
                                           alnsUsed=alnsUsed)

            # Tabulate the coverage implied by these alignments, as
            # well as the post-filtering ("effective") coverage
            siteCoverage = U.coverageInWindow(subWin, alns)
            effectiveSiteCoverage = U.coverageInWindow(
                subWin, alnsUsed) if options.reportEffectiveCoverage else None

            variants_ = U.variantsFromConsensus(subWin,
                                                intRefSeq,
                                                css.sequence,
                                                css.confidence,
                                                siteCoverage,
                                                effectiveSiteCoverage,
                                                options.aligner,
                                                ai=None)

            filteredVars = filterVariants(options.minCoverage,
                                          options.minConfidence, variants_)
            # Annotate?
            if options.annotateGFF:
                annotateVariants(filteredVars, clippedAlns)

            variants += filteredVars

            # Dump?  ``and`` binds tighter than ``or``, so the variant
            # check applies only to the "variants" mode; the parentheses
            # make that explicit.
            # NOTE(review): this tests the variants accumulated over all
            # intervals so far, not just this interval's -- confirm intent.
            maybeDumpEvidence = (
                (options.dumpEvidence == "all") or
                (options.dumpEvidence == "outliers") or
                ((options.dumpEvidence == "variants") and
                 (len(variants) > 0)))
            if maybeDumpEvidence:
                refId, refStart, refEnd = subWin
                refName = reference.idToName(refId)
                windowDirectory = os.path.join(options.evidenceDirectory,
                                               refName,
                                               "%d-%d" % (refStart, refEnd))
                ev = ArrowEvidence.fromConsensus(css)
                if options.dumpEvidence != "outliers":
                    ev.save(windowDirectory)
                elif (np.max(ev.delta) > 20):
                    # Mathematically I don't think we should be seeing
                    # deltas > 6 in magnitude, but let's just restrict
                    # attention to truly bonkers outliers.
                    ev.save(windowDirectory)

        else:
            css = ArrowConsensus.noCallConsensus(
                arrowConfig.noEvidenceConsensus, subWin, intRefSeq)
        subConsensi.append(css)

    # 4) glue the subwindow consensus objects together to form the
    #    full window consensus
    css = join(subConsensi)

    # 5) Return
    return css, variants

Beispiel #17

0

Datei anzeigen

def consensusAndVariantsForWindow(alnFile, refWindow, referenceContig,
                                  depthLimit, arrowConfig):
    """
    High-level routine for calling the consensus for a
    window of the genome given a cmp.h5.

    Identifies the coverage contours of the window in order to
    identify subintervals where a good consensus can be called.
    Creates the desired "no evidence consensus" where there is
    inadequate coverage.

    Args:
        alnFile: alignment source handed to ``U.readsInWindow``.
        refWindow: ``(id, start, end)`` triple describing the window.
        referenceContig: reference sequence, sliced with reference
            coordinates (``referenceContig[start:end]``).
        depthLimit: forwarded as ``depthLimit`` to the per-interval
            ``U.readsInWindow`` call.
        arrowConfig: configuration object; ``minMapQV``,
            ``minPoaCoverage`` and ``noEvidenceConsensus`` are read
            from it.

    Returns:
        A ``(css, variants)`` pair: the join of the per-interval
        consensus objects, and the list of filtered variants collected
        over all intervals.
    """
    _, winStart, winEnd = refWindow
    logging.info("Arrow operating on %s" %
                 reference.windowToString(refWindow))

    if options.fancyChunking:
        # 1) identify the intervals with adequate coverage for arrow
        #    consensus; restrict to intervals of length > 10
        alnHits = U.readsInWindow(alnFile, refWindow,
                                  depthLimit=20000,
                                  minMapQV=arrowConfig.minMapQV,
                                  strategy="longest",
                                  stratum=options.readStratum,
                                  barcode=options.barcode)
        # ``np.int`` was only an alias for the builtin ``int``; it was
        # deprecated in NumPy 1.20 and removed in 1.24, so use ``int``.
        starts = np.fromiter((hit.tStart for hit in alnHits), int)
        ends = np.fromiter((hit.tEnd for hit in alnHits), int)
        intervals = kSpannedIntervals(refWindow, arrowConfig.minPoaCoverage,
                                      starts, ends, minLength=10)
        coverageGaps = holes(refWindow, intervals)
        allIntervals = sorted(intervals + coverageGaps)
        if len(allIntervals) > 1:
            logging.info("Usable coverage in %s: %r" %
                         (reference.windowToString(refWindow), intervals))

    else:
        allIntervals = [(winStart, winEnd)]

    # 2) pull out the reads we will use for each interval
    # 3) call consensusForAlignments on the interval
    subConsensi = []
    variants = []

    for interval in allIntervals:
        intStart, intEnd = interval
        # One reference slice serves both consensus and variant calling.
        intRefSeq = referenceContig[intStart:intEnd]
        subWin = subWindow(refWindow, interval)

        alns = U.readsInWindow(alnFile, subWin,
                               depthLimit=depthLimit,
                               minMapQV=arrowConfig.minMapQV,
                               strategy="longest",
                               stratum=options.readStratum,
                               barcode=options.barcode)
        clippedAlns_ = [aln.clippedTo(*interval) for aln in alns]
        clippedAlns = U.filterAlns(subWin, clippedAlns_, arrowConfig)

        # Only reads spanning the whole interval count towards the
        # coverage requirement.
        numSpanning = sum(1 for a in clippedAlns
                          if a.spansReferenceRange(*interval))

        if numSpanning >= arrowConfig.minPoaCoverage:

            logging.debug("%s: Reads being used: %s" %
                          (reference.windowToString(subWin),
                           " ".join([str(hit.readName) for hit in alns])))

            css = U.consensusForAlignments(subWin,
                                           intRefSeq,
                                           clippedAlns,
                                           arrowConfig)

            siteCoverage = U.coverageInWindow(subWin, alns)

            variants_ = U.variantsFromConsensus(subWin, intRefSeq,
                                                css.sequence, css.confidence,
                                                siteCoverage,
                                                options.aligner,
                                                ai=None)

            filteredVars = filterVariants(options.minCoverage,
                                          options.minConfidence,
                                          variants_)
            # Annotate?
            if options.annotateGFF:
                annotateVariants(filteredVars, clippedAlns)

            variants += filteredVars

            # Dump?  ``and`` binds tighter than ``or``, so the variant
            # check applies only to the "variants" mode; the parentheses
            # make that explicit.
            # NOTE(review): this tests the variants accumulated over all
            # intervals so far, not just this interval's -- confirm intent.
            shouldDumpEvidence = (
                (options.dumpEvidence == "all") or
                ((options.dumpEvidence == "variants") and
                 (len(variants) > 0)))
            if shouldDumpEvidence:
                # TODO: evidence dumping is not implemented for Arrow;
                # the request is only logged.
                logging.info("Arrow does not yet support --dumpEvidence")
        else:
            css = ArrowConsensus.noCallConsensus(arrowConfig.noEvidenceConsensus,
                                                 subWin, intRefSeq)
        subConsensi.append(css)

    # 4) glue the subwindow consensus objects together to form the
    #    full window consensus
    css = join(subConsensi)

    # 5) Return
    return css, variants

Beispiel #18

0

Datei anzeigen

Datei: arrow.py Projekt: PacificBiosciences/GenomicConsensus

def consensusAndVariantsForWindow(alnFile, refWindow, referenceContig,
                                  depthLimit, arrowConfig):
    """
    High-level routine for calling the consensus for a
    window of the genome given a BAM file.

    Identifies the coverage contours of the window in order to
    identify subintervals where a good consensus can be called.
    Creates the desired "no evidence consensus" where there is
    inadequate coverage.

    Args:
        alnFile: alignment source handed to ``U.readsInWindow``.
        refWindow: ``(id, start, end)`` triple describing the window.
        referenceContig: reference sequence, sliced with reference
            coordinates (``referenceContig[start:end]``).
        depthLimit: forwarded as ``depthLimit`` to the per-interval
            ``U.readsInWindow`` call.
        arrowConfig: configuration object; ``minMapQV``,
            ``minPoaCoverage``, ``noEvidenceConsensus`` and
            ``polishDiploid`` are read from it.

    Returns:
        A ``(css, variants)`` pair: the join of the per-interval
        consensus objects, and the list of variants collected over all
        intervals.
    """
    _, winStart, winEnd = refWindow
    logging.info("Arrow operating on %s" %
                 reference.windowToString(refWindow))

    if options.fancyChunking:
        # 1) identify the intervals with adequate coverage for arrow
        #    consensus; restrict to intervals of length > 10
        alnHits = U.readsInWindow(alnFile, refWindow,
                                  depthLimit=20000,
                                  minMapQV=arrowConfig.minMapQV,
                                  strategy="long-and-strand-balanced",
                                  stratum=options.readStratum,
                                  barcode=options.barcode)
        # ``np.int`` was only an alias for the builtin ``int``; it was
        # deprecated in NumPy 1.20 and removed in 1.24, so use ``int``.
        starts = np.fromiter((hit.tStart for hit in alnHits), int)
        ends = np.fromiter((hit.tEnd for hit in alnHits), int)
        intervals = kSpannedIntervals(refWindow, arrowConfig.minPoaCoverage,
                                      starts, ends, minLength=10)
        coverageGaps = holes(refWindow, intervals)
        allIntervals = sorted(intervals + coverageGaps)
        if len(allIntervals) > 1:
            logging.info("Usable coverage in %s: %r" %
                         (reference.windowToString(refWindow), intervals))

    else:
        allIntervals = [(winStart, winEnd)]

    # 2) pull out the reads we will use for each interval
    # 3) call consensusForAlignments on the interval
    subConsensi = []
    variants = []

    for interval in allIntervals:
        intStart, intEnd = interval
        # One reference slice serves both consensus and variant calling.
        intRefSeq = referenceContig[intStart:intEnd]
        subWin = subWindow(refWindow, interval)

        alns = U.readsInWindow(alnFile, subWin,
                               depthLimit=depthLimit,
                               minMapQV=arrowConfig.minMapQV,
                               strategy="long-and-strand-balanced",
                               stratum=options.readStratum,
                               barcode=options.barcode)
        clippedAlns_ = [aln.clippedTo(*interval) for aln in alns]
        clippedAlns = U.filterAlns(subWin, clippedAlns_, arrowConfig)

        # Only reads spanning the whole interval count towards the
        # coverage requirement.
        numSpanning = sum(1 for a in clippedAlns
                          if a.spansReferenceRange(*interval))

        if numSpanning >= arrowConfig.minPoaCoverage:

            logging.debug("%s: Reads being used: %s" %
                          (reference.windowToString(subWin),
                           " ".join([str(hit.readName) for hit in alns])))

            # consensusForAlignments receives this list via ``alnsUsed``;
            # it is used below for the "effective" coverage.
            alnsUsed = [] if options.reportEffectiveCoverage else None
            css = U.consensusForAlignments(subWin,
                                           intRefSeq,
                                           clippedAlns,
                                           arrowConfig,
                                           alnsUsed=alnsUsed)

            # Tabulate the coverage implied by these alignments, as
            # well as the post-filtering ("effective") coverage
            siteCoverage = U.coverageInWindow(subWin, alns)
            if options.reportEffectiveCoverage:
                effectiveSiteCoverage = U.coverageInWindow(subWin, alnsUsed)
            else:
                effectiveSiteCoverage = None

            variants_, newPureCss = U.variantsFromConsensus(
                subWin, intRefSeq, css.sequence, css.confidence,
                siteCoverage, effectiveSiteCoverage,
                options.aligner, ai=None,
                diploid=arrowConfig.polishDiploid)

            # Annotate?
            if options.annotateGFF:
                annotateVariants(variants_, clippedAlns)

            variants += variants_

            # The nascent consensus sequence might contain ambiguous bases, these
            # need to be removed as software in the wild cannot deal with such
            # characters and we only use IUPAC for *internal* bookkeeping.
            if arrowConfig.polishDiploid:
                css.sequence = newPureCss
        else:
            css = ArrowConsensus.noCallConsensus(arrowConfig.noEvidenceConsensus,
                                                 subWin, intRefSeq)
        subConsensi.append(css)

    # 4) glue the subwindow consensus objects together to form the
    #    full window consensus
    css = join(subConsensi)

    # 5) Return
    return css, variants