def test_coverage_in_range2(self):
    """Compare ``RQ.getCoverageInRange`` against a brute-force count.

    For every 50th start position in ``[0, 45000)`` a one-base window
    ``(1, start, start + 1)`` is queried, and the result must equal a
    one-element list holding the length of what
    ``brute_force_reads_in_range`` returns for the same span.
    """
    # Brute force over lambda
    for start in xrange(0, 45000, 50):
        stop = start + 1
        overlapping = brute_force_reads_in_range(
            start, stop, self.cmpH5.tStart, self.cmpH5.tEnd)
        observed = RQ.getCoverageInRange(self.cmpH5, (1, start, stop))
        assert_array_equal([len(overlapping)], observed)
def test_get_coverage_in_range(self):
    """Assert every element of the coverage for window ``(1, 0, 100)`` is 2."""
    coverage = RQ.getCoverageInRange(self.cmpH5, (1, 0, 100))
    assert all(coverage == 2)
def test_get_coverage_in_range(self):
    """Check that coverage over the window ``(1, 0, 100)`` equals 2 throughout."""
    window = (1, 0, 100)
    isTwo = RQ.getCoverageInRange(self.cmpH5, window) == 2
    assert all(isTwo)
def test_coverage_in_range2(self):
    """Cross-check one-base coverage queries with a brute-force scan.

    Steps through start positions 0, 50, 100, ... below 45000 and
    requires ``RQ.getCoverageInRange`` on ``(1, start, start + 1)`` to
    match ``[len(brute_force_reads_in_range(...))]`` for that span.
    """
    # Brute force over lambda
    for winStart in xrange(0, 45000, 50):
        window = (1, winStart, winStart + 1)
        expected = [
            len(brute_force_reads_in_range(window[1], window[2],
                                           self.cmpH5.tStart,
                                           self.cmpH5.tEnd))
        ]
        assert_array_equal(expected,
                           RQ.getCoverageInRange(self.cmpH5, window))
def consensusAndVariantsForWindow(cmpH5, refWindow, referenceContig,
                                  depthLimit, quiverConfig):
    """
    High-level routine for calling the consensus for a
    window of the genome given a cmp.h5.

    Identifies the coverage contours of the window in order to
    identify subintervals where a good consensus can be called.
    Creates the desired "no evidence consensus" where there is
    inadequate coverage.

    Arguments:
      cmpH5           -- alignment container; indexed by row numbers, and
                         its ``tStart``/``tEnd`` columns are read when
                         fancy chunking is enabled
      refWindow       -- 3-tuple ``(winId, winStart, winEnd)``
      referenceContig -- sliceable reference sequence for the contig
      depthLimit      -- forwarded to ``U.readsInWindow`` as ``depthLimit``
                         when selecting reads for each subinterval
      quiverConfig    -- supplies ``minMapQV``, ``minPoaCoverage`` and
                         ``noEvidenceConsensus``

    Returns:
      ``(css, variants)`` -- the subinterval consensus objects joined into
      one consensus for the whole window, and the filtered variants
      accumulated over all subintervals.

    Behaviour is also driven by the global ``options`` object
    (``fancyChunking``, ``readStratum``, ``barcode``, ``diploid``,
    ``aligner``, ``minCoverage``, ``minConfidence``, ``annotateGFF``,
    ``dumpEvidence``, ``evidenceDirectory``).
    """
    # winId is not needed here, but unpacking still validates the tuple shape.
    _winId, winStart, winEnd = refWindow
    logging.info("Quiver operating on %s",
                 reference.windowToString(refWindow))

    if options.fancyChunking:
        # 1) identify the intervals with adequate coverage for quiver
        #    consensus; restrict to intervals of length > 10
        allRows = U.readsInWindow(cmpH5, refWindow,
                                  minMapQV=quiverConfig.minMapQV,
                                  strategy="longest",
                                  stratum=options.readStratum,
                                  barcode=options.barcode)
        starts = cmpH5.tStart[allRows]
        ends = cmpH5.tEnd[allRows]
        intervals = kSpannedIntervals(refWindow,
                                      quiverConfig.minPoaCoverage,
                                      starts, ends, minLength=10)
        coverageGaps = holes(refWindow, intervals)
        # Covered intervals and gaps are processed together, in sorted order.
        allIntervals = sorted(intervals + coverageGaps)
        if len(allIntervals) > 1:
            logging.info("Usable coverage in %s: %r",
                         reference.windowToString(refWindow), intervals)
    else:
        allIntervals = [(winStart, winEnd)]

    # 2) pull out the reads we will use for each interval
    # 3) call consensusForAlignments on the interval
    subConsensi = []
    variants = []

    for interval in allIntervals:
        intStart, intEnd = interval
        # A single slice serves both consensus calling and variant calling.
        intRefSeq = referenceContig[intStart:intEnd]
        subWin = subWindow(refWindow, interval)

        rows = U.readsInWindow(cmpH5, subWin,
                               depthLimit=depthLimit,
                               minMapQV=quiverConfig.minMapQV,
                               strategy="longest",
                               stratum=options.readStratum,
                               barcode=options.barcode)
        alns = cmpH5[rows]
        clippedAlns_ = [aln.clippedTo(*interval) for aln in alns]
        clippedAlns = U.filterAlns(subWin, clippedAlns_, quiverConfig)

        # Only alignments spanning the whole interval count towards the
        # coverage threshold.
        numSpanning = sum(1 for a in clippedAlns
                          if a.spansReferenceRange(*interval))

        if numSpanning >= quiverConfig.minPoaCoverage:
            logging.debug("%s: Row numbers being used: %s",
                          reference.windowToString(subWin),
                          " ".join(map(str, rows)))

            css = U.consensusForAlignments(subWin, intRefSeq,
                                           clippedAlns, quiverConfig)

            siteCoverage = rangeQueries.getCoverageInRange(cmpH5, subWin,
                                                           rows)

            if options.diploid:
                variants_ = diploid.variantsFromConsensus(
                    subWin, intRefSeq,
                    css.sequence, css.confidence, siteCoverage,
                    options.aligner,
                    css.mms)
            else:
                variants_ = U.variantsFromConsensus(
                    subWin, intRefSeq,
                    css.sequence, css.confidence, siteCoverage,
                    options.aligner,
                    mms=None)

            filteredVars = filterVariants(options.minCoverage,
                                          options.minConfidence,
                                          variants_)
            # Annotate?
            if options.annotateGFF:
                annotateVariants(filteredVars, clippedAlns)

            variants += filteredVars

            # Dump?
            # NOTE(review): in "variants" mode this tests the running
            # ``variants`` list for the whole window, not just this
            # subwindow's ``filteredVars`` -- confirm that is intended.
            shouldDumpEvidence = (
                (options.dumpEvidence == "all") or
                ((options.dumpEvidence == "variants") and
                 (len(variants) > 0)))
            if shouldDumpEvidence:
                dumpEvidence(options.evidenceDirectory,
                             subWin, intRefSeq,
                             clippedAlns, css)
        else:
            # Not enough spanning reads: emit the configured no-call
            # consensus for this subwindow.
            css = QuiverConsensus.noCallConsensus(
                quiverConfig.noEvidenceConsensus, subWin, intRefSeq)

        subConsensi.append(css)

    # 4) glue the subwindow consensus objects together to form the
    #    full window consensus
    css = join(subConsensi)

    # 5) Return
    return css, variants