def _mainLoop(self):
    """Split the reference into chunks and enqueue each one as a unit of work.

    For every reference id yielded by ``reference.enumerateIds`` the chunks
    are produced either by ``reference.fancyEnumerateChunks`` (when
    ``options.fancyChunking`` is truthy) or by ``reference.enumerateChunks``,
    and each chunk is put on ``self._workQueue`` with a timeout of
    ``options.queueTimeout``.

    After all chunks were enqueued, one ``None`` sentinel per worker
    (``options.numWorkers``) is written to mark the end of the work stream.

    The method returns early, *without* writing the sentinels, when
    ``self._aborting`` is set or when a chunk cannot be enqueued.
    """
    logging.debug("Starting main loop.")
    ids = reference.enumerateIds(options.referenceWindows)
    for _id in ids:
        if options.fancyChunking:
            chunks = reference.fancyEnumerateChunks(self._inCmpH5,
                                                    _id,
                                                    options.referenceChunkSize,
                                                    options.minCoverage,
                                                    options.minMapQV,
                                                    options.referenceWindows)
        else:
            chunks = reference.enumerateChunks(_id,
                                               options.referenceChunkSize,
                                               options.referenceWindows)
        for chunk in chunks:
            if self._aborting:
                return
            try:
                # Blocking put, bounded by the configured timeout.
                self._workQueue.put(chunk, True, options.queueTimeout)
            except Exception:
                # Still best-effort (give up on any queue failure), but no
                # longer swallows KeyboardInterrupt/SystemExit, and the
                # reason for stopping is recorded.
                logging.warning("Could not enqueue work chunk; "
                                "leaving main loop.", exc_info=True)
                return

    # Write sentinels ("end-of-work-stream"), one per worker.
    for _ in xrange(options.numWorkers):
        self._workQueue.put(None)
def _mainLoop(self):
    """Farm out the reference genome, chunk by chunk, to the work queue.

    Chunks come from ``reference.fancyEnumerateChunks`` when
    ``options.fancyChunking`` is truthy and from
    ``reference.enumerateChunks`` otherwise.  Every chunk is put on
    ``self._workQueue``; once all reference ids are exhausted, one ``None``
    sentinel per worker (``options.numWorkers``) is appended.

    Returns immediately (no sentinels written) as soon as
    ``self._aborting`` is observed to be truthy.
    """
    logging.debug("Starting main loop.")
    for refId in reference.enumerateIds(options.referenceWindows):
        if not options.fancyChunking:
            workChunks = reference.enumerateChunks(
                refId,
                options.referenceChunkSize,
                options.referenceWindows)
        else:
            workChunks = reference.fancyEnumerateChunks(
                self._inAlnFile,
                refId,
                options.referenceChunkSize,
                options.minCoverage,
                options.minMapQV,
                options.referenceWindows)

        for workChunk in workChunks:
            if self._aborting:
                return
            self._workQueue.put(workChunk)

    # End-of-work-stream markers: one per worker.
    for _ in xrange(options.numWorkers):
        self._workQueue.put(None)
def getReads(cmpH5, reference, interval, paddedTemplateWidth, depthLimit, real_quiver=False):
    """Gather the reads spanning ``interval`` together with a POA template.

    Args:
        cmpH5: Alignment container; it is indexed both by a single row and
            by a list of rows, and is passed to ``readsInWindow``.
        reference: Object exposing ``enumerateIds()`` and ``byId``.  Only the
            first enumerated id is used.
        interval: ``(start, end)`` pair in reference coordinates.
        paddedTemplateWidth: Length of the padded axis of the returned
            arrays.  Clipped alignments whose ``alignedLength`` exceeds
            ``paddedTemplateWidth - 7`` are discarded.
        depthLimit: Forwarded to ``readsInWindow``.
        real_quiver: Selects the flavour of the return value (see below).

    Returns:
        A 4-tuple.  When ``real_quiver`` is truthy:
        ``(template, templateLength, fwdSequences, qvInfo)`` where
        ``template`` is the POA consensus sequence and ``fwdSequences`` the
        list of read sequences.  Otherwise:
        ``(tmplSeq, templateLength, readSeqs, qvInfo)`` where ``tmplSeq`` is
        a ``uint8`` array of shape ``(paddedTemplateWidth,)`` and
        ``readSeqs`` a ``uint8`` array of shape
        ``(paddedTemplateWidth, nReads)``, both holding character ordinals
        and zero-padded.  ``qvInfo`` is a ``uint8`` array of shape
        ``(8, paddedTemplateWidth, nReads)``; channels 0-4 hold InsertionQV,
        MergeQV, DeletionQV, DeletionTag and SubstitutionQV, channels 5-7
        stay zero.
    """
    minMapQV = 10
    maxPoaCoverage = 11
    readStumpinessThreshold = 0.1
    # Margin of 7 kept from the original code; its rationale is not visible
    # here.
    maxAlignedLength = paddedTemplateWidth - 7

    refId = [x for x in reference.enumerateIds()][0]
    refWindow = (refId, 0, reference.byId[refId].length)
    intStart, intEnd = interval
    subWin = subWindow(refWindow, interval)

    rows = readsInWindow(cmpH5, subWin,
                         depthLimit=depthLimit,
                         minMapQV=minMapQV,
                         strategy="longest",
                         stratum=None,
                         barcode=None)
    # Keep only reads covering the whole interval, clip them to it, then
    # drop the ones too long for the padded buffers.
    spanningRows = [row for row in rows
                    if cmpH5[row].spansReferenceRange(intStart, intEnd)]
    clipped = [aln.clippedTo(*interval) for aln in cmpH5[spanningRows]]
    shortEnough = [aln for aln in clipped
                   if aln.alignedLength <= maxAlignedLength]
    clippedAlns = filterAlns(subWin, shortEnough, readStumpinessThreshold)

    # Compute the POA consensus, which is our initial guess, and
    # should typically be > 99.5% accurate.
    # NOTE(review): the accessor below was reconstructed from a garbled
    # line, by analogy with the QV accessors further down -- confirm that
    # ``read`` is the intended method.
    fwdSequences = [a.read(orientation="genomic", aligned=False)
                    for a in clippedAlns]
    template = cc.PoaConsensus.FindConsensus(
        fwdSequences[:maxPoaCoverage]).Sequence()

    # Materialise the ordinals as a list so len() and the slice assignment
    # do not depend on map() returning a list.
    tmplOrds = [ord(base) for base in template]
    tmplLen = len(tmplOrds)
    tmplSeq = np.zeros(paddedTemplateWidth, dtype=np.uint8)
    tmplSeq[:tmplLen] = tmplOrds

    numReads = len(clippedAlns)
    # readSeqs axes: (read position, read)
    readSeqs = np.zeros((paddedTemplateWidth, numReads), dtype=np.uint8)
    # qvInfo axes: (metric, read position, read)
    qvInfo = np.zeros((8, paddedTemplateWidth, numReads), dtype=np.uint8)
    for i, (aln, seq) in enumerate(zip(clippedAlns, fwdSequences)):
        alnOrds = [ord(base) for base in seq]
        readSeqs[:len(alnOrds), i] = alnOrds

        readLen = aln.readLength
        qvInfo[0, :readLen, i] = aln.InsertionQV(orientation="genomic", aligned=False)
        qvInfo[1, :readLen, i] = aln.MergeQV(orientation="genomic", aligned=False)
        qvInfo[2, :readLen, i] = aln.DeletionQV(orientation="genomic", aligned=False)
        qvInfo[3, :readLen, i] = aln.DeletionTag(orientation="genomic", aligned=False)
        qvInfo[4, :readLen, i] = aln.SubstitutionQV(orientation="genomic", aligned=False)

    if real_quiver:
        return template, tmplLen, fwdSequences, qvInfo
    return tmplSeq, tmplLen, readSeqs, qvInfo
def getReads(cmpH5, reference, interval, paddedTemplateWidth, depthLimit, real_quiver=False):
    """Gather the reads spanning ``interval`` together with a POA template.

    Args:
        cmpH5: Alignment container; it is indexed both by a single row and
            by a list of rows, and is passed to ``readsInWindow``.
        reference: Object exposing ``enumerateIds()`` and ``byId``.  Only the
            first enumerated id is used.
        interval: ``(start, end)`` pair in reference coordinates.
        paddedTemplateWidth: Length of the padded axis of the returned
            arrays.  Clipped alignments whose ``alignedLength`` exceeds
            ``paddedTemplateWidth - 7`` are discarded.
        depthLimit: Forwarded to ``readsInWindow``.
        real_quiver: Selects the flavour of the return value (see below).

    Returns:
        A 4-tuple.  When ``real_quiver`` is truthy:
        ``(template, templateLength, fwdSequences, qvInfo)`` where
        ``template`` is the POA consensus sequence and ``fwdSequences`` the
        list of read sequences.  Otherwise:
        ``(tmplSeq, templateLength, readSeqs, qvInfo)`` where ``tmplSeq`` is
        a ``uint8`` array of shape ``(paddedTemplateWidth,)`` and
        ``readSeqs`` a ``uint8`` array of shape
        ``(paddedTemplateWidth, nReads)``, both holding character ordinals
        and zero-padded.  ``qvInfo`` is a ``uint8`` array of shape
        ``(8, paddedTemplateWidth, nReads)``; channels 0-4 hold InsertionQV,
        MergeQV, DeletionQV, DeletionTag and SubstitutionQV, channels 5-7
        stay zero.
    """
    minMapQV = 10
    maxPoaCoverage = 11
    readStumpinessThreshold = 0.1
    # Margin of 7 kept from the original code; its rationale is not visible
    # here.
    maxAlignedLength = paddedTemplateWidth - 7

    refId = [x for x in reference.enumerateIds()][0]
    refWindow = (refId, 0, reference.byId[refId].length)
    intStart, intEnd = interval
    subWin = subWindow(refWindow, interval)

    rows = readsInWindow(cmpH5, subWin,
                         depthLimit=depthLimit,
                         minMapQV=minMapQV,
                         strategy="longest",
                         stratum=None,
                         barcode=None)
    # Keep only reads covering the whole interval, clip them to it, then
    # drop the ones too long for the padded buffers.
    spanningRows = [row for row in rows
                    if cmpH5[row].spansReferenceRange(intStart, intEnd)]
    clipped = [aln.clippedTo(*interval) for aln in cmpH5[spanningRows]]
    shortEnough = [aln for aln in clipped
                   if aln.alignedLength <= maxAlignedLength]
    clippedAlns = filterAlns(subWin, shortEnough, readStumpinessThreshold)

    # Compute the POA consensus, which is our initial guess, and
    # should typically be > 99.5% accurate.
    # NOTE(review): the accessor below was reconstructed from a garbled
    # line, by analogy with the QV accessors further down -- confirm that
    # ``read`` is the intended method.
    fwdSequences = [a.read(orientation="genomic", aligned=False)
                    for a in clippedAlns]
    template = cc.PoaConsensus.FindConsensus(
        fwdSequences[:maxPoaCoverage]).Sequence()

    # Materialise the ordinals as a list so len() and the slice assignment
    # do not depend on map() returning a list.
    tmplOrds = [ord(base) for base in template]
    tmplLen = len(tmplOrds)
    tmplSeq = np.zeros(paddedTemplateWidth, dtype=np.uint8)
    tmplSeq[:tmplLen] = tmplOrds

    numReads = len(clippedAlns)
    # readSeqs axes: (read position, read)
    readSeqs = np.zeros((paddedTemplateWidth, numReads), dtype=np.uint8)
    # qvInfo axes: (metric, read position, read)
    qvInfo = np.zeros((8, paddedTemplateWidth, numReads), dtype=np.uint8)
    for i, (aln, seq) in enumerate(zip(clippedAlns, fwdSequences)):
        alnOrds = [ord(base) for base in seq]
        readSeqs[:len(alnOrds), i] = alnOrds

        readLen = aln.readLength
        qvInfo[0, :readLen, i] = aln.InsertionQV(orientation="genomic", aligned=False)
        qvInfo[1, :readLen, i] = aln.MergeQV(orientation="genomic", aligned=False)
        qvInfo[2, :readLen, i] = aln.DeletionQV(orientation="genomic", aligned=False)
        qvInfo[3, :readLen, i] = aln.DeletionTag(orientation="genomic", aligned=False)
        qvInfo[4, :readLen, i] = aln.SubstitutionQV(orientation="genomic", aligned=False)

    if real_quiver:
        return template, tmplLen, fwdSequences, qvInfo
    return tmplSeq, tmplLen, readSeqs, qvInfo
# Module-level setup executed at import time: builds an options object,
# opens the alignment file, configures Quiver and loads the reference.
# NOTE(review): all paths below are absolute and machine-specific.
options = dummy()  # presumably a plain attribute holder; `dummy` is defined elsewhere
options.diploid = False
options.parametersFile = "/home/nick/workspace/btry6790_project/venv/lib/python2.7/site-packages/GenomicConsensus/quiver/resources/2013-09/GenomicConsensus/QuiverParameters.ini"
options.parameterSet = "best"
options.refineDinucleotideRepeats = True
options.noEvidenceConsensusCall = "nocall"
options.minMapQV = 10
options.fastMode = False
cmpH5 = CmpH5Reader('/home/nick/workspace/btry6790_project/PXO99A_ref_wo_one_copy_212kb_repeat.cmp.h5')
quiverConfig = configure(options, cmpH5)
depthLimit = 100
reference.loadFromFile("/home/nick/workspace/btry6790_project/ref_PXO99A_genome_reference_wo_one_copy_212k_repeat/sequence/ref_PXO99A_genome_reference_wo_one_copy_212k_repeat.fasta", cmpH5)
# Only the first enumerated reference id is used.
refId = [x for x in reference.enumerateIds()][0]
refSeq = reference.byId[refId].sequence
# Window tuple (id, 0, length) built from the first reference entry.
refWindow = (refId, 0, reference.byId[refId].length)


# NOTE(review): only the beginning of this function is visible in this part
# of the file; the body may continue past the last statement shown here, so
# its return value and the use of `seedConsensus` are not documented.
def run_real_quiver(cmpH5, quiverConfig, interval, depthLimit, refSeq, refWindow, seedConsensus):
    # `interval` is unpacked as a (start, end) pair.
    intStart, intEnd = interval
    subWin = subWindow(refWindow, interval)
    # Slice of the reference sequence covered by the interval.
    windowRefSeq = refSeq[intStart:intEnd]
    # Select rows for the sub-window, using the mapping-quality threshold
    # taken from the Quiver configuration.
    rows = readsInWindow(cmpH5, subWin,
                         depthLimit = depthLimit,
                         minMapQV = quiverConfig.minMapQV,
                         strategy = "longest",
                         stratum = None,
                         barcode = None)