def handlePostPileupMerge(self, reads):
    '''
    This function compares a group of alignments by average pileup and decides
    which one to keep.

    The reads are split into pairs and singles with MergeImprove.pairReads. When
    at least one pair exists, only the pairs are scored (combined pileup total
    over combined base count) and the best-scoring pair(s) are handed to
    self.saveRandomPair. Otherwise each single is scored on its own, grouped by
    whether the first-segment flag is set, and the best read(s) of each group
    are handed to self.saveRandomSingle.

    When a group has exactly one best candidate it is tagged with
    MergeImprove.CHOICE_TYPE_TAG = 'P' and self.percentageChoice is incremented
    at the index int(100 * best / sum of all averages in that group), or at
    index 0 when the best average is 0.

    @param reads - a set of reads with the same name to be compared using pileup
    '''
    # NOTE(review): the indentation of this function was lost in the source it
    # was rebuilt from; the percentageChoice updates are assumed to be nested
    # under the "exactly one best candidate" checks - confirm against history.
    avgSum = 0

    [pairs, singles] = MergeImprove.pairReads(reads, MergeImprove.PILEUP_HI_TAG)

    if len(pairs) != 0:
        # -1 guarantees the first pair scored becomes the current best
        bestAvgPileup = -1
        bestPairs = []

        for pair in pairs:
            [tot1, bases1] = self.calcPileupStats(pair[0])
            [tot2, bases2] = self.calcPileupStats(pair[1])

            # NOTE(review): raises ZeroDivisionError if both base counts are 0
            avgPileup = float(tot1 + tot2) / (bases1 + bases2)
            avgSum += avgPileup

            # a strictly better score discards every previous candidate
            if avgPileup > bestAvgPileup:
                bestAvgPileup = avgPileup
                bestPairs = []

            # ties (including the new best itself) are all kept
            if avgPileup == bestAvgPileup:
                bestPairs.append(pair)

        #stats
        if len(bestPairs) == 1:
            MergeImprove.setTag(bestPairs[0][0], MergeImprove.CHOICE_TYPE_TAG, 'P')
            MergeImprove.setTag(bestPairs[0][1], MergeImprove.CHOICE_TYPE_TAG, 'P')

            #save pileup stats; += 2 because a pair contributes two reads
            if bestAvgPileup == 0:
                # avgSum may be 0 here, so skip the division entirely
                self.percentageChoice[0] += 2
            else:
                self.percentageChoice[int(100 * bestAvgPileup / avgSum)] += 2

        #save one of the best pileup pairs
        self.saveRandomPair(bestPairs)
    else:
        #do this over singles, tracked separately per end (keyed by isFirst)
        bestAvgPileup = {}
        bestReads = {}
        avgSum = {False: 0, True: 0}

        for read in singles:
            isFirst = MergeImprove.isFlagSet(read.flag, MergeImprove.FIRST_SEGMENT_FLAG)

            #if there's nothing yet for this end, set its best as -1 so it gets overwritten below
            # ("in" replaces dict.has_key, which does not exist on Python 3)
            if isFirst not in bestAvgPileup:
                bestAvgPileup[isFirst] = -1
                bestReads[isFirst] = []

            #get the pileup calculation
            [tot, bases] = self.calcPileupStats(read)
            # NOTE(review): raises ZeroDivisionError if bases is 0
            avgPileup = float(tot) / bases
            avgSum[isFirst] += avgPileup

            #if it's better, keep it
            if avgPileup > bestAvgPileup[isFirst]:
                bestAvgPileup[isFirst] = avgPileup
                bestReads[isFirst] = []

            if avgPileup == bestAvgPileup[isFirst]:
                bestReads[isFirst].append(read)

        #save the best from each end
        for end in bestReads:
            brs = bestReads[end]
            if len(brs) == 1:
                MergeImprove.setTag(brs[0], MergeImprove.CHOICE_TYPE_TAG, 'P')

                if bestAvgPileup[end] == 0:
                    self.percentageChoice[0] += 1
                else:
                    self.percentageChoice[int(100 * bestAvgPileup[end] / avgSum[end])] += 1

            self.saveRandomSingle(brs)
def handlePostPileupMerge(self, reads):
    '''
    This function compares a group of alignments and decides which one to keep,
    using the average pileup of each candidate as the score.

    MergeImprove.pairReads splits the input into pairs and singles. If there are
    any pairs, each pair is scored as (tot1+tot2)/(bases1+bases2) from
    self.calcPileupStats and the top-scoring pair(s) go to self.saveRandomPair.
    If there are no pairs, every single read is scored as tot/bases, the reads
    are bucketed by the result of the first-segment flag test, and the
    top-scoring read(s) of each bucket go to self.saveRandomSingle.

    A bucket with a single top scorer has that read (or both reads of the pair)
    tagged MergeImprove.CHOICE_TYPE_TAG = 'P', and self.percentageChoice is
    bumped at int(100*best/sum of averages), or at 0 when the best score is 0.

    @param reads - a set of reads with the same name to be compared using pileup
    '''
    # NOTE(review): this function was reconstructed from text whose indentation
    # had been stripped; placing the percentageChoice bookkeeping inside the
    # single-winner branches is an assumption - please verify.
    avgSum = 0

    [pairs, singles] = MergeImprove.pairReads(reads, MergeImprove.PILEUP_HI_TAG)

    if len(pairs) != 0:
        # start below any score seen here so the first pair always replaces it
        bestAvgPileup = -1
        bestPairs = []

        for pair in pairs:
            [tot1, bases1] = self.calcPileupStats(pair[0])
            [tot2, bases2] = self.calcPileupStats(pair[1])

            # NOTE(review): a combined base count of 0 raises ZeroDivisionError
            avgPileup = float(tot1+tot2)/(bases1+bases2)
            avgSum += avgPileup

            # new maximum: forget the candidates collected so far
            if avgPileup > bestAvgPileup:
                bestAvgPileup = avgPileup
                bestPairs = []

            # equal to the maximum: collect it (covers the new maximum too)
            if avgPileup == bestAvgPileup:
                bestPairs.append(pair)

        #stats
        if len(bestPairs) == 1:
            MergeImprove.setTag(bestPairs[0][0], MergeImprove.CHOICE_TYPE_TAG, 'P')
            MergeImprove.setTag(bestPairs[0][1], MergeImprove.CHOICE_TYPE_TAG, 'P')

            #save pileup stats, counting both reads of the pair
            if bestAvgPileup == 0:
                # handled separately so a zero avgSum is never divided by
                self.percentageChoice[0] += 2
            else:
                self.percentageChoice[int(100*bestAvgPileup/avgSum)] += 2

        #save one of the best pileup pairs
        self.saveRandomPair(bestPairs)
    else:
        #do this over singles; every dict below is keyed by isFirst
        bestAvgPileup = {}
        bestReads = {}
        avgSum = {False: 0, True: 0}

        for read in singles:
            isFirst = MergeImprove.isFlagSet(read.flag, MergeImprove.FIRST_SEGMENT_FLAG)

            #if there's nothing yet for this sequence, set its best as -1 so it gets overwritten below
            # membership test via "in": dict.has_key was removed in Python 3
            if isFirst not in bestAvgPileup:
                bestAvgPileup[isFirst] = -1
                bestReads[isFirst] = []

            #get the pileup calculation
            [tot, bases] = self.calcPileupStats(read)
            # NOTE(review): a base count of 0 raises ZeroDivisionError
            avgPileup = float(tot)/bases
            avgSum[isFirst] += avgPileup

            #if it's better, keep it
            if avgPileup > bestAvgPileup[isFirst]:
                bestAvgPileup[isFirst] = avgPileup
                bestReads[isFirst] = []

            if avgPileup == bestAvgPileup[isFirst]:
                bestReads[isFirst].append(read)

        #save the best from each end
        for end in bestReads:
            brs = bestReads[end]
            if len(brs) == 1:
                MergeImprove.setTag(brs[0], MergeImprove.CHOICE_TYPE_TAG, 'P')

                if bestAvgPileup[end] == 0:
                    self.percentageChoice[0] += 1
                else:
                    self.percentageChoice[int(100*bestAvgPileup[end]/avgSum[end])] += 1

            self.saveRandomSingle(brs)