def findDiffFP(self):
    """
    Score the footprints of this interval against the second set of reads.

    Footprints are located with ``self.footprints`` and then handed, together
    with the cut arrays of both data sets (``self.reads`` and ``self.reads2``),
    to ``pyDNase.footprinting.WellingtonC.diff_calculate``.

    Returns:
        A list with one ``pyDNase.GenomicInterval`` per footprint. Each
        interval is centred on ``self.interval.startbp + fp[0]``, extends half
        of the returned footprint size to either side, and carries the
        returned probability as its score.
    """
    first_sample = self.reads
    fwd_a = first_sample["+"]
    rev_a = first_sample["-"]
    second_sample = self.reads2
    fwd_b = second_sample["+"]
    rev_b = second_sample["-"]

    # Use whichever of the FDR value and MIN_SCORE is lower as the cutoff
    cutoff = min(self.FDR_value, self.MIN_SCORE)

    # Locate the candidate footprints at that cutoff
    candidates = self.footprints(cutoff)

    # NOTE(review): fp[1] / fp[0] are presumably the footprint sizes and the
    # footprint offsets expected by diff_calculate -- confirm against its signature.
    second_fields = [fp[1] for fp in candidates]
    first_fields = [fp[0] for fp in candidates]

    # Work out the bootstrap scores for these footprints using the other data set
    probabilities, footprint_sizes = pyDNase.footprinting.WellingtonC.diff_calculate(
        fwd_a, rev_a, fwd_b, rev_b, second_fields, first_fields, cutoff)

    found = []
    for fp in candidates:
        offset = fp[0]
        centre = self.interval.startbp + offset
        half_width = footprint_sizes[offset] // 2
        start = centre - half_width
        end = centre + half_width
        fp_score = probabilities[offset]
        found.append(pyDNase.GenomicInterval(self.interval.chromosome, start, end, score=fp_score))
    return found
def footprints(self, withCutoff=-30, merge=1):
    """
    Return a GenomicIntervalSet of the footprints scoring below a cutoff.

    Summits are picked greedily from a copy of ``self.scores``: the position
    of the current minimum becomes a footprint centre, the footprint length at
    that position is read from ``self.lengths``, and the surrounding region is
    masked out before the next minimum is searched for.

    Args:
        withCutoff: a position is only used as a summit while its score is
            strictly below this value.
        merge: when truthy, candidates are visited from the lowest score
            upwards, and every remaining candidate whose span contains the
            right-hand end of an accepted footprint is dropped. When falsy,
            all candidates are kept.

    Returns:
        A ``pyDNase.GenomicIntervalSet`` holding one "+" strand
        ``GenomicInterval`` per footprint, scored with the summit value. The
        set is empty when nothing falls below the cutoff or when there are no
        scores at all.
    """
    ranges = []
    # np.array() copies its input, so the masking below never alters self.scores
    tempMLE, templogProb = np.array(self.lengths), np.array(self.scores)

    # Greedy summit selection; the size check avoids calling min() on an empty trace
    while templogProb.size:
        lowest = templogProb.min()
        # "not <" (rather than ">=") so that a NaN minimum also stops the search
        if not lowest < withCutoff:
            break
        trace_len = len(templogProb)
        minimapos = templogProb.argmin()
        minimafplen = tempMLE[minimapos]
        fplen = int(minimafplen)
        # Floor division keeps the bounds integral so they remain valid slice indices
        minimaphalffplen = fplen // 2
        lbound = max(minimapos - minimaphalffplen, 0)
        rbound = min(minimapos + minimaphalffplen, trace_len)
        ranges.append((lbound, rbound, lowest, minimafplen))
        # Mask the footprint plus one footprint length of flank on each side
        templogProb[max(lbound - fplen, 0):min(rbound + fplen, trace_len)] = 1

    returnSet = pyDNase.GenomicIntervalSet()
    if not ranges:
        return returnSet

    if merge:
        # Lower score is better: order the candidates so the best one sits last.
        # This has to be an in-place sort -- sorted() would hand back a new list
        # and leave `ranges` untouched. Filtering below preserves the order, so
        # sorting once up front is enough.
        ranges.sort(key=lambda x: -x[2])
        merged_ranges = []
        while ranges:
            best = ranges.pop()
            merged_ranges.append(best)
            # Drop every candidate whose span contains the right end of `best`
            ranges = [r for r in ranges if not r[0] <= best[1] <= r[1]]
    else:
        merged_ranges = ranges

    # Translate the offsets into genomic coordinates and collect the intervals
    for lbound, rbound, score, _ in merged_ranges:
        rstartbp = self.interval.startbp + lbound
        # We must add one to the end base of the footprint to account for the BED file format
        rendbp = self.interval.startbp + rbound + 1
        returnSet += pyDNase.GenomicInterval(self.interval.chromosome, rstartbp, rendbp, strand="+", score=score)
    return returnSet