def makeBinDist(self, transformedCP, averageCoverages, kmerNormPC1, kmerPCs, contigGCs, contigLengths):
    """Determine the distribution of the points in this bin.

    The distribution is largely normal, except at the boundaries.

    All results are stored on ``self``; nothing is returned. If the bin has
    no rows only ``self.binSize`` is updated and the method returns early.

    Parameters
    ----------
    transformedCP
        Passed to ``getCentroidStats``; the result is stored as
        ``covMedians`` / ``covStdevs``.
    averageCoverages
        Passed to ``getAverageCoverageDist``; the median and standard
        deviation of the result (rounded to 3 decimals) are stored as
        ``cValMedian`` / ``cValStdev``.
    kmerNormPC1
        Indexed with ``self.rowIndices``; median and standard deviation are
        stored as ``kValMeanNormPC1`` / ``kValStdevNormPC1``.
    kmerPCs
        Indexed with ``self.rowIndices``; per-column (``axis=0``) median and
        standard deviation are stored as ``kMedian`` / ``kStdevs``.
    contigGCs
        Indexed with ``self.rowIndices``; median and standard deviation are
        stored as ``gcMedian`` / ``gcStdev``.
    contigLengths
        Passed to ``getCentroidStats`` (stored as ``lengthMean`` /
        ``lengthStd``) and summed over ``self.rowIndices`` into ``totalBP``.
    """
    self.binSize = self.rowIndices.shape[0]
    if self.rowIndices.size == 0:
        # empty bin: there is nothing to summarise
        return

    # get the centroids
    (self.covMedians, self.covStdevs) = self.getCentroidStats(transformedCP)
    (self.lengthMean, self.lengthStd) = self.getCentroidStats(contigLengths)

    # The normalised-PC1 statistics come from kmerNormPC1. They were
    # previously taken over the whole (flattened) kmerPCs selection, which
    # left the kmerNormPC1 argument unused.
    # NOTE(review): despite the attribute name this is a median, as before.
    bin_norm_pc1 = kmerNormPC1[self.rowIndices]
    self.kValMeanNormPC1 = np_median(bin_norm_pc1)
    self.kValStdevNormPC1 = np_std(bin_norm_pc1)

    # per-component statistics over this bin's rows
    bin_pcs = kmerPCs[self.rowIndices]
    self.kMedian = np_median(bin_pcs, axis=0)
    self.kStdevs = np_std(bin_pcs, axis=0)

    cvals = self.getAverageCoverageDist(averageCoverages)
    self.cValMedian = np_around(np_median(cvals), decimals=3)
    self.cValStdev = np_around(np_std(cvals), decimals=3)

    bin_gcs = contigGCs[self.rowIndices]
    self.gcMedian = np_median(bin_gcs)
    self.gcStdev = np_std(bin_gcs)

    # work out the total size
    self.totalBP = sum(contigLengths[i] for i in self.rowIndices)

    # set the acceptance ranges
    self.makeLimits()
def makeBinDist(self, transformedCP, averageCoverages, kmerNormPC1, kmerPCs, contigGCs, contigLengths):
    """Determine the distribution of the points in this bin.

    The distribution is largely normal, except at the boundaries.

    All results are stored on ``self``; nothing is returned. If the bin has
    no rows only ``self.binSize`` is updated and the method returns early.

    Parameters
    ----------
    transformedCP
        Passed to ``getCentroidStats``; the result is stored as
        ``covMedians`` / ``covStdevs``.
    averageCoverages
        Passed to ``getAverageCoverageDist``; the median and standard
        deviation of the result (rounded to 3 decimals) are stored as
        ``cValMedian`` / ``cValStdev``.
    kmerNormPC1
        Indexed with ``self.rowIndices``; median and standard deviation are
        stored as ``kValMeanNormPC1`` / ``kValStdevNormPC1``.
    kmerPCs
        Indexed with ``self.rowIndices``; per-column (``axis=0``) median and
        standard deviation are stored as ``kMedian`` / ``kStdevs``.
    contigGCs
        Indexed with ``self.rowIndices``; median and standard deviation are
        stored as ``gcMedian`` / ``gcStdev``.
    contigLengths
        Passed to ``getCentroidStats`` (stored as ``lengthMean`` /
        ``lengthStd``) and summed over ``self.rowIndices`` into ``totalBP``.
    """
    self.binSize = self.rowIndices.shape[0]
    if self.rowIndices.size == 0:
        # empty bin: there is nothing to summarise
        return

    # get the centroids
    (self.covMedians, self.covStdevs) = self.getCentroidStats(transformedCP)
    (self.lengthMean, self.lengthStd) = self.getCentroidStats(contigLengths)

    # The normalised-PC1 statistics come from kmerNormPC1. They were
    # previously taken over the whole (flattened) kmerPCs selection, which
    # left the kmerNormPC1 argument unused.
    # NOTE(review): despite the attribute name this is a median, as before.
    bin_norm_pc1 = kmerNormPC1[self.rowIndices]
    self.kValMeanNormPC1 = np_median(bin_norm_pc1)
    self.kValStdevNormPC1 = np_std(bin_norm_pc1)

    # per-component statistics over this bin's rows
    bin_pcs = kmerPCs[self.rowIndices]
    self.kMedian = np_median(bin_pcs, axis=0)
    self.kStdevs = np_std(bin_pcs, axis=0)

    cvals = self.getAverageCoverageDist(averageCoverages)
    self.cValMedian = np_around(np_median(cvals), decimals=3)
    self.cValStdev = np_around(np_std(cvals), decimals=3)

    bin_gcs = contigGCs[self.rowIndices]
    self.gcMedian = np_median(bin_gcs)
    self.gcStdev = np_std(bin_gcs)

    # work out the total size
    self.totalBP = sum(contigLengths[i] for i in self.rowIndices)

    # set the acceptance ranges
    self.makeLimits()
def populateImageMaps(self):
    """Rebuild the image maps and the point-to-row lookup from scratch.

    ``self.imageMaps`` is reset to zeros and ``self.im2RowIndicies`` to an
    empty dict. Every row of ``self.PM.transformedCP`` that is neither in
    ``self.PM.binnedRowIndicies`` nor in ``self.PM.restrictedRowIndicies``
    is then rounded, recorded under its coordinate tuple and pushed into
    the maps via ``incrementViaRowIndex``.
    """
    side = self.PM.scaleFactor

    # start from a clean slate, just in case
    self.imageMaps = np_zeros((self.numImgMaps, side, side))
    self.im2RowIndicies = {}

    for row_index, rounded in enumerate(np_around(self.PM.transformedCP)):
        # a contig can only be binned once, so skip anything already taken
        if row_index in self.PM.binnedRowIndicies or row_index in self.PM.restrictedRowIndicies:
            continue

        # remember which rows landed on this grid point so the map can be
        # related back to individual contigs later
        coords = tuple(rounded)
        self.im2RowIndicies.setdefault(coords, []).append(row_index)

        # the increment itself (how far it spreads around the point) is
        # handled by incrementViaRowIndex
        self.incrementViaRowIndex(row_index, coords)
def decrementViaRowIndex(self, rowIndex, point=None):
    """Decrement the image maps around the point belonging to ``rowIndex``.

    If ``point`` is not given it is taken from the rounded row
    ``self.PM.transformedCP[rowIndex]``. Each decrement is weighted by
    log10 of ``self.PM.contigLengths[rowIndex]``. Map 0 is always updated;
    maps 1 and 2 only when ``self.numImgMaps`` is greater than 1.
    """
    if point is None:
        point = tuple(np_around(self.PM.transformedCP[rowIndex]))

    weight = np_log10(self.PM.contigLengths[rowIndex])

    # map 0 uses the first two coordinates as they are
    self.decrementAboutPoint(0, point[0], point[1], multiplier=weight)

    if self.numImgMaps <= 1:
        return

    # the remaining maps use coordinates mirrored within the scale factor
    side = self.PM.scaleFactor
    mirrored_z = side - point[2] - 1
    mirrored_x = side - point[0] - 1
    self.decrementAboutPoint(1, mirrored_z, point[1], multiplier=weight)
    self.decrementAboutPoint(2, mirrored_z, mirrored_x, multiplier=weight)
def findNewClusterCenters(self, ss=0):
    """Find a putative cluster and split its contigs into partitions.

    The search starts at the maximum of ``self.blurredMaps[0]``. Unbinned,
    unrestricted contigs in the column around that maximum are accumulated
    into a 3D block, the block is blurred, and the contigs close to the
    densest cell become the candidate centre.

    ``ss`` is accepted but not used by this method.

    Returns
    -------
    None
        When no contig lies near the densest cell, or when partitioning
        the candidates' ``kmerVals`` yields no partitions.
    [partitions, ret_values]
        ``partitions`` is a list of arrays of row indices and
        ``ret_values`` is ``[max_value, max_x, max_y]`` for the maximum of
        the blurred top map (not the refined dense point). With a single
        candidate, or when ``isGoodBin`` rejects the candidates,
        ``partitions`` holds one array with all of them.
    """

    def within(value, lower, upper):
        """Half-open range test: lower <= value < upper."""
        return value >= lower and value < upper

    scale = self.PM.scaleFactor

    # we work from the top view as this has the base clustering
    top_map = self.blurredMaps[0]
    peak_index = np_argmax(top_map)
    peak_value = top_map.ravel()[peak_index]
    peak_x = int(peak_index / scale)
    peak_y = peak_index - scale * peak_x
    ret_values = [peak_value, peak_x, peak_y]

    wide_span = int(1.5 * self.span)
    block_side = 2 * wide_span + 1

    if self.debugPlots:
        self.plotRegion(
            peak_x, peak_y, -1, fileName="Image_" + str(self.imageCounter), tag="column", column=True
        )
        self.imageCounter += 1

    # 3D grid holding the weighted contig counts for the whole column
    column_block = np_zeros((block_side, block_side, scale))

    (x_lower, x_upper) = self.makeCoordRanges(peak_x, wide_span)
    (y_lower, y_upper) = self.makeCoordRanges(peak_y, wide_span)

    column_rows = []
    for coords, row_ids in self.im2RowIndicies.items():
        if not (within(coords[0], x_lower, x_upper) and within(coords[1], y_lower, y_upper)):
            continue
        for row_index in row_ids:
            # only unassigned points count: skip binned or restricted rows
            if row_index in self.PM.binnedRowIndicies or row_index in self.PM.restrictedRowIndicies:
                continue
            weight = np_log10(self.PM.contigLengths[row_index])
            self.incrementAboutPoint3D(
                column_block, coords[0] - x_lower, coords[1] - y_lower, coords[2], multiplier=weight
            )
            column_rows.append(row_index)

    # blur and find the highest value (sigma is fixed at 8 here)
    blurred_block = ndi.gaussian_filter(column_block, 8)
    densest = np_unravel_index(np_argmax(blurred_block), (np_shape(blurred_block)))
    dense_x = densest[0] + x_lower
    dense_y = densest[1] + y_lower
    dense_z = densest[2]

    # narrow down to the contigs sitting close to the dense point
    (x_lower, x_upper) = self.makeCoordRanges(dense_x, self.span)
    (y_lower, y_upper) = self.makeCoordRanges(dense_y, self.span)
    (z_lower, z_upper) = self.makeCoordRanges(dense_z, 2 * self.span)

    center_rows = []
    for row_index in column_rows:
        rounded = np_around(self.PM.transformedCP[row_index])
        if (
            within(rounded[0], x_lower, x_upper)
            and within(rounded[1], y_lower, y_upper)
            and within(rounded[2], z_lower, z_upper)
        ):
            center_rows.append(row_index)

    # make sure we have something to go on here
    if len(center_rows) == 0:
        # it's all over!
        return None

    if len(center_rows) == 1:
        # get out of here but keep trying;
        # the calling function may restrict these indices
        return [[np_array(center_rows)], ret_values]

    total_BP = sum(self.PM.contigLengths[i] for i in center_rows)
    if not self.isGoodBin(total_BP, len(center_rows), ms=5):
        # very small bins are not trusted: hand everything back as one
        # group so the calling function can restrict these indices
        return [[np_array(center_rows)], ret_values]

    # we've got a few good candidates here, partition them up
    center_k_vals = np_array([self.PM.kmerVals[i] for i in center_rows])
    k_partitions = self.partitionVals(center_k_vals)
    if len(k_partitions) == 0:
        return None

    # last column of transformedCP, shifted to start at 0 and scaled to a
    # maximum of 1 (scaling is skipped when all values are equal)
    center_c_vals = np_array([self.PM.transformedCP[i][-1] for i in center_rows])
    center_c_vals -= np_min(center_c_vals)
    c_max = np_max(center_c_vals)
    if c_max != 0:
        center_c_vals /= c_max
    c_partitions = self.partitionVals(center_c_vals)

    # take the intersection of the two partitions: members end up in the
    # same group only when they share both a k label and a c label
    k_label_of = {}
    for k_label, members in enumerate(k_partitions, start=1):
        for member in members:
            k_label_of[member] = k_label

    grouped = {}
    for c_label, members in enumerate(c_partitions, start=1):
        for member in members:
            grouped.setdefault((k_label_of[member], c_label), []).append(member)

    # partition members are positions within center_rows; map them back
    # to row indices
    partitions = [np_array([center_rows[m] for m in members]) for members in grouped.values()]

    return [partitions, ret_values]