コード例 #1
0
ファイル: bin.py プロジェクト: alneberg/GroopM
    def makeBinDist(self, transformedCP, averageCoverages, kmerNormPC1, kmerPCs, contigGCs, contigLengths):
        """Summarise the contigs in this bin and refresh its acceptance limits.

        The distribution is largely normal, except at the boundaries.

        All statistics are taken over the rows listed in ``self.rowIndices``.
        For an empty bin only ``self.binSize`` is updated and the method
        returns without touching any other attribute.

        Arguments:
            transformedCP: handed to ``self.getCentroidStats``; yields
                ``covMedians`` and ``covStdevs``.
            averageCoverages: handed to ``self.getAverageCoverageDist``;
                yields ``cValMedian`` and ``cValStdev`` (rounded to 3 places).
            kmerNormPC1: accepted but never read here.
                NOTE(review): ``kValMeanNormPC1`` / ``kValStdevNormPC1`` are
                computed from ``kmerPCs`` -- confirm this is intended.
            kmerPCs: indexed by ``self.rowIndices``; source of the k-mer stats.
            contigGCs: indexed by ``self.rowIndices``; source of the GC stats.
            contigLengths: handed to ``self.getCentroidStats`` and summed per
                row to give ``totalBP``.
        """
        self.binSize = self.rowIndices.shape[0]
        if np.size(self.rowIndices) == 0:
            # nothing to summarise for an empty bin
            return

        # centroid statistics for the coverage profile and the contig lengths
        cov_stats = self.getCentroidStats(transformedCP)
        self.covMedians, self.covStdevs = cov_stats
        length_stats = self.getCentroidStats(contigLengths)
        self.lengthMean, self.lengthStd = length_stats

        # k-mer statistics: first pooled over every value, then along axis 0
        member_pcs = kmerPCs[self.rowIndices]
        self.kValMeanNormPC1 = np_median(member_pcs)
        self.kValStdevNormPC1 = np_std(member_pcs)
        self.kMedian = np_median(member_pcs, axis=0)
        self.kStdevs = np_std(member_pcs, axis=0)

        # coverage statistics
        coverage_dist = self.getAverageCoverageDist(averageCoverages)
        self.cValMedian = np_around(np_median(coverage_dist), decimals=3)
        self.cValStdev = np_around(np_std(coverage_dist), decimals=3)

        # GC statistics
        member_gcs = contigGCs[self.rowIndices]
        self.gcMedian = np_median(member_gcs)
        self.gcStdev = np_std(member_gcs)

        # total number of bases held by the bin
        self.totalBP = sum(contigLengths[i] for i in self.rowIndices)

        # set the acceptance ranges
        self.makeLimits()
コード例 #2
0
ファイル: bin.py プロジェクト: zhaoxia413/GroopM
    def makeBinDist(self, transformedCP, averageCoverages, kmerNormPC1,
                    kmerPCs, contigGCs, contigLengths):
        """Recompute the descriptive statistics of this bin.

        The distribution is largely normal, except at the boundaries.

        Only the rows named in ``self.rowIndices`` contribute. An empty bin
        gets ``self.binSize`` set and nothing else. Otherwise the coverage,
        length, k-mer, GC and size attributes are refreshed and
        ``self.makeLimits()`` is called last.

        NOTE(review): ``kmerNormPC1`` is not read by this method; the
        ``kVal*NormPC1`` attributes are derived from ``kmerPCs``. Confirm
        that this is intended.
        """
        self.binSize = self.rowIndices.shape[0]
        if not np.size(self.rowIndices):
            return

        # centroids of the coverage profile and of the contig lengths
        self.covMedians, self.covStdevs = self.getCentroidStats(transformedCP)
        self.lengthMean, self.lengthStd = self.getCentroidStats(contigLengths)

        # k-mer components of the bin members
        bin_pcs = kmerPCs[self.rowIndices]
        # ... pooled over every value
        self.kValMeanNormPC1, self.kValStdevNormPC1 = (np_median(bin_pcs),
                                                       np_std(bin_pcs))
        # ... and reduced along axis 0
        self.kMedian, self.kStdevs = (np_median(bin_pcs, axis=0),
                                      np_std(bin_pcs, axis=0))

        # average coverage, rounded to three decimal places
        bin_cvals = self.getAverageCoverageDist(averageCoverages)
        self.cValMedian = np_around(np_median(bin_cvals), decimals=3)
        self.cValStdev = np_around(np_std(bin_cvals), decimals=3)

        # GC content of the bin members
        bin_gcs = contigGCs[self.rowIndices]
        self.gcMedian, self.gcStdev = np_median(bin_gcs), np_std(bin_gcs)

        # work out the total size
        total = 0
        for row in self.rowIndices:
            total = total + contigLengths[row]
        self.totalBP = total

        # set the acceptance ranges
        self.makeLimits()
コード例 #3
0
ファイル: cluster.py プロジェクト: jnesme/GroopM-1
    def populateImageMaps(self):
        """Rebuild the image maps and the point lookup from the transformed data.

        ``self.imageMaps`` is reset to zeros and ``self.im2RowIndicies`` to an
        empty dict. Every row of ``self.PM.transformedCP`` that is neither
        binned nor restricted is then rounded to a grid point, recorded
        under that point and passed to ``self.incrementViaRowIndex``.
        """
        # reset these guys... JIC
        side = self.PM.scaleFactor
        self.imageMaps = np_zeros((self.numImgMaps, side, side))
        self.im2RowIndicies = {}

        for row_index, rounded in enumerate(np_around(self.PM.transformedCP)):
            # can only bin things once!
            if row_index in self.PM.binnedRowIndicies or row_index in self.PM.restrictedRowIndicies:
                continue

            # remember which rows landed on this grid point so the map can
            # be related back to individual points later
            grid_point = tuple(rounded)
            self.im2RowIndicies.setdefault(grid_point, []).append(row_index)

            # the increment is applied about the point's position in the grid
            self.incrementViaRowIndex(row_index, grid_point)
コード例 #4
0
ファイル: cluster.py プロジェクト: jnesme/GroopM-1
 def decrementViaRowIndex(self, rowIndex, point=None):
     """Decrement the image maps about the grid point of one row.

     Arguments:
         rowIndex: row whose contig length weights the decrement
             (``log10`` of ``self.PM.contigLengths[rowIndex]``).
         point: grid coordinates to decrement about. When omitted, the
             rounded ``self.PM.transformedCP[rowIndex]`` is used.
     """
     if point is None:
         point = tuple(np_around(self.PM.transformedCP[rowIndex]))
     weight = np_log10(self.PM.contigLengths[rowIndex])

     # map 0 is addressed directly by the first two coordinates
     self.decrementAboutPoint(0, point[0], point[1], multiplier=weight)
     if self.numImgMaps <= 1:
         return

     # maps 1 and 2 are addressed with coordinates mirrored about the grid edge
     mirrored_z = self.PM.scaleFactor - point[2] - 1
     self.decrementAboutPoint(1, mirrored_z, point[1], multiplier=weight)
     mirrored_x = self.PM.scaleFactor - point[0] - 1
     self.decrementAboutPoint(2, mirrored_z, mirrored_x, multiplier=weight)
コード例 #5
0
ファイル: cluster.py プロジェクト: jnesme/GroopM-1
    def findNewClusterCenters(self, ss=0):
        """Find a putative cluster around the densest unassigned region.

        The brightest cell of ``self.blurredMaps[0]`` selects a column of the
        grid. Points in that column which are neither binned nor restricted
        are accumulated into a small 3D block. The block is blurred, and the
        points close to its densest cell form the putative cluster center.
        A center accepted by ``self.isGoodBin`` is partitioned once on k-mer
        values and once on the last transformed coverage coordinate, and the
        intersection of the two partitionings is returned.

        Arguments:
            ss: not read by this method; kept so existing callers still work.

        Returns:
            ``None`` when no point lies near the densest cell, or when the
            k-mer partitioning comes back empty. Otherwise
            ``[partitions, [max_value, max_x, max_y]]``: ``partitions`` is a
            list of arrays of row indices, and the second element is the
            value and grid position of the ``self.blurredMaps[0]`` peak that
            seeded the search. When there is a single candidate, or
            ``self.isGoodBin`` rejects the candidates, ``partitions`` holds
            one array with all of them (the calling function may restrict
            these indices).
        """
        scale = self.PM.scaleFactor

        # we work from the top view as this has the base clustering
        top_view = self.blurredMaps[0]
        max_index = np_argmax(top_view)
        max_value = top_view.ravel()[max_index]

        # unflatten the peak index with integer arithmetic only
        max_x = int(max_index // scale)
        max_y = max_index - scale * max_x
        max_z = -1

        ret_values = [max_value, max_x, max_y]

        start_span = int(1.5 * self.span)
        span_len = 2 * start_span + 1

        if self.debugPlots:
            self.plotRegion(max_x, max_y, max_z, fileName="Image_" + str(self.imageCounter), tag="column", column=True)
            self.imageCounter += 1

        # make a 3d grid to hold the values
        working_block = np_zeros((span_len, span_len, scale))

        # go through the entire column
        (x_lower, x_upper) = self.makeCoordRanges(max_x, start_span)
        (y_lower, y_upper) = self.makeCoordRanges(max_y, start_span)
        super_putative_row_indices = []
        for p, row_indices in self.im2RowIndicies.items():
            if not (x_lower <= p[0] < x_upper and y_lower <= p[1] < y_upper):
                continue
            for row_index in row_indices:
                # skip points which have already been binned or restricted
                if row_index in self.PM.binnedRowIndicies or row_index in self.PM.restrictedRowIndicies:
                    continue
                # this is an unassigned point.
                multiplier = np_log10(self.PM.contigLengths[row_index])
                self.incrementAboutPoint3D(
                    working_block, p[0] - x_lower, p[1] - y_lower, p[2], multiplier=multiplier
                )
                super_putative_row_indices.append(row_index)

        # blur and find the highest value
        # NOTE(review): the sigma is hard-coded to 8; the original comment
        # hints that self.blurRadius was used here at some point.
        bwb = ndi.gaussian_filter(working_block, 8)
        densest_index = np_unravel_index(np_argmax(bwb), np_shape(bwb))
        max_x = densest_index[0] + x_lower
        max_y = densest_index[1] + y_lower
        max_z = densest_index[2]

        # now keep only the points close to this dense point
        (x_lower, x_upper) = self.makeCoordRanges(max_x, self.span)
        (y_lower, y_upper) = self.makeCoordRanges(max_y, self.span)
        (z_lower, z_upper) = self.makeCoordRanges(max_z, 2 * self.span)

        putative_center_row_indices = []
        for row_index in super_putative_row_indices:
            p = np_around(self.PM.transformedCP[row_index])
            if x_lower <= p[0] < x_upper and y_lower <= p[1] < y_upper and z_lower <= p[2] < z_upper:
                # we are within the range!
                putative_center_row_indices.append(row_index)

        # make sure we have something to go on here
        if not putative_center_row_indices:
            # it's all over!
            return None

        if len(putative_center_row_indices) == 1:
            # get out of here but keep trying
            # the calling function may restrict these indices
            return [[np_array(putative_center_row_indices)], ret_values]

        total_BP = sum(self.PM.contigLengths[i] for i in putative_center_row_indices)
        if not self.isGoodBin(total_BP, len(putative_center_row_indices), ms=5):  # Can we trust very small bins?.
            # get out of here but keep trying
            # the calling function should restrict these indices
            return [[np_array(putative_center_row_indices)], ret_values]

        # we've got a few good guys here, partition them up!
        center_k_vals = np_array([self.PM.kmerVals[i] for i in putative_center_row_indices])
        k_partitions = self.partitionVals(center_k_vals)
        if len(k_partitions) == 0:
            return None

        # rescale the coverage values to start at 0 and, unless they are all
        # equal, to peak at 1 before partitioning them
        center_c_vals = np_array([self.PM.transformedCP[i][-1] for i in putative_center_row_indices])
        center_c_vals -= np_min(center_c_vals)
        c_max = np_max(center_c_vals)
        if c_max != 0:
            center_c_vals /= c_max
        c_partitions = self.partitionVals(center_c_vals)

        # take the intersection of the two partitions: label every member
        # with its k-mer partition, then group by (k label, coverage label)
        k_label_of = {}
        for k_label, members in enumerate(k_partitions, 1):
            for i in members:
                k_label_of[i] = k_label

        grouped = {}
        for c_label, members in enumerate(c_partitions, 1):
            for i in members:
                grouped.setdefault((k_label_of[i], c_label), []).append(i)

        # partition members are positions in the candidate list; map them
        # back to row indices
        partitions = [
            np_array([putative_center_row_indices[i] for i in members])
            for members in grouped.values()
        ]

        return [partitions, ret_values]