def _fuzzy_kmeans_mapper(array, ex, old_centers, centers, counts, labels, m):
    """Update the new centers, new counts and labels using fuzzy kmeans.

    For every point in the extent, the membership probabilities returned by
    ``_calc_probability`` are accumulated into per-cluster weight totals and
    probability-weighted coordinate sums; the point's label is the index of
    its largest probability.

    Args:
      array(DistArray): the input data points matrix.
      ex(Extent): region being processed.
      old_centers(DistArray): the current centers of each cluster.
      centers(DistArray): the new centers to be updated.
      counts(DistArray): the new counts to be updated.
      labels(DistArray): the new labels for each point to be updated.
      m(float): the parameter of fuzzy kmeans.

    Returns:
      An empty list; all results are delivered through ``update`` calls on
      ``centers``, ``counts`` and ``labels``.
    """
    points = array.fetch(ex)
    old_centers = old_centers[:]

    new_centers = np.zeros_like(old_centers)
    new_counts = np.zeros((old_centers.shape[0], 1))
    # Builtin ``int`` is what the ``np.int`` alias stood for; the alias was
    # removed in NumPy 1.24, so using it raises AttributeError there.
    new_labels = np.zeros(points.shape[0], dtype=int)

    for i in range(points.shape[0]):
        point = points[i]
        prob = _calc_probability(point, old_centers, m)
        new_labels[i] = np.argmax(prob)

        # Only clusters with non-zero membership contribute. A separate index
        # name keeps the point index ``i`` from being shadowed.
        for cluster in prob.nonzero()[0]:
            new_counts[cluster] += prob[cluster]
            new_centers[cluster] += prob[cluster] * point

    # The partial sums cover the whole centers/counts arrays, while the labels
    # only cover the rows [ex.ul[0], ex.lr[0]) handled by this kernel.
    # NOTE(review): how contributions from different extents are combined is
    # decided by the target arrays and is not visible here -- confirm.
    centers.update(extent.from_shape(centers.shape), new_centers)
    counts.update(extent.from_shape(counts.shape), new_counts)
    labels.update(extent.create((ex.ul[0],), (ex.lr[0],), labels.shape),
                  new_labels)
    return []
def _shortest_path_mapper(ex, kng, directed, dist_matrix):
    """Mapper kernel computing shortest paths for a subset of points.

    ``kng`` is expected to be a sparse matrix holding the distance between
    each pair of points, and ``dist_matrix`` is the target matrix to be filled
    with the shortest path between each pair of points. Each kernel handles
    the rows ``ex.ul[0]`` to ``ex.lr[0]`` of its extent.

    Args:
      ex(Extent): region being processed; only its row bounds are used.
      kng: pairwise distance matrix handed to ``graph_shortest_path``.
      directed: forwarded to ``graph_shortest_path`` as ``directed``.
      dist_matrix: target array receiving the computed distances.

    Returns:
      A fresh ``core.LocalKernelResult``.
    """
    first_row, last_row = ex.ul[0], ex.lr[0]

    # The partial result is an NxN matrix where M(i, j) is the shortest path
    # between i and j if it is positive, otherwise it is zero.
    partial = graph_shortest_path(kng, first_row, last_row, directed=directed)

    dist_matrix.update(extent.from_shape(partial.shape), partial)
    return core.LocalKernelResult()
def _find_cluster_mapper(inputs, ex, d_pts, old_centers, new_centers,
                         new_counts, labels):
    """Assign the points of one extent to clusters and accumulate the sums.

    Each point fetched from ``d_pts`` is given the cluster index returned by
    ``_find_closest``. For every cluster, the number of assigned points and
    the coordinate sum of those points are computed locally and then pushed
    to the output arrays.

    Args:
      inputs: not used by this kernel.
      ex(Extent): region being processed.
      d_pts(DistArray): the input data points; ``fetch(ex)`` is called on it.
      old_centers: the current centers, passed to ``_find_closest``; its first
        dimension is the number of clusters.
      new_centers: target array receiving the per-cluster coordinate sums.
      new_counts: target array receiving the per-cluster point counts.
      labels: target array receiving the cluster index of each point.

    Returns:
      An empty list; all results are delivered through ``update`` calls.
    """
    centers = old_centers
    pts = d_pts.fetch(ex)
    closest = _find_closest(pts, centers)

    # Builtin ``int`` is what the ``np.int`` alias stood for; the alias was
    # removed in NumPy 1.24, so using it raises AttributeError there.
    l_counts = np.zeros((centers.shape[0], 1), dtype=int)
    l_centers = np.zeros_like(centers)

    for i in range(centers.shape[0]):
        matching = (closest == i)
        l_counts[i] = matching.sum()
        # Sum, not mean: the division by the count is not done in this kernel.
        l_centers[i] = pts[matching].sum(axis=0)

    # Update centroid positions.
    # NOTE(review): how contributions from different extents are combined is
    # decided by the target arrays and is not visible here -- confirm.
    new_centers.update(extent.from_shape(new_centers.shape), l_centers)
    new_counts.update(extent.from_shape(new_counts.shape), l_counts)

    # Labels are written as a single column covering this extent's rows.
    labels.update(extent.create(ex.ul, (ex.lr[0], 1), labels.shape),
                  closest.reshape(pts.shape[0], 1))
    return []