def sparsify(distMat, cutoff, kNN, threads):
    '''Convert a dense distance matrix into a sparse scipy matrix.

    The dense matrix is thinned by ``pp_sketchlib.sparsifyDists`` using
    the distance cutoff and/or number of nearest neighbours given, and the
    resulting coordinates are converted with ``ijv_to_coo``.

    Args:
        distMat
            Dense distance matrix; its ``shape`` is used as the shape of
            the sparse result.
        cutoff
            Passed to ``sparsifyDists`` as ``distCutoff``. When greater
            than zero the sparse result is also mirrored (see below).
        kNN
            Passed to ``sparsifyDists`` as ``kNN``.
        threads
            Accepted for interface compatibility; not used in this
            function body.

    Returns:
        The sparse matrix (float32). When ``cutoff > 0`` this is the sum
        of the sparse matrix and its transpose, so its sparse format is
        whatever scipy returns for that addition.
    '''
    ijv = pp_sketchlib.sparsifyDists(distMat, distCutoff=cutoff, kNN=kNN)
    upper = ijv_to_coo(ijv, distMat.shape, np.float32)

    if cutoff > 0:
        # Add the transpose so that the lower triangle is filled in too
        # (presumably cutoff mode only yields one triangle -- confirm
        # against pp_sketchlib)
        return upper + upper.transpose()

    return upper
def fit(self, X, accessory):
    '''Extends :func:`~ClusterFit.fit`

    Gets assignments by using nearest neighbours.

    For every rank in ``self.ranks`` a sparse matrix holding the
    distances kept by ``pp_sketchlib.sparsifyDists`` for that rank is
    stored in ``self.nn_dists[rank]`` (a cupyx sparse matrix if
    ``self.use_gpu`` is set, a scipy one otherwise). Also sets
    ``self.dist_col`` and ``self.fitted``.

    Args:
        X (numpy.array)
            The core and accessory distances to cluster. Assumed to be
            in long form (one row per pair of samples): the number of
            samples is recovered from the number of rows.
        accessory (bool)
            Use accessory rather than core distances

    Returns:
        y (numpy.array)
            Cluster assignments of samples in X, as given by
            ``self.assign`` for the smallest rank
    '''
    ClusterFit.fit(self, X)

    # Invert n_pairs = n * (n - 1) / 2 to get the number of samples n
    sample_size = int(round(0.5 * (1 + np.sqrt(1 + 8 * X.shape[0]))))
    if max(self.ranks) >= sample_size:
        sys.stderr.write("Rank must be less than the number of samples")
        # This is a fatal input error, so report failure to the caller
        # of the process rather than a success status
        sys.exit(1)

    # Column 1 of X is used for accessory distances, column 0 for core
    self.dist_col = 1 if accessory else 0

    # The square distance matrix does not depend on the rank, so it is
    # built once rather than once per rank
    square_dists = pp_sketchlib.longToSquare(X[:, [self.dist_col]],
                                             self.threads)

    self.nn_dists = {}
    for rank in self.ranks:
        row, col, data = pp_sketchlib.sparsifyDists(square_dists, 0, rank)

        # Clamp distances below epsilon up to epsilon (presumably so
        # that no stored entry is an explicit zero -- confirm)
        data = [epsilon if d < epsilon else d for d in data]

        if self.use_gpu:
            self.nn_dists[rank] = cupyx.scipy.sparse.coo_matrix(
                (cp.array(data), (cp.array(row), cp.array(col))),
                shape=(sample_size, sample_size),
                dtype=X.dtype)
        else:
            self.nn_dists[rank] = scipy.sparse.coo_matrix(
                (data, (row, col)),
                shape=(sample_size, sample_size),
                dtype=X.dtype)

    self.fitted = True

    y = self.assign(min(self.ranks))
    return y
def sparsify(distMat, cutoff, kNN, threads):
    '''Convert a dense distance matrix into a sparse scipy matrix.

    The dense matrix is thinned by ``pp_sketchlib.sparsifyDists`` and the
    returned coordinates are assembled into a float32 ``coo_matrix`` with
    the same shape as the input.

    Args:
        distMat
            Dense distance matrix; its ``shape`` is used as the shape of
            the sparse result.
        cutoff
            Passed to ``sparsifyDists`` as ``distCutoff``. When greater
            than zero the sparse result is also mirrored (see below).
        kNN
            Passed to ``sparsifyDists`` as ``kNN``.
        threads
            Passed to ``sparsifyDists`` as ``num_threads``.

    Returns:
        The sparse matrix. When ``cutoff > 0`` this is the sum of the
        COO matrix and its transpose, so its sparse format is whatever
        scipy returns for that addition.
    '''
    coords = pp_sketchlib.sparsifyDists(
        distMat,
        distCutoff=cutoff,
        kNN=kNN,
        num_threads=threads,
    )

    # coords is indexed as (row indices, column indices, values)
    row_idx = coords[0]
    col_idx = coords[1]
    values = coords[2]
    upper = coo_matrix(
        (values, (row_idx, col_idx)),
        shape=distMat.shape,
        dtype=np.float32,
    )

    if cutoff > 0:
        # Add the transpose so that the lower triangle is filled in too
        return upper + upper.transpose()

    return upper