def add_distance_matrix(self, name: str, distance_matrix: sp.csr_matrix, labels: Sequence):
    """Add a distance matrix.

    Parameters
    ----------
    name
        Unique identifier of the distance matrix
    distance_matrix
        sparse distance matrix `D` in CSR format. Explicitly stored zeros
        are removed from this object **in place**; no copy is made.
    labels
        array with row/column names of the distance matrix.
        `len(array) == D.shape[0] == D.shape[1]`

    Raises
    ------
    TypeError
        If `distance_matrix` is not a `csr_matrix`.
    ValueError
        If the matrix is not square or its size differs from `len(labels)`.
    """
    # Validate the type before touching `.shape`: inputs without a `.shape`
    # attribute (e.g. nested lists) must fail with the documented TypeError
    # rather than an incidental AttributeError.
    if not isinstance(distance_matrix, csr_matrix):
        raise TypeError("Distance matrix must be sparse and in CSR format. ")
    if not (len(labels) == distance_matrix.shape[0] == distance_matrix.shape[1]):
        raise ValueError("Dimension mismatch!")

    # The class relies on zeros not being explicitly stored during reverse lookup.
    distance_matrix.eliminate_zeros()

    self.distance_matrices[name] = distance_matrix
    label_to_index = {label: index for index, label in enumerate(labels)}
    # The label "nan" does not have an index in the matrix
    label_to_index["nan"] = np.nan
    self.distance_matrix_labels[name] = label_to_index
def add_distance_matrix(
    self,
    name: str,
    distance_matrix: sp.csr_matrix,
    labels: Sequence,
    labels2: Sequence = None,
):
    """Add a distance matrix.

    Parameters
    ----------
    name
        Unique identifier of the distance matrix
    distance_matrix
        sparse distance matrix `D` in CSR format. Explicitly stored zeros
        are removed from this object **in place**; no copy is made.
    labels
        array with row names of the distance matrix.
        `len(array) == D.shape[0]`
    labels2
        array with column names of the distance matrix. Can be omitted
        if the distance matrix is symmetric, in which case `labels` is used
        for the columns as well. `len(array) == D.shape[1]`.

    Raises
    ------
    TypeError
        If `distance_matrix` is not a `csr_matrix`.
    ValueError
        If the number of row or column labels does not match the matrix shape.
    """
    # Validate the type before touching `.shape`: inputs without a `.shape`
    # attribute (e.g. nested lists) must fail with the documented TypeError
    # rather than an incidental AttributeError.
    if not isinstance(distance_matrix, csr_matrix):
        raise TypeError("Distance matrix must be sparse and in CSR format. ")

    labels2 = labels if labels2 is None else labels2
    if len(labels) != distance_matrix.shape[0]:
        raise ValueError("Dimensions mismatch along axis 0")
    if len(labels2) != distance_matrix.shape[1]:
        raise ValueError("Dimensions mismatch along axis 1")

    # The class relies on zeros not being explicitly stored during reverse lookup.
    distance_matrix.eliminate_zeros()

    self.distance_matrices[name] = distance_matrix
    row_lookup = {label: index for index, label in enumerate(labels)}
    col_lookup = {label: index for index, label in enumerate(labels2)}
    # The label "nan" does not have an index in the matrix
    row_lookup["nan"] = np.nan
    col_lookup["nan"] = np.nan
    self.distance_matrix_labels[name] = row_lookup
    self.distance_matrix_labels2[name] = col_lookup
def _setup_linear_propagator(self, struct):
    """Build the noise-independent, linear propagator.

    The raw matrix arrays are produced by ``hcons.setup_linear_propagator``
    from the index tables on ``struct`` (``vecind``, ``indab``, ``indbl``)
    and the parameters stored on this object.

    Returns:
        The propagator as CSR Matrix, with duplicate entries summed and
        explicitly stored zeros removed.
    """
    # Complex coefficient handed to the builder: gamma + i * omega.
    complex_rate = self._gamma + 1.j * self._omega

    # NOTE(review): the (a, j, i) ordering passed to CSRMatrix presumably
    # mirrors the usual (data, indices, indptr) CSR triple -- confirm
    # against the CSRMatrix constructor.
    part_i, part_j, part_a = hcons.setup_linear_propagator(
        struct.vecind,
        struct.indab,
        struct.indbl,
        self._h_sys,
        self._g,
        complex_rate,
        self._l_map,
        self._with_terminator,
    )

    propagator = CSRMatrix((part_a, part_j, part_i))
    propagator.sum_duplicates()
    propagator.eliminate_zeros()
    return propagator
def bias_knn(
    self,
    conn: csr_matrix,
    pseudotime: np.ndarray,
    n_jobs: Optional[int] = None,
    backend: str = "loky",
    show_progress_bar: bool = True,
    **kwargs: Any,
) -> csr_matrix:
    """
    Bias cell-cell connectivities of a KNN graph.

    Parameters
    ----------
    conn
        Sparse matrix of shape ``(n_cells, n_cells)`` containing the nearest neighbor connectivities.
    pseudotime
        Pseudotemporal ordering of cells.
    %(parallel)s
    kwargs
        Extra keyword arguments passed, together with ``conn`` and ``pseudotime``,
        to the parallelized helper call.

    Returns
    -------
    The biased connectivities, with the same shape as ``conn`` and without explicitly stored zeros.
    """
    # One work item per row of `conn`; each worker result is expected to be
    # a (data, indices, indptr) triple that can be concatenated.
    res = parallelize(
        self._bias_knn_helper,
        np.arange(conn.shape[0]),
        as_array=False,
        unit="cell",
        n_jobs=n_jobs,
        backend=backend,
        show_progress_bar=show_progress_bar,
    )(conn, pseudotime, **kwargs)
    data, indices, indptr = zip(*res)

    # Pass the shape explicitly. Without it, scipy infers the number of
    # columns from the largest stored column index, so the result would be
    # narrower than `conn` whenever the trailing columns hold no entries.
    biased = csr_matrix(
        (np.concatenate(data), np.concatenate(indices), np.concatenate(indptr)),
        shape=conn.shape,
    )
    biased.eliminate_zeros()

    return biased
def _eliminate(matrix: sp.csr_matrix, user_indices, item_indices): matrix = matrix.copy() # `lil_matrix` is too slow matrix[list(user_indices), list(item_indices)] = 0 matrix.eliminate_zeros() return matrix
def _eliminate_subzeroes(self, m: sparse.csr_matrix, epsilon): m.data = np.where(abs(m.data) < epsilon, 0, m.data) m.eliminate_zeros()
def _eliminate(matrix: sp.csr_matrix, user_indices, item_indices): matrix = matrix.copy() matrix[user_indices, item_indices] = 0 matrix.eliminate_zeros() return matrix
def mask_test_edges(adj: sp.csr_matrix):
    """Split the edges of a graph into train / validation / test sets.

    The diagonal of ``adj`` is removed, the upper-triangular edges are
    shuffled, and 10% of them (rounded down) are held out for validation and
    another 10% for testing. For each held-out set an equally sized list of
    randomly drawn node pairs that are *not* edges is generated as negative
    examples. Randomness comes from the global ``np.random`` state.

    ``sparse_to_tuple`` is defined elsewhere; only its first return element
    is used, and it is indexed here as a 2-D array with one ``(row, col)``
    pair per edge.

    Parameters
    ----------
    adj
        Sparse adjacency matrix. NOTE(review): the negative test sampling
        checks only one orientation against the stored entries of ``adj``,
        so a symmetric matrix is presumably expected -- confirm with callers.

    Returns
    -------
    adj_train
        Symmetric CSR adjacency matrix built from the training edges only.
    train_edges, val_edges, test_edges
        Arrays of positive edges, one ``(row, col)`` pair per row.
    val_edges_false, test_edges_false
        Lists of ``[i, j]`` node pairs that are not edges of the graph.
    """
    # Remove diagonal elements
    adj = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()

    # Upper triangle only, so every undirected edge is counted once.
    edges = sparse_to_tuple(sp.triu(adj))[0]
    edges_all = sparse_to_tuple(adj)[0]
    num_test = int(np.floor(edges.shape[0] / 10.))
    num_val = int(np.floor(edges.shape[0] / 10.))

    # split to get the training, valid and test set.
    all_edge_idx = list(range(edges.shape[0]))
    np.random.shuffle(all_edge_idx)
    val_edge_idx = all_edge_idx[:num_val]
    test_edge_idx = all_edge_idx[num_val:(num_val + num_test)]
    test_edges = edges[test_edge_idx]
    val_edges = edges[val_edge_idx]
    # With fewer than 10 edges both index lists are empty and `np.hstack`
    # yields a float array, which `np.delete` rejects as an index; force an
    # integer dtype so small graphs work as well.
    held_out_idx = np.hstack([test_edge_idx, val_edge_idx]).astype(np.intp)
    train_edges = np.delete(edges, held_out_idx, axis=0)

    def ismember(a, b):
        # True if the pair `a` equals any row of the 2-D array `b`.
        rows_close = np.all((a - b[:, None]) == 0, axis=-1)
        return np.any(rows_close)

    test_edges_false = []
    while len(test_edges_false) < len(test_edges):
        # Draw exactly as many candidates as are still missing; rejected
        # candidates are made up for in the next pass of the while loop.
        n_rnd = len(test_edges) - len(test_edges_false)
        rnd = np.random.randint(0, adj.shape[0], size=2 * n_rnd)
        idxs_i = rnd[:n_rnd]
        idxs_j = rnd[n_rnd:]
        for idx_i, idx_j in zip(idxs_i, idxs_j):
            if idx_i == idx_j:
                continue
            if ismember([idx_i, idx_j], edges_all):
                continue
            if test_edges_false:
                # Skip pairs already sampled, in either orientation.
                if ismember([idx_j, idx_i], np.array(test_edges_false)):
                    continue
                if ismember([idx_i, idx_j], np.array(test_edges_false)):
                    continue
            test_edges_false.append([idx_i, idx_j])

    val_edges_false = []
    while len(val_edges_false) < len(val_edges):
        n_rnd = len(val_edges) - len(val_edges_false)
        rnd = np.random.randint(0, adj.shape[0], size=2 * n_rnd)
        idxs_i = rnd[:n_rnd]
        idxs_j = rnd[n_rnd:]
        for idx_i, idx_j in zip(idxs_i, idxs_j):
            if idx_i == idx_j:
                continue
            # Reject every real edge in either orientation. `edges` is the
            # union of the train, validation and test edges, so -- unlike a
            # check against train and validation edges alone -- a held-out
            # test edge can never be returned as a "false" validation edge.
            if ismember([idx_i, idx_j], edges):
                continue
            if ismember([idx_j, idx_i], edges):
                continue
            if val_edges_false:
                # Skip pairs already sampled, in either orientation.
                if ismember([idx_j, idx_i], np.array(val_edges_false)):
                    continue
                if ismember([idx_i, idx_j], np.array(val_edges_false)):
                    continue
            val_edges_false.append([idx_i, idx_j])

    # Re-build adj matrix
    data = np.ones(train_edges.shape[0])
    adj_train = sp.csr_matrix((data, (train_edges[:, 0], train_edges[:, 1])), shape=adj.shape)
    # Training edges are upper-triangular; mirror them to get a symmetric matrix.
    adj_train = adj_train + adj_train.T

    return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false