def normalize_adj(self, mx: sp.coo_matrix) -> sp.coo_matrix:
    """Symmetrically normalize a sparse adjacency matrix: D^-1/2 A D^-1/2."""
    rowsum = np.array(mx.sum(1))
    r_inv_sqrt = np.power(rowsum, -0.5).flatten()
    r_inv_sqrt[np.isinf(r_inv_sqrt)] = 0.
    r_mat_inv_sqrt = sp.diags(r_inv_sqrt)
    return mx.dot(r_mat_inv_sqrt).transpose().dot(r_mat_inv_sqrt).tocoo()
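# A minimal usage sketch for the symmetric normalization above, assuming it is
# exercised on a small undirected adjacency matrix (the `self` argument is
# unused, so the math can be run standalone). The result equals
# D^-1/2 A D^-1/2, where D is the diagonal degree matrix.
import numpy as np
import scipy.sparse as sp

A = sp.coo_matrix(np.array([[0., 1., 1.],
                            [1., 0., 0.],
                            [1., 0., 0.]]))
deg = np.array(A.sum(1)).flatten()
d_inv_sqrt = np.power(deg, -0.5)
d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
D_inv_sqrt = sp.diags(d_inv_sqrt)
print(A.dot(D_inv_sqrt).transpose().dot(D_inv_sqrt).toarray())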
def multi_particle(H: sparse.coo_matrix) -> pyquil.paulis.PauliSum:
    """
    Creates a qubit operator from a (sparse) matrix. This function uses
    (almost) all states and, thus, requires (approximately) log(N) qubits
    for an N-dimensional Hilbert space.

    The idea for converting a matrix element to an operator is to raise/lower
    the qubits differing between two states to convert one basis state to the
    other. The qubits that are not negated must be checked to have the
    correct value (using an analogue of the counting operator) so as not to
    pick up extra terms (one could perhaps allow for extra terms and
    compensate for them later?).

    0 = up
    1 = down

    (1 + Zi)/2 checks that qubit i is 0 (up)
    (1 - Zi)/2 checks that qubit i is 1 (down)
    (Xi + 1j*Yi)/2 lowers qubit i from 1 (down) to 0 (up)
    (Xi - 1j*Yi)/2 raises qubit i from 0 (up) to 1 (down)

    @author: Joel

    :param H: An array (array-like) representing the Hamiltonian.
    :return: Hamiltonian as a pyQuil PauliSum.
    """
    # Convert to sparse coo_matrix
    if not sparse.issparse(H):
        H = sparse.coo_matrix(H)
    elif H.getformat() != "coo":
        H = H.tocoo()
    # The main part of the function
    H_op = QubitOperator()
    for i, j, data in zip(H.row, H.col, H.data):
        new_term = QubitOperator(())  # = I
        for qubit in range(int.bit_length(H.shape[0] - 1)):
            if (i ^ j) & (1 << qubit):
                # lower/raise qubit
                new_term *= QubitOperator((qubit, "X"), 1 / 2) + \
                    QubitOperator((qubit, "Y"),
                                  1j * (int(j & (1 << qubit) != 0) - 1 / 2))
            else:
                # check that the qubit has the correct value (same in i and j)
                new_term *= QubitOperator((), 1 / 2) + \
                    QubitOperator((qubit, "Z"),
                                  1 / 2 - int(j & (1 << qubit) != 0))
        H_op += data * new_term
    return qubitop_to_pyquilpauli(H_op)
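# A minimal sketch of the bit manipulation that multi_particle relies on,
# assuming a 4-dimensional Hilbert space (so 2 qubits); no quantum libraries
# are needed to follow the indexing logic.
N = 4
n_qubits = int.bit_length(N - 1)  # number of qubits for an N-dim space
i, j = 1, 2  # matrix element H[i, j] couples basis states |01> and |10>
for qubit in range(n_qubits):
    if (i ^ j) & (1 << qubit):
        print(f"qubit {qubit}: differs between the states, gets (X ± iY)/2")
    else:
        print(f"qubit {qubit}: equal in both states, gets (1 ± Z)/2")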
def binarize_roi_mask(roi_mask: coo_matrix,
                      absolute_threshold: Optional[float] = None,
                      quantile: float = 0.1) -> coo_matrix:
    """Binarize a coo_matrix representing an ROI mask.

    Parameters
    ----------
    roi_mask : coo_matrix
        An ROI mask in coo_matrix format.
    absolute_threshold : Optional[float], optional
        ROI data (Suite2P weights) above and equal to the threshold will be
        set to 1 and set to 0 otherwise. If None is provided, the threshold
        will be determined via quantile. By default None.
    quantile : float, optional
        Compute the specified quantile and use it as the absolute_threshold,
        by default 0.1. This parameter will be ignored if an
        absolute_threshold is provided.

    Returns
    -------
    coo_matrix
        A binarized version of the coo_matrix.
    """
    if absolute_threshold is None:
        absolute_threshold = np.quantile(roi_mask.data, quantile)

    binarized_mask = roi_mask.copy()
    binarized_mask.data = np.where(binarized_mask.data >= absolute_threshold,
                                   1, 0)
    binarized_mask.eliminate_zeros()

    return binarized_mask
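# A usage sketch for binarize_roi_mask, assuming a tiny weighted ROI; with
# quantile=0.5 every stored weight at or above the median of the nonzero
# weights becomes 1 and the rest are dropped.
import numpy as np
from scipy.sparse import coo_matrix

weights = coo_matrix(np.array([[0.0, 0.2, 0.0],
                               [0.4, 0.9, 0.1],
                               [0.0, 0.3, 0.0]]))
print(binarize_roi_mask(weights, quantile=0.5).toarray())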
def invert_qubits_state(state: coo_matrix, length: int) -> np.ndarray:
    """Reverse the qubit ordering of a state vector; returns a dense array."""
    new_idx = range(2**length)
    result = np.array(list(
        map(lambda i: aux.to_decimal(aux.decimal_to_binary(i, length)[::-1]),
            new_idx)), dtype=np.int64)
    return state.toarray().reshape((2**length),)[result]
def normalize_sp(mx: sp.coo_matrix) -> sp.coo_matrix:
    rows_sum = np.array(mx.sum(1)).astype('float')  # sum over each row
    rows_inv = np.power(rows_sum, -1).flatten()  # reciprocal of each row sum
    # rows that sum to 0 make rows_inv infinite; zero those entries out
    rows_inv[np.isinf(rows_inv)] = 0
    rows_mat_inv = sp.diags(rows_inv)  # diagonal matrix with rows_inv entries
    mx = rows_mat_inv.dot(mx)  # .dot(cols_mat_inv)
    return mx
def one_particle(H: sparse.coo_matrix) -> pyquil.paulis.PauliSum:
    """
    Generates a pyQuil PauliSum given a Hamiltonian matrix.

    Creates a Hamiltonian operator from a (sparse) matrix. This function uses
    a one-particle formulation and, thus, requires N qubits for an
    N-dimensional Hilbert space.

    @author: Axel, Joel

    :param H: An array (array_like) representing the Hamiltonian.
    :return: Hamiltonian as a pyQuil PauliSum.
    """
    # Create the Hamiltonian with a and a^dagger
    Hamiltonian = FermionOperator()
    if sparse.issparse(H):
        H = H.tocoo()
        for i, j, data in zip(H.row, H.col, H.data):
            Hamiltonian += data * FermionOperator(((int(i), 1), (int(j), 0)))
    else:
        if not isinstance(H, np.ndarray):
            H = np.asanyarray(H)
        for i in range(H.shape[0]):
            for j in range(H.shape[1]):
                Hamiltonian += H[i, j] * FermionOperator(((i, 1), (j, 0)))

    Hamiltonian = jordan_wigner(Hamiltonian)
    Hamiltonian = qubitop_to_pyquilpauli(Hamiltonian)
    return Hamiltonian
def normalize(self, mx: sp.coo_matrix) -> sp.coo_matrix:
    """Row-normalize sparse matrix"""
    rowsum = np.array(mx.sum(1))
    r_inv = np.power(rowsum, -1.0).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx).tocoo()
    return mx
def visualize_matrix(h: sparse.coo_matrix) -> pd.DataFrame:
    """
    :param h: Sparse matrix in coo_matrix format.
    :return: pd.DataFrame
    """
    normal_matrix = h.todense()
    return pd.DataFrame(normal_matrix)
def run_leiden(
    graph: sp.coo_matrix,
    directed: bool,
    partition_type: Optional[Type[MutableVertexPartition]],
    resolution_parameter: float,
    n_iterations: int,
    seed: Optional[int],
    use_weights: bool,
    kargs,
) -> Tuple[np.ndarray, float]:
    """
    Wrapper for Leiden community detection

    Args:
        graph (sp.coo_matrix): Affinity matrix
        directed (bool): See below in 'cluster()'
        partition_type (Optional[Type[MutableVertexPartition]]): See below in 'cluster()'
        resolution_parameter (float): See below in 'cluster()'
        n_iterations (int): See below in 'cluster()'
        seed (Optional[int]): See below in 'cluster()'
        use_weights (bool): See below in 'cluster()'
        kargs: See below in 'cluster()'

    Returns:
        communities, Q (Tuple[np.ndarray, float]): See below in 'cluster()'
    """
    # convert the graph from scipy.sparse.coo_matrix to an igraph Graph;
    # get the vertex indices of each edge
    edgelist = np.vstack(graph.nonzero()).T.tolist()
    g = ig.Graph(max(graph.shape), edgelist, directed=directed)
    # set the edge weights
    g.es["weights"] = graph.data

    if not partition_type:
        partition_type = leidenalg.RBConfigurationVertexPartition
    if resolution_parameter:
        kargs["resolution_parameter"] = resolution_parameter
    if use_weights:
        kargs["weights"] = np.array(g.es["weights"]).astype("float64")
    kargs["n_iterations"] = n_iterations
    kargs["seed"] = seed

    print("Running Leiden optimization", flush=True)
    tic_ = time.time()
    communities = leidenalg.find_partition(
        g,
        partition_type=partition_type,
        **kargs,
    )
    Q = communities.q
    print(
        "Leiden completed in {} seconds".format(time.time() - tic_),
        flush=True,
    )
    communities = np.asarray(communities.membership)
    return communities, Q
def plot_binarized_vs_weighted_roi(
        weighted_mask: coo_matrix, binary_mask: coo_matrix,
        weighted_trace: np.ndarray,
        binary_trace: np.ndarray) -> matplotlib.figure.Figure:
    fig = plt.figure(constrained_layout=True)
    gs = fig.add_gridspec(nrows=3, ncols=6)

    # Plot ROIs
    binary_roi_ax = fig.add_subplot(gs[:-1, -3:])
    weighted_roi_ax = fig.add_subplot(gs[:-1, :-3],
                                      sharex=binary_roi_ax,
                                      sharey=binary_roi_ax)
    weighted_roi_ax.set_xticks([])
    weighted_roi_ax.set_yticks([])
    weighted_roi_ax.set_title("Native Suite2P (weighted) ROI")
    weighted_roi_ax.imshow(weighted_mask.toarray())
    xmin, xmax, ymin, ymax = weighted_roi_ax.axis()

    binary_roi_ax.set_xticks([])
    binary_roi_ax.set_yticks([])
    binary_roi_ax.set_title("Binarized Suite2P ROI")
    binary_roi_ax.imshow(binary_mask.toarray())
    binary_roi_ax.set_xlim(xmin, xmax)
    binary_roi_ax.set_ylim(ymin, ymax)

    # Plot traces
    binary_trace_ax = fig.add_subplot(gs[-1, -3:])
    weighted_trace_ax = fig.add_subplot(gs[-1, :-3],
                                        sharex=binary_trace_ax,
                                        sharey=binary_trace_ax)
    weighted_trace_ax.set_ylabel("Weighted F")
    weighted_trace_ax.set_xlabel("Frame Number")
    weighted_trace_ax.plot(range(len(weighted_trace)), weighted_trace,
                           linewidth=0.5)

    binary_trace_ax.set_ylabel("Binarized F")
    binary_trace_ax.set_xlabel("Frame Number")
    binary_trace_ax.plot(range(len(binary_trace)), binary_trace,
                         linewidth=0.25)

    return fig
def col_compress(matrix_in: coo_matrix,
                 indices: bool = False) -> Union[List[int], csr_matrix]:
    matrix = np.sort(matrix_in.toarray(), axis=1)
    indices_out = []
    for col_index in range(matrix.shape[1] - 1):
        if np.array_equal(matrix[:, col_index], matrix[:, col_index + 1]):
            indices_out.append(col_index)
    if indices:
        return indices_out
    return csr_matrix(
        matrix[:, list(set(range(matrix.shape[1])) - set(indices_out))],
        dtype=np.uint8)
def scipy2torch_sparse(self, mx: sp.coo_matrix) -> torch.sparse.FloatTensor:
    values = mx.data
    indices = np.vstack((mx.row, mx.col))
    i = torch.LongTensor(indices)
    v = torch.FloatTensor(values)
    shape = mx.shape
    mx = torch.sparse.FloatTensor(i, v, torch.Size(shape))
    if self.use_cuda:
        mx = mx.cuda(non_blocking=True)
    return mx
def mat_all_point(m_mat: coo_matrix, vertex: list, alpha: float):
    """
    Compute E - alpha * m_mat.T
    :param m_mat:
    :param vertex: total user and item points
    :param alpha: the probability of continuing the random walk
    :return:
    """
    total_len = len(vertex)
    row = np.array(range(total_len))
    col = np.array(range(total_len))
    data = np.ones(total_len)
    eye_t = coo_matrix((data, (row, col)), shape=(total_len, total_len))
    return eye_t.tocsr() - alpha * m_mat.tocsr().transpose()
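# A usage sketch for mat_all_point on a toy user-item graph, assuming three
# vertices (two users, one item); the returned matrix E - alpha * M^T is the
# operator that PersonalRank-style recommenders solve against.
import numpy as np
from scipy.sparse import coo_matrix

vertex = ['u1', 'u2', 'item_a']
m = coo_matrix((np.ones(3), ([0, 1, 2], [2, 2, 0])), shape=(3, 3))
print(mat_all_point(m, vertex, alpha=0.8).toarray())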
def dump_data(self, time_step: Optional[datetime], x_analysis: np.ndarray,
              p_analysis: np.ndarray, p_analysis_inv: sp.coo_matrix,
              state_mask: np.ndarray, n_params: int):
    data = []
    for param in self.param_positions:
        index = self.param_positions[param]
        param_values = np.zeros(state_mask.shape, dtype=np.float32)
        param_values[state_mask] = x_analysis[index::n_params]
        data.append(param_values)
        param_unc = np.zeros(state_mask.shape, dtype=np.float32)
        param_unc[state_mask] = 1. / np.sqrt(
            p_analysis_inv.diagonal()[index::n_params])
        data.append(param_unc)
    self.writer.write(data, self._width, self._height, self._offset_x,
                      self._offset_y)
def _build_feat_tensor(self, feat_matrix: sp.coo_matrix,
                       device: Optional[T.device] = T.device('cpu')):
    feat_index_list = feat_matrix.nonzero()
    feat_index_array: np.ndarray = np.vstack(feat_index_list)
    feat_index = T.tensor(feat_index_array.tolist(), dtype=T.long,
                          device=device)
    feat_value = T.tensor(feat_matrix.data, dtype=T.double, device=device)
    feat_tensor = T.sparse_coo_tensor(feat_index, feat_value,
                                      size=feat_matrix.shape, device=device)
    return feat_tensor
def compute_transition_matrix(self, knn: sparse.coo_matrix, x: np.ndarray,
                              v: np.ndarray, epsilon: float = 0.0,
                              reverse: bool = False) -> sparse.csr_matrix:
    """
    Compute a right-stochastic matrix representing transition probabilities
    from each node

    Args:
        knn        KNN graph (n_cells, n_cells)
        x          Embedding positions (n_cells, n_dims)
        v          Velocities on the embedding (n_cells, n_dims)
        epsilon    Offset added to the scalar projections before clipping
        reverse    Compute the reverse transition matrix (for backwards diffusion)

    Remarks:
        Computes a Markov transition matrix for the KNN graph. The probability
        of transition along an edge is proportional to the scalar projection
        of the velocity vector onto that edge, times the reciprocal of the
        edge length. Edges that get negative scalar projections are clipped
        to zero and the total non-zero outgoing edges are normalized to a
        sum of 1.0.
    """
    # vertices for each edge
    knn = knn.tocoo()
    (v0, v1) = (knn.row, knn.col)

    # calculate edge unit vectors
    # Vector corresponding to an edge from v0 to v1, shape (n_edges, n_dims)
    uv = x[v1] - x[v0]
    norms = np.linalg.norm(uv, axis=1)
    uv = uv / norms[:, None]  # Convert to unit vectors

    # Project the velocity vectors onto edges, and clip to zero
    scalar_projection = np.array([a.dot(b)
                                  for a, b in zip(v[v0], uv)])  # (n_edges,)
    if reverse:
        scalar_projection = -scalar_projection
    scalar_projection += epsilon
    # scalar_projection += scalar_projection.min()
    np.clip(scalar_projection, a_min=0, a_max=None, out=scalar_projection)

    # Calculate transition probabilities
    p = scalar_projection * (1 / norms)
    tr = normalize(sparse.coo_matrix((p, (v0, v1))).tocsr(), axis=1,
                   norm='l1')
    return tr
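# A minimal standalone sketch of the projection step above, assuming a single
# edge from cell 0 to cell 1 in a 2-D embedding; the edge weight is the
# clipped scalar projection of the source cell's velocity divided by the
# edge length.
import numpy as np

x = np.array([[0.0, 0.0], [1.0, 0.0]])  # embedding positions
v = np.array([[0.5, 0.5], [0.0, 0.0]])  # velocities on the embedding
edge = x[1] - x[0]
length = np.linalg.norm(edge)
proj = v[0].dot(edge / length)  # scalar projection onto the edge
print(max(proj, 0.0) / length)  # unnormalized transition weight: 0.5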
def reorder_rows(X: coo_matrix, idx: np.ndarray) -> None:
    """
    Reorders the rows of the COO sparse matrix given in argument, in place.

    Parameters
    ----------
    X : scipy.sparse.coo_matrix
        The sparse matrix to reorder.
    idx : numpy.ndarray, shape=(X.shape[0],)
        Row indices used to reorder the matrix.
    """
    idx = idx.flatten()
    assert isinstance(
        X, scipy.sparse.coo_matrix), "X must be scipy.sparse.coo_matrix"
    assert X.shape[0] == idx.shape[0], "idx shape[0] must be X shape[0]"
    idx = np.argsort(idx)  # invert the permutation
    idx = np.asarray(idx, dtype=X.row.dtype)
    X.row = idx[X.row]
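# A usage sketch for reorder_rows: the permutation is applied in place, and
# row k of the result is row idx[k] of the original matrix.
import numpy as np
import scipy.sparse

X = scipy.sparse.coo_matrix(np.array([[1, 0], [0, 2], [3, 0]]))
reorder_rows(X, np.array([2, 0, 1]))
print(X.toarray())  # [[3, 0], [1, 0], [0, 2]]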
def _hu_moments(roi: coo_matrix) -> np.ndarray:
    """Returns the 7 Hu moments for an ROI image.

    See https://scikit-image.org/docs/0.17.x/api/skimage.measure.html#moments-hu  # noqa
    for more information.

    Returns
    -------
    7-element, 1d np.array of Hu's image moments

    References
    ----------
    M. K. Hu, "Visual Pattern Recognition by Moment Invariants",
    IRE Trans. Info. Theory, vol. IT-8, pp. 179-187, 1962
    """
    roi_image = roi.toarray()
    mu = moments_central(roi_image)
    nu = moments_normalized(mu)
    return moments_hu(nu)
def laplacian_score(X: np.ndarray, W: sparse.coo_matrix) -> np.ndarray:
    """
    This function implements Laplacian score feature selection. The steps
    are as follows:

    1. Construct the affinity matrix W if it is not specified
    2. For the r-th feature, define fr = X(:,r), D = diag(W*ones),
       ones = [1,...,1]', L = D - W
    3. Let fr_hat = fr - (fr'*D*ones)*ones/(ones'*D*ones)
    4. The Laplacian score for the r-th feature is
       score = (fr_hat'*L*fr_hat)/(fr_hat'*D*fr_hat)

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    W: {sparse matrix}, shape (n_samples, n_samples)
        input affinity matrix

    Output
    ------
    score: {numpy array}, shape (n_features,)
        laplacian score for each feature

    Reference
    ---------
    He, Xiaofei et al. "Laplacian Score for Feature Selection." NIPS 2005.
    """
    # build the diagonal D matrix from the affinity matrix W
    D = np.array(W.sum(axis=1))
    L = W
    tmp = np.dot(np.transpose(D), X)
    D = sparse.diags(np.transpose(D), [0])
    Xt = np.transpose(X)
    t1 = np.transpose(np.dot(Xt, D.todense()))
    t2 = np.transpose(np.dot(Xt, L.todense()))
    # compute the denominator of Lr: fr_hat' * D * fr_hat
    D_prime = np.sum(np.multiply(t1, X), 0) - np.multiply(tmp, tmp) / D.sum()
    # compute the W part of the numerator of Lr: fr_hat' * W * fr_hat
    L_prime = np.sum(np.multiply(t2, X), 0) - np.multiply(tmp, tmp) / D.sum()
    # avoid a zero denominator of Lr
    D_prime[D_prime < 1e-12] = 10000
    # compute the Laplacian score for all features
    score = 1 - np.array(np.multiply(L_prime, 1 / D_prime))[0, :]
    return np.transpose(score)
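# A usage sketch for laplacian_score on random data with a hand-built ring
# affinity graph (an assumption chosen for illustration); lower scores
# indicate features that vary smoothly over the graph.
import numpy as np
from scipy import sparse

rng = np.random.RandomState(0)
X = rng.rand(6, 4)
A = np.zeros((6, 6))
for i in range(6):
    A[i, (i + 1) % 6] = A[(i + 1) % 6, i] = 1.0
print(laplacian_score(X, sparse.coo_matrix(A)))  # one score per feature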
def layout_knn(self, knn: sparse.coo_matrix) -> np.ndarray:
    edges = np.stack((knn.row, knn.col), axis=1)
    # Calculate Jaccard similarities
    js = []  # type: List[float]
    knncsr = knn.tocsr()
    for i, j in edges:
        r = knncsr.getrow(i)
        c = knncsr.getrow(j)
        shared = r.minimum(c).nnz
        total = r.maximum(c).nnz
        js.append(shared / total)
    weights = np.array(js) + 0.00001  # OpenOrd doesn't like 0 weights
    self.graph = nx.Graph()
    self.graph.add_nodes_from(range(knn.shape[0]))
    for i, edge in enumerate(edges):
        # edge attributes must be passed as keyword arguments in networkx 2.x
        self.graph.add_edge(edge[0], edge[1], weight=weights[i])
    return self.layout(self.graph)
def _iter_ids(self, ids: ndarray, mat: coo_matrix,
              n_dim: int) -> Iterator[ndarray]:
    """
    Iterate over metadata vectors of size 'n_dim' encoded in 'mat'.

    If too few metadata elements are found to satisfy the 'n_dim'
    requirement, the vectors will be padded with zeros to ensure homogeneity.

    See Also
    --------
    Dataset._iter_meta
    """
    if mat is not None:
        yield from self._iter_meta(ids, mat.tocsr(), n_dim)
    elif n_dim > 1:
        ids = np.c_[ids, np.zeros((len(ids), n_dim - 1), dtype=int)]
        yield from iter(ids)
    else:
        yield from iter(ids.reshape(-1, 1))
def fit_predict(self, knn: sparse.coo_matrix) -> np.ndarray:
    """
    Given a sparse adjacency matrix, perform Louvain-Jaccard clustering

    Args:
        knn:    The sparse adjacency matrix

    Returns:
        labels: The cluster labels

    Remarks:
        After clustering, the Louvain-Jaccard weighted undirected graph is
        available as the property 'graph' of type nx.Graph, and also in the
        form of a sparse adjacency matrix as the property 'lj_knn' of type
        scipy.sparse.coo_matrix
    """
    if self.jaccard:
        edges = np.stack((knn.row, knn.col), axis=1)
        # Calculate Jaccard similarities
        js = []  # type: List[float]
        knncsr = knn.tocsr()
        for i, j in edges:
            r = knncsr.getrow(i)
            c = knncsr.getrow(j)
            shared = r.minimum(c).nnz
            total = r.maximum(c).nnz
            if total > 0:
                js.append(shared / total)
            else:
                js.append(0)
        weights = np.array(js) + 0.00001  # OpenOrd doesn't like 0 weights
        self.lj_knn = sparse.coo_matrix((weights, (knn.row, knn.col)))
        self.graph = nx.Graph()
        for i, edge in enumerate(edges):
            # attributes must be passed as keyword arguments in networkx 2.x
            self.graph.add_edge(edge[0], edge[1], weight=weights[i])
    else:
        self.graph = nx.from_scipy_sparse_matrix(knn)
    partitions = community.best_partition(self.graph,
                                          resolution=self.resolution)
    return np.array([partitions[key] for key in range(knn.shape[0])])
def meanFilterSparse(a: sp.coo_matrix, h: int):
    """Apply a mean filter to an input sparse matrix.

    This convolves the input with a kernel of size 2*h + 1 with constant
    entries and subsequently reshapes the output to be of the same shape as
    the input.

    Args:
        a: `sp.coo_matrix`, input matrix to be filtered
        h: `int`, half-size of the filter

    Returns:
        `sp.coo_matrix` filtered matrix
    """
    assert h > 0, "meanFilterSparse half-size must be greater than 0"
    assert sp.issparse(a) and a.getformat() == 'coo',\
        "meanFilterSparse input matrix is not scipy.sparse.coo_matrix"
    assert a.shape[0] == a.shape[1],\
        "meanFilterSparse cannot handle non-square matrix"
    fSize = 2 * h + 1
    # the filter is a square matrix of constant 1 of shape (fSize, fSize)
    shapeOut = np.array(a.shape) + fSize - 1
    mToeplitz = sp.diags(np.ones(fSize),
                         np.arange(-fSize + 1, 1),
                         shape=(shapeOut[1], a.shape[1]),
                         format='csr')
    ans = sp.coo_matrix((mToeplitz @ a) @ mToeplitz.T)
    # remove the edges since we don't care about them if we are smoothing
    # the matrix itself
    ansNoEdge = ans.tocsr()[h:(h + a.shape[0]), h:(h + a.shape[1])].tocoo()
    # Assign a different number of neighbors to the edges to better match
    # what the original R implementation of HiCRep does
    rowDist2Edge = np.minimum(ansNoEdge.row,
                              ansNoEdge.shape[0] - 1 - ansNoEdge.row)
    nDim1 = h + 1 + np.minimum(rowDist2Edge, h)
    colDist2Edge = np.minimum(ansNoEdge.col,
                              ansNoEdge.shape[1] - 1 - ansNoEdge.col)
    nDim2 = h + 1 + np.minimum(colDist2Edge, h)
    nNeighbors = nDim1 * nDim2
    ansNoEdge.data /= nNeighbors
    return ansNoEdge
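# A usage sketch for meanFilterSparse: an all-ones matrix is a fixed point of
# the filter, because every entry equals the mean of its (edge-corrected)
# neighborhood.
import numpy as np
import scipy.sparse as sp

a = sp.coo_matrix(np.ones((3, 3)))
print(meanFilterSparse(a, 1).toarray())  # still all ones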
def crop_roi_mask(roi_mask: coo_matrix) -> Optional[coo_matrix]:
    """Crop an ROI mask to the smallest rectangle that fits all nonzero
    elements.

    Parameters
    ----------
    roi_mask : coo_matrix

    Returns
    -------
    Optional[coo_matrix]
        A cropped ROI mask, or None if the coo_matrix is empty
    """
    bounds = roi_bounds(roi_mask)
    if bounds is None:
        return None
    # Convert coo to csr matrix so we can take advantage of indexing
    cropped_mask = roi_mask.tocsr()[bounds[0]:bounds[1], bounds[2]:bounds[3]]
    return cropped_mask.tocoo()
def get_lshash(text: coo_matrix) -> List[str]:
    """ Return a list of cosine LSHs encoding text. """
    def cosine_LSH(vector, planes):
        """
        Return a single cosine LSH for a particular record and given planes.
        """
        sig = 0
        for plane in planes:
            sig <<= 1
            if vector.dot(plane) >= 0:
                sig |= 1
        return str(sig)

    bits = 512
    random_projections = np.random.randn(bits, text.shape[1])
    hashes = [
        cosine_LSH(text.getrow(idx), random_projections)
        for idx in range(text.shape[0])
    ]
    return hashes
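# A usage sketch for get_lshash, assuming `text` holds one document vector per
# row (e.g. TF-IDF output). Note the hyperplanes are drawn fresh on every
# call, so signatures are only comparable within a single call.
import numpy as np
from scipy.sparse import coo_matrix

docs = coo_matrix(np.random.RandomState(0).rand(3, 64))
print(get_lshash(docs))  # three 512-bit signatures as decimal strings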
def truncate_unrated_movies(
        um: coo_matrix, row2uid: np.ndarray,
        col2mid: np.ndarray) -> Tuple[coo_matrix, np.ndarray, np.ndarray]:
    """Truncate movies that have not been rated at all.

    Arguments:
        um {coo_matrix} -- U*M rating table where U is the number of users
            and M is the number of movies.
        row2uid {np.ndarray} -- mapping from row index to USER_ID.
        col2mid {np.ndarray} -- mapping from col index to MOVIE_ID.

    Returns:
        Tuple[coo_matrix, np.ndarray, np.ndarray] --
            coo_matrix: U*M_trunc sparse rating table.
            np.ndarray, np.ndarray: USER_ID mapping and MOVIE_ID mapping
                after truncation.
    """
    movie_ratings = um.tocsc()
    num_ratings_per_movie = np.sum(movie_ratings != 0, axis=0)
    movies_with_ratings = np.squeeze(np.asarray(num_ratings_per_movie != 0))
    return movie_ratings[:, movies_with_ratings].tocoo(), \
        row2uid, \
        col2mid[movies_with_ratings]
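# A usage sketch for truncate_unrated_movies: the middle movie has no
# ratings, so its column and its MOVIE_ID are dropped.
import numpy as np
from scipy.sparse import coo_matrix

um = coo_matrix(np.array([[5., 0., 3.],
                          [4., 0., 0.]]))
um_trunc, uids, mids = truncate_unrated_movies(
    um, np.array([10, 11]), np.array([100, 101, 102]))
print(um_trunc.toarray(), mids)  # mids == [100, 102]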
def getDegrees(mtx: sp.coo_matrix) -> np.ndarray:
    """Return the degree of each node (column sums) as a 1-D array."""
    return np.array(mtx.sum(axis=0)).squeeze()
def stage_data(spots: pd.DataFrame, coo: coo_matrix
               ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Reads the spots and the label image that are passed in and calculates
    which cell (if any) encircles any given spot within its boundaries. It
    also retrieves the coordinates of the cell boundaries, the cell centroids
    and the cell area.
    """
    logger.info(' Number of spots passed-in: %d' % spots.shape[0])
    logger.info(' Number of segmented cells: %d' % len(set(coo.data)))
    logger.info(' Segmentation array implies that image has width: %dpx and '
                'height: %dpx' % (coo.shape[1], coo.shape[0]))
    mask_x = (spots.x >= 0) & (spots.x <= coo.shape[1])
    mask_y = (spots.y >= 0) & (spots.y <= coo.shape[0])
    spots = spots[mask_x & mask_y]

    # 1. Find which cell the spots lie within
    yx_coords = spots[['y', 'x']].values.T
    inc = inside_cell(coo.tocsr(), yx_coords)
    spots = spots.assign(label=inc)

    # 2. Get cell centroids and area
    props = skmeas.regionprops(coo.toarray().astype(np.int32))
    props_df = pd.DataFrame(
        data=[(d.label, d.area, d.centroid[1], d.centroid[0]) for d in props],
        columns=['label', 'area', 'x_cell', 'y_cell'])

    # 3. Get the cell boundaries
    cell_boundaries = extract_borders_dip(coo.toarray().astype(np.uint32),
                                          0, 0, [0])
    assert props_df.shape[0] == cell_boundaries.shape[0] == coo.data.max()
    assert set(spots.label[spots.label > 0]) <= set(props_df.label)

    cells = props_df.merge(cell_boundaries)
    cells = cells.sort_values(by=['label', 'x_cell', 'y_cell'])
    assert cells.shape[0] == cell_boundaries.shape[0] == props_df.shape[0]

    # join spots and cells on the cell label so you can get the x, y coords
    # of the cell for any given spot
    spots = spots.merge(cells, how='left', on=['label'])

    _cells = cells[['label', 'area', 'x_cell', 'y_cell']].rename(
        columns={'x_cell': 'x', 'y_cell': 'y'})
    _cell_boundaries = cells[['label', 'coords']]
    _spots = spots[['x', 'y', 'label', 'Gene', 'x_cell', 'y_cell']].rename(
        columns={'Gene': 'target', 'x': 'x_global', 'y': 'y_global'})

    return _cells, _cell_boundaries, _spots
def explicit_to_implicit(rating_mtx: coo_matrix) -> None:
    """Replace all stored ratings with 1.0, in place."""
    rating_mtx.data = np.ones(shape=(rating_mtx.nnz,), dtype=np.float64)
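# A usage sketch for explicit_to_implicit: the conversion happens in place,
# flattening every stored rating to 1.0.
import numpy as np
from scipy.sparse import coo_matrix

ratings = coo_matrix(np.array([[5.0, 0.0], [0.0, 2.5]]))
explicit_to_implicit(ratings)
print(ratings.toarray())  # nonzero entries are now 1.0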
def get_entity_corr_coef(interactions: coo_matrix, entity_id: int,
                         entity_type: str, embeddings: dict,
                         ignore_sparse_zeros=True, use_zero_mean=False,
                         corr_type='pearson', neg_sampling=False,
                         check_normal_dist=True):
    """
    Assumes a rating matrix with rows for users and columns for items
    """
    p = embeddings['user'].shape[1]
    cov_for_p_variables = []
    if entity_type == 'user':
        embed = embeddings['user'][entity_id]
        # embedding used for covariance computation
        cov_embed = embeddings['item']
        # ratings used for covariance computation
        ratings = np.squeeze(
            np.asarray(interactions.tocsr()[entity_id, :].todense()))
    elif entity_type == 'item':
        embed = embeddings['item'][entity_id]
        # embedding used for covariance computation
        cov_embed = embeddings['user']
        # ratings used for covariance computation
        ratings = np.squeeze(
            np.asarray(interactions.tocsr()[:, entity_id].todense()))

    if ignore_sparse_zeros:
        idx = np.where(ratings != 0)[0]
        ratings = ratings[idx]
        # TODO: Use `sample_items` method
        # Use this for BPR
        if neg_sampling:
            if entity_type == 'user':
                n_sample = interactions.shape[1]
            else:
                n_sample = interactions.shape[0]
            neg_idx = np.random.randint(n_sample, size=len(idx))
            # neg_idx = np.random.choice(
            #     np.setdiff1d(np.arange(interactions.n_items), pos_idx),
            #     size=len(pos_idx), replace=False)
            neg_ratings = [0] * len(ratings)
            idx = np.concatenate([idx, neg_idx])
            ratings = np.concatenate([ratings, neg_ratings])
        cov_embed = cov_embed[idx]

    for k in range(p):
        cov_embed_latent_variables_at_k = cov_embed[:, k]
        cov_mat_for_k = get_cov(ratings, cov_embed_latent_variables_at_k,
                                use_zero_mean=use_zero_mean)
        cov_for_k = cov_mat_for_k[0, 1]
        cov_for_p_variables.append(cov_for_k)

    # TODO: Change from printing back to logging
    if check_normal_dist:
        alpha = 1e-3
        p_embed = normaltest(embed)[1]
        p_cov_for_p_variables = normaltest(cov_for_p_variables)[1]
        if p_embed < alpha:
            print(f"{entity_type}-{entity_id}: Entity embeddings are "
                  f"unlikely to be normally distributed.")
        if p_cov_for_p_variables < alpha:
            print(f"{entity_type}-{entity_id}: Covariances are unlikely "
                  f"to be normally distributed.")

    cov_for_p_variables = np.array(cov_for_p_variables)
    corr_coef = get_corr_coef(embed, cov_for_p_variables,
                              corr_type=corr_type)
    return corr_coef