Example 1
    def normalize_adj(self, mx: sp.coo_matrix) -> sp.coo_matrix:
        """Symmetrically normalize a sparse adjacency matrix: D^-1/2 * A * D^-1/2"""
        rowsum = np.array(mx.sum(1))
        r_inv_sqrt = np.power(rowsum, -0.5).flatten()
        r_inv_sqrt[np.isinf(r_inv_sqrt)] = 0.
        r_mat_inv_sqrt = sp.diags(r_inv_sqrt)
        return mx.dot(r_mat_inv_sqrt).transpose().dot(r_mat_inv_sqrt).tocoo()
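A minimal usage sketch, assuming the usual import numpy as np / import scipy.sparse as sp aliases; since the body never touches self, the method can be exercised standalone:

import numpy as np
import scipy.sparse as sp

# hypothetical 3-node adjacency matrix; all-zero rows are handled by the
# isinf guard above (their inverse sqrt-degree is forced to 0)
adj = sp.coo_matrix(np.array([[0., 1., 1.],
                              [1., 0., 0.],
                              [1., 0., 0.]]))
norm = normalize_adj(None, adj)  # None stands in for the unused `self`
print(norm.toarray())            # D^-1/2 * A * D^-1/2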
Example 2
def multi_particle(H: sparse.coo_matrix) -> pyquil.paulis.PauliSum:
    """
    Creates a Qubit-operator from a (sparse) matrix. This function uses
    (almost) all states and, thus, requires (approximately) log(N) qubits
    for an N-dimensional Hilbert space.

    The idea for converting a matrix element to an operator is to raise/lower
    the qubits differing between two states to convert one basis-state to the
    other. The qubits that are not negated must be checked to have the correct
    value (using an analogue of the counting operator) to not get extra terms
    (one could perhaps allow for extra terms and compensate for them later?).

    0 = up
    1 = down

    (1+Zi)/2 checks that qubit i is 0 (up)
    (1-Zi)/2 checks that qubit i is 1 (down)
    (Xi+1j*Yi)/2 lowers qubit from 1 (down) to 0 (up)
    (Xi-1j*Yi)/2 raises qubit from 0 (up) to 1 (down)

    @author: Joel

    :param H: An array (array-like) representing the hamiltonian.
    :return: Hamiltonian as PyQuil PauliSum.
    """
    # Convert to sparse coo_matrix
    if not sparse.issparse(H):
        H = sparse.coo_matrix(H)
    elif H.getformat() != "coo":
        H = H.tocoo()
    # The main part of the function
    H_op = QubitOperator()
    for i, j, data in zip(H.row, H.col, H.data):
        new_term = QubitOperator(())  # = I
        for qubit in range(int.bit_length(H.shape[0] - 1)):
            if (i ^ j) & (1 << qubit):
                # lower/raise qubit
                new_term *= QubitOperator((qubit, "X"), 1 / 2) + \
                            QubitOperator((qubit, "Y"),
                                          1j * (int(
                                              j & (1 << qubit) != 0) - 1 / 2))
            else:
                # check that qubit has correct value (same as i and j)
                new_term *= QubitOperator((), 1 / 2) + \
                            QubitOperator((qubit, "Z"),
                                          1 / 2 - int(j & (1 << qubit) != 0))
        H_op += data * new_term
    return qubitop_to_pyquilpauli(H_op)
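The four operator identities quoted in the docstring can be sanity-checked with dense 2x2 Pauli matrices using nothing but numpy (independent of openfermion/pyquil):

import numpy as np

X = np.array([[0, 1], [1, 0]], dtype=complex)
Y = np.array([[0, -1j], [1j, 0]])
Z = np.array([[1, 0], [0, -1]], dtype=complex)
I = np.eye(2)
ket0 = np.array([1, 0], dtype=complex)  # 0 = up
ket1 = np.array([0, 1], dtype=complex)  # 1 = down

assert np.allclose((I + Z) / 2 @ ket0, ket0)       # checks qubit is 0 (up)
assert np.allclose((I + Z) / 2 @ ket1, 0)          # ...and annihilates |1>
assert np.allclose((X + 1j * Y) / 2 @ ket1, ket0)  # lowers 1 (down) to 0 (up)
assert np.allclose((X - 1j * Y) / 2 @ ket0, ket1)  # raises 0 (up) to 1 (down)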
Example 3
def binarize_roi_mask(roi_mask: coo_matrix,
                      absolute_threshold: Optional[float] = None,
                      quantile: float = 0.1) -> coo_matrix:
    """Binarize a coo_matrix representing an ROI mask.

    Parameters
    ----------
    roi_mask : coo_matrix
        An ROI mask in coo_matrix format.
    absolute_threshold : Optional[float], optional
        ROI data (Suite2P weights) at or above the threshold will be
        set to 1, and to 0 otherwise. If None is provided, the threshold
        will be determined via quantile. By default None.
    quantile : float, optional
        Compute the specified quantile and use it as the absolute_threshold,
        by default 0.1. This parameter will be ignored if an absolute_threshold
        is provided.

    Returns
    -------
    coo_matrix
        A binarized version of the coo_matrix.
    """
    if absolute_threshold is None:
        absolute_threshold = np.quantile(roi_mask.data, quantile)

    binarized_mask = roi_mask.copy()
    binarized_mask.data = np.where(binarized_mask.data >= absolute_threshold,
                                   1, 0)
    binarized_mask.eliminate_zeros()

    return binarized_mask
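A small usage sketch, assuming numpy, scipy.sparse.coo_matrix, and typing.Optional are imported as in the snippet's home module:

import numpy as np
from scipy.sparse import coo_matrix

weights = coo_matrix(np.array([[0.0, 0.2, 0.9],
                               [0.0, 0.5, 0.0]]))
binary = binarize_roi_mask(weights, absolute_threshold=0.5)
print(binary.toarray())  # entries >= 0.5 become 1; the rest are pruned
# [[0 0 1]
#  [0 1 0]]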
Example 4
def invert_qubits_state(state: coo_matrix, length: int) -> np.ndarray:
    """Reverse the qubit (bit) order of a 2**length state vector.

    aux.decimal_to_binary and aux.to_decimal are project-local helpers
    converting between an integer index and its bit-string representation.
    """
    new_idx = range(2**length)
    result = np.array(list(
        map(lambda i: aux.to_decimal(aux.decimal_to_binary(i, length)[::-1]),
            new_idx)),
                      dtype=np.int64)
    return state.toarray().reshape((2**length), )[result]
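For reference, the same bit-reversal permutation can be built without the project-local aux helpers; a numpy-only sketch (the bit_reversed_indices name is ours):

import numpy as np

def bit_reversed_indices(length: int) -> np.ndarray:
    """Permutation that reverses the bit order of indices 0 .. 2**length - 1."""
    idx = np.arange(2 ** length)
    rev = np.zeros_like(idx)
    for _ in range(length):
        rev = (rev << 1) | (idx & 1)  # shift the next lowest bit into rev
        idx >>= 1
    return rev

print(bit_reversed_indices(2))  # [0 2 1 3]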
Example 5
def normalize_sp(mx: sp.coo_matrix) -> sp.coo_matrix:
    rows_sum = np.array(mx.sum(1)).astype('float')  # sum over each row
    rows_inv = np.power(rows_sum, -1).flatten()  # take the reciprocal
    rows_inv[np.isinf(rows_inv)] = 0  # an all-zero row makes its entry infinite; reset those entries to 0
    rows_mat_inv = sp.diags(rows_inv)  # diagonal matrix with rows_inv on the diagonal
    mx = rows_mat_inv.dot(mx)  # .dot(cols_mat_inv)
    return mx
Example 6
def one_particle(H: sparse.coo_matrix) -> pyquil.paulis.PauliSum:
    """
    Generates a PauliSum(pyquil) given a Hamiltonian-matrix.

    Creates a Hamiltonian operator from a (sparse) matrix. This function uses
    a one-particle formulation and, thus, requires N qubits for an N-dimensional
    Hilbert space.

    @author: Axel, Joel

    :param H: An array (array_like) representing the hamiltonian.
    :return: Hamiltonian as PyQuil PauliSum.
    """
    # Create the Hamiltonian with a and a^dagger
    Hamiltonian = FermionOperator()
    if sparse.issparse(H):
        H = H.tocoo()
        for i, j, data in zip(H.row, H.col, H.data):
            Hamiltonian += data * FermionOperator(((int(i), 1), (int(j), 0)))
    else:
        if not isinstance(H, np.ndarray):
            H = np.asanyarray(H)
        for i in range(H.shape[0]):
            for j in range(H.shape[1]):
                Hamiltonian += H[i, j] * FermionOperator(((i, 1), (j, 0)))

    Hamiltonian = jordan_wigner(Hamiltonian)
    Hamiltonian = qubitop_to_pyquilpauli(Hamiltonian)
    return Hamiltonian
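A usage sketch, assuming openfermion (FermionOperator, jordan_wigner) and forestopenfermion (qubitop_to_pyquilpauli) are installed; the 2x2 Hamiltonian is purely illustrative:

import numpy as np
# assumed to be in scope for the function above:
# from openfermion.ops import FermionOperator
# from openfermion.transforms import jordan_wigner
# from forestopenfermion import qubitop_to_pyquilpauli

H = np.array([[1.0, 0.5],
              [0.5, -1.0]])
pauli_sum = one_particle(H)  # one-particle encoding: 2 qubits for a 2x2 H
print(pauli_sum)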
Example 7
    def normalize(self, mx: sp.coo_matrix) -> sp.coo_matrix:
        """Row-normalize sparse matrix"""
        rowsum = np.array(mx.sum(1))
        r_inv = np.power(rowsum, -1.0).flatten()
        r_inv[np.isinf(r_inv)] = 0.
        r_mat_inv = sp.diags(r_inv)
        mx = r_mat_inv.dot(mx).tocoo()
        return mx
Example 8
def visualize_matrix(h: sparse.coo_matrix):
    """
    Render a sparse matrix as a dense DataFrame for inspection.

    :param h: sparse matrix in coo_matrix format
    :return: pd.DataFrame
    """
    normal_matrix = h.todense()
    return pd.DataFrame(normal_matrix)
Example 9
def run_leiden(
    graph: sp.coo_matrix,
    directed: bool,
    partition_type: Optional[Type[MutableVertexPartition]],
    resolution_parameter: float,
    n_iterations: int,
    seed: Optional[int],
    use_weights: bool,
    kargs,
) -> Tuple[np.ndarray, float]:
    """
    Wrapper for leiden community detection

    Args:
        graph (sp.coo_matrix): Affinity matrix
        directed (bool): See below in 'cluster()'
        partition_type (Optional[Type[MutableVertexPartition]]): See below in 'cluster()'
        resolution_parameter (float): See below in 'cluster()'
        n_iterations (int): See below in 'cluster()'
        seed (Optional[int]): See below in 'cluster()'
        use_weights (bool): See below in 'cluster()'
        kargs: See below in 'cluster()'

    Returns:
        communities, Q (Tuple[np.ndarray, float]): See below in 'cluster()'
    """

    # convert the input graph from scipy.sparse.coo_matrix to an igraph Graph
    # get the vertex indices of each edge
    edgelist = np.vstack(graph.nonzero()).T.tolist()
    g = ig.Graph(max(graph.shape), edgelist, directed=directed)
    # carry the affinity values over as edge weights
    g.es["weights"] = graph.data

    if not partition_type:
        partition_type = leidenalg.RBConfigurationVertexPartition
    if resolution_parameter:
        kargs["resolution_parameter"] = resolution_parameter
    if use_weights:
        kargs["weights"] = np.array(g.es["weights"]).astype("float64")
    kargs["n_iterations"] = n_iterations
    kargs["seed"] = seed

    print("Running Leiden optimization", flush=True)
    tic_ = time.time()
    communities = leidenalg.find_partition(
        g,
        partition_type=partition_type,
        **kargs,
    )
    Q = communities.q
    print(
        "Leiden completed in {} seconds".format(time.time() - tic_),
        flush=True,
    )
    communities = np.asarray(communities.membership)

    return communities, Q
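A minimal invocation sketch, assuming leidenalg and python-igraph are installed and the module-level imports used above (numpy as np, scipy.sparse as sp, time, leidenalg, igraph as ig) are in scope; the 4-node affinity matrix is purely illustrative:

import numpy as np
import scipy.sparse as sp

rows = np.array([0, 1, 2, 3])
cols = np.array([1, 0, 3, 2])
graph = sp.coo_matrix((np.ones(4), (rows, cols)), shape=(4, 4))

communities, Q = run_leiden(graph, directed=False, partition_type=None,
                            resolution_parameter=1.0, n_iterations=2,
                            seed=0, use_weights=True, kargs={})
print(communities, Q)  # e.g. [0 0 1 1] plus the partition quality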
Example 10
def plot_binarized_vs_weighted_roi(
        weighted_mask: coo_matrix, binary_mask: coo_matrix,
        weighted_trace: np.ndarray,
        binary_trace: np.ndarray) -> matplotlib.figure.Figure:
    fig = plt.figure(constrained_layout=True)
    gs = fig.add_gridspec(nrows=3, ncols=6)

    # Plot ROIs
    binary_roi_ax = fig.add_subplot(gs[:-1, -3:])
    weighted_roi_ax = fig.add_subplot(gs[:-1, :-3],
                                      sharex=binary_roi_ax,
                                      sharey=binary_roi_ax)

    weighted_roi_ax.set_xticks([])
    weighted_roi_ax.set_yticks([])
    weighted_roi_ax.set_title("Native Suite2P (weighted) ROI")
    weighted_roi_ax.imshow(weighted_mask.toarray())
    xmin, xmax, ymin, ymax = weighted_roi_ax.axis()

    binary_roi_ax.set_xticks([])
    binary_roi_ax.set_yticks([])
    binary_roi_ax.set_title("Binarized Suite2P ROI")
    binary_roi_ax.imshow(binary_mask.toarray())
    binary_roi_ax.set_xlim(xmin, xmax)
    binary_roi_ax.set_ylim(ymin, ymax)

    # Plot traces
    binary_trace_ax = fig.add_subplot(gs[-1, -3:])
    weighted_trace_ax = fig.add_subplot(gs[-1, :-3],
                                        sharex=binary_trace_ax,
                                        sharey=binary_trace_ax)

    weighted_trace_ax.set_ylabel("Weighted F")
    weighted_trace_ax.set_xlabel("Frame Number")
    weighted_trace_ax.plot(range(len(weighted_trace)),
                           weighted_trace,
                           linewidth=0.5)

    binary_trace_ax.set_ylabel("Binarized F")
    binary_trace_ax.set_xlabel("Frame Number")
    binary_trace_ax.plot(range(len(binary_trace)),
                         binary_trace,
                         linewidth=0.25)

    return fig
Example 11
def col_compress(matrix_in: coo_matrix, indices: bool = False) -> csr_matrix:
    """Sort each row, then drop columns identical to their right neighbour.

    If `indices` is True, return the list of dropped column indices
    instead of the compressed csr_matrix.
    """
    matrix = np.sort(matrix_in.toarray(), axis=1)
    indices_out = []
    for col_index in range(matrix.shape[1] - 1):
        if np.array_equal(matrix[:, col_index], matrix[:, col_index + 1]):
            indices_out.append(col_index)
    if indices:
        return indices_out
    return csr_matrix(
        matrix[:, list(set(range(matrix.shape[1])) - set(indices_out))],
        dtype=np.uint8)
Example 12
    def scipy2torch_sparse(self,
                           mx: sp.coo_matrix) -> torch.sparse.FloatTensor:
        values = mx.data
        indices = np.vstack((mx.row, mx.col))

        i = torch.LongTensor(indices)
        v = torch.FloatTensor(values)
        shape = mx.shape
        mx = torch.sparse.FloatTensor(i, v, torch.Size(shape))
        if self.use_cuda:
            mx = mx.cuda(non_blocking=True)
        return mx
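The torch.sparse.FloatTensor constructor used above is deprecated in recent PyTorch; an equivalent standalone sketch with the current torch.sparse_coo_tensor API:

import numpy as np
import scipy.sparse as sp
import torch

mx = sp.coo_matrix(np.array([[0., 2.], [3., 0.]]))
i = torch.tensor(np.vstack((mx.row, mx.col)), dtype=torch.long)
v = torch.tensor(mx.data, dtype=torch.float)
t = torch.sparse_coo_tensor(i, v, size=mx.shape)
print(t.to_dense())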
Example 13
def mat_all_point(m_mat: coo_matrix, vertex: list, alpha: float):
    """
    Compute E - alpha * m_mat.T, where E is the identity matrix
    :param m_mat: transition matrix
    :param vertex: total user and item points
    :param alpha: the probability of continuing the random walk
    :return: csr_matrix of E - alpha * m_mat.T
    """
    total_len = len(vertex)
    row = np.array(range(total_len))
    col = np.array(range(total_len))
    data = np.ones(total_len)
    eye_t = coo_matrix((data, (row, col)), shape=(total_len, total_len))
    return eye_t.tocsr() - alpha * m_mat.tocsr().transpose()
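For instance, with a 2x2 transition matrix (pure numpy/scipy):

import numpy as np
from scipy.sparse import coo_matrix

m = coo_matrix(np.array([[0.0, 1.0],
                         [1.0, 0.0]]))
print(mat_all_point(m, vertex=['user_a', 'item_b'], alpha=0.8).toarray())
# [[ 1.  -0.8]
#  [-0.8  1. ]]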
Example 14
    def dump_data(self, time_step: Optional[datetime], x_analysis: np.ndarray,
                  p_analysis: np.ndarray, p_analysis_inv: sp.coo_matrix,
                  state_mask: np.ndarray, n_params: int):
        data = []
        for param in self.param_positions:
            index = self.param_positions[param]
            param_values = np.zeros(state_mask.shape, dtype=np.float32)
            param_values[state_mask] = x_analysis[index::n_params]
            data.append(param_values)
            param_unc = np.zeros(state_mask.shape, dtype=np.float32)
            param_unc[state_mask] = 1. / np.sqrt(
                p_analysis_inv.diagonal()[index::n_params])
            data.append(param_unc)
        self.writer.write(data, self._width, self._height, self._offset_x,
                          self._offset_y)
Example 15
    def _build_feat_tensor(self,
                           feat_matrix: sp.coo_matrix,
                           device: Optional[T.device] = T.device('cpu')):
        feat_index_list = feat_matrix.nonzero()
        feat_index_array: np.ndarray = np.vstack(feat_index_list)

        feat_index = T.tensor(feat_index_array.tolist(),
                              dtype=T.long,
                              device=device)
        feat_value = T.tensor(feat_matrix.data, dtype=T.double, device=device)
        feat_tensor = T.sparse_coo_tensor(feat_index,
                                          feat_value,
                                          size=feat_matrix.shape,
                                          device=device)

        return feat_tensor
Example 16
    def compute_transition_matrix(self,
                                  knn: sparse.coo_matrix,
                                  x: np.ndarray,
                                  v: np.ndarray,
                                  epsilon: float = 0.0,
                                  reverse: bool = False) -> sparse.csr_matrix:
        """
		Compute a right-stochastic matrix representing transition probabilities from each node
		
		Args:
			knn        KNN graph (n_cells, n_cells)
			x          Embedding positions (n_cells, n_dims)
			v          Velocities on the embedding (n_cells, n_dims)
			epsilon    Offset added to the scalar projections before clipping to zero (default 0.0)
			reverse    Compute the reverse transition matrix (for backwards diffusion)

		Remarks:
			Computes a Markov transition matrix for the KNN graph. The probability of transition along an edge
			is proportional to the scalar projection of the velocity vector onto that edge, times the reciprocal
			of the edge length. Edges that get negative scalar projections are clipped to zero and the total
			non-zero outgoing edges are normalized to a sum of 1.0.
		"""
        # vertices for each edge
        knn = knn.tocoo()
        (v0, v1) = (knn.row, knn.col)

        # calculate edge unit vectors
        uv = x[v1] - x[
            v0]  # Vector corresponding to an edge from v0 to v1, shape (n_edges, n_dims)
        norms = np.linalg.norm(uv, axis=1)
        uv = uv / norms[:, None]  # Convert to unit vector

        # Project the velocity vectors onto edges, and clip to zero
        scalar_projection = np.array([a.dot(b) for a, b in zip(v[v0], uv)
                                      ])  # Shape: (n_edges)
        if reverse:
            scalar_projection = -scalar_projection
        scalar_projection += epsilon
        # scalar_projection += scalar_projection.min()
        np.clip(scalar_projection, a_min=0, a_max=None, out=scalar_projection)

        # Calculate transition probabilities
        p = scalar_projection * (1 / norms)
        tr = normalize(sparse.coo_matrix((p, (v0, v1))).tocsr(),
                       axis=1,
                       norm='l1')
        return tr
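A toy invocation sketch, assuming from sklearn.preprocessing import normalize plus the numpy/scipy imports used above; since the body never touches self, None can stand in for it:

import numpy as np
from scipy import sparse

knn = sparse.coo_matrix(np.array([[0., 1., 1.],
                                  [1., 0., 1.],
                                  [1., 1., 0.]]))
x = np.array([[0., 0.], [1., 0.], [0., 1.]])    # embedding positions
v = np.array([[1., 0.], [-1., 0.], [0., -1.]])  # velocities
tr = compute_transition_matrix(None, knn, x, v)
print(tr.toarray())  # right-stochastic rows wherever an outgoing edge survived clipping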
Example 17
def reorder_rows(X: coo_matrix, idx: np.ndarray) -> None:
    """ Reorders the rows of the COO sparse matrix given in argument.

    Parameters
    ----------
    X : scipy.sparse.coo_matrix
        The sparse matrix to reorder.
    idx: numpy.ndarray,  shape=(X.shape[0],)
        Row indices used to reorder the matrix.
    """
    idx = idx.flatten()
    assert isinstance(
        X, scipy.sparse.coo_matrix
    ), "X must be scipy.sparse.coo_matrix"
    assert X.shape[0] == idx.shape[0], "idx shape[0] must be X shape[0]"
    idx = np.argsort(idx)
    idx = np.asarray(idx, dtype=X.row.dtype)
    X.row = idx[X.row]
Example 18
    def _hu_moments(roi: coo_matrix) -> np.ndarray:
        """Returns the 7 Hu moments for an ROI image. See 
        https://scikit-image.org/docs/0.17.x/api/skimage.measure.html#moments-hu        # noqa
        for more information.

        Returns
        -------
        7-element, 1d np.array of Hu's image moments

        References
        ----------
        M. K. Hu, “Visual Pattern Recognition by Moment Invariants”, 
        IRE Trans. Info. Theory, vol. IT-8, pp. 179-187, 1962
        """
        roi_image = roi.toarray()
        mu = moments_central(roi_image)
        nu = moments_normalized(mu)
        return moments_hu(nu)
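A quick sketch of calling it on a toy mask, treating the (implicitly static) method as a standalone function; assumes skimage.measure provides the moments_central, moments_normalized, and moments_hu helpers used above:

import numpy as np
from scipy.sparse import coo_matrix

roi = coo_matrix(np.array([[0., 1., 1.],
                           [0., 1., 1.]]))
print(_hu_moments(roi))  # 7-element 1d array of Hu moments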
Example 19
def laplacian_score(X: np.ndarray, W: sparse.coo_matrix) -> np.ndarray:
	"""
	This function implements the laplacian score feature selection, steps are as follows:
	1. Construct the affinity matrix W if it is not specified
	2. For the r-th feature, we define fr = X(:,r), D = diag(W*ones), ones = [1,...,1]', L = D - W
	3. Let fr_hat = fr - (fr'*D*ones)*ones/(ones'*D*ones)
	4. Laplacian score for the r-th feature is score = (fr_hat'*L*fr_hat)/(fr_hat'*D*fr_hat)
	Input
	-----
	X: {numpy array}, shape (n_samples, n_features)
		input data
	W: {sparse matrix}, shape (n_samples, n_samples)
		input affinity matrix

	Output
	------
	score: {numpy array}, shape (n_features,)
		laplacian score for each feature

	Reference
	---------
	He, Xiaofei et al. "Laplacian Score for Feature Selection." NIPS 2005.
	"""

	# build the diagonal D matrix from affinity matrix W
	D = np.array(W.sum(axis=1))
	L = W  # despite the name this is W itself; L = D - W enters implicitly via the final "1 - ..." step
	tmp = np.dot(np.transpose(D), X)
	D = sparse.diags(np.transpose(D), [0])
	Xt = np.transpose(X)
	t1 = np.transpose(np.dot(Xt, D.todense()))
	t2 = np.transpose(np.dot(Xt, L.todense()))
	# compute the numerator of Lr
	D_prime = np.sum(np.multiply(t1, X), 0) - np.multiply(tmp, tmp) / D.sum()
	# compute the denominator of Lr
	L_prime = np.sum(np.multiply(t2, X), 0) - np.multiply(tmp, tmp) / D.sum()
	# avoid the denominator of Lr to be 0
	D_prime[D_prime < 1e-12] = 10000

	# compute laplacian score for all features
	score = 1 - np.array(np.multiply(L_prime, 1 / D_prime))[0, :]
	return np.transpose(score)
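A toy invocation, assuming numpy and scipy.sparse imports as above; the fully connected affinity graph is only illustrative:

import numpy as np
from scipy import sparse

X = np.random.rand(5, 3)                 # 5 samples, 3 features
W = sparse.coo_matrix(np.ones((5, 5)))   # fully connected affinity graph
print(laplacian_score(X, W))             # one score per feature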
Example 20
    def layout_knn(self, knn: sparse.coo_matrix) -> np.ndarray:
        edges = np.stack((knn.row, knn.col), axis=1)

        # Calculate Jaccard similarities
        js = []  # type: List[float]
        knncsr = knn.tocsr()
        for i, j in edges:
            r = knncsr.getrow(i)
            c = knncsr.getrow(j)
            shared = r.minimum(c).nnz
            total = r.maximum(c).nnz
            js.append(shared / total)
        weights = np.array(js) + 0.00001  # OpenOrd doesn't like 0 weights

        self.graph = nx.Graph()
        self.graph.add_nodes_from(range(knn.shape[0]))
        for i, edge in enumerate(edges):
            self.graph.add_edge(edge[0], edge[1], weight=weights[i])

        return self.layout(self.graph)
Example 21
    def _iter_ids(self, ids: ndarray, mat: coo_matrix,
                  n_dim: int) -> Iterator[ndarray]:
        """
        Iterate over metadata vectors of size 'n_dim' encoded in 'mat'.

        If too few metadata elements are found to satisfy 'n_dim' requirement, the
        vectors will be padded with zeros to ensure homogeneity.

        See Also
        --------
        Dataset._iter_meta

        """

        if mat is not None:
            yield from self._iter_meta(ids, mat.tocsr(), n_dim)
        elif n_dim > 1:
            ids = np.c_[ids, np.zeros((len(ids), n_dim - 1), dtype=int)]
            yield from (_ for _ in ids)
        else:
            yield from (_ for _ in ids.reshape(-1, 1))
Example 22
    def fit_predict(self, knn: sparse.coo_matrix) -> np.ndarray:
        """
		Given a sparse adjacency matrix, perform Louvain-Jaccard clustering

		Args:
			knn:	The sparse adjacency matrix

		Returns:
			labels:	The cluster labels

		Remarks:
			After clustering, the Louvain-Jaccard weighted undirected graph is available as
			the property 'graph' of type nx.Graph, and also in the form of a sparse adjacency
			matrix as the property 'lj_knn' of type scipy.sparse.coo_matrix
		"""
        if self.jaccard:
            edges = np.stack((knn.row, knn.col), axis=1)
            # Calculate Jaccard similarities
            js = []  # type: List[float]
            knncsr = knn.tocsr()
            for i, j in edges:
                r = knncsr.getrow(i)
                c = knncsr.getrow(j)
                shared = r.minimum(c).nnz
                total = r.maximum(c).nnz
                if total > 0:
                    js.append(shared / total)
                else:
                    js.append(0)
            weights = np.array(js) + 0.00001  # OpenOrd doesn't like 0 weights

            self.lj_knn = sparse.coo_matrix((weights, (knn.row, knn.col)))
            self.graph = nx.Graph()
            for i, edge in enumerate(edges):
                self.graph.add_edge(edge[0], edge[1], weight=weights[i])
        else:
            self.graph = nx.from_scipy_sparse_matrix(knn)
        partitions = community.best_partition(self.graph,
                                              resolution=self.resolution)
        return np.array([partitions[key] for key in range(knn.shape[0])])
Example 23
def meanFilterSparse(a: sp.coo_matrix, h: int):
    """Apply a mean filter to an input sparse matrix. This convolves
    the input with a kernel of size 2*h + 1 with constant entries and
    subsequently reshape the output to be of the same shape as input

    Args:
        a: `sp.coo_matrix`, Input matrix to be filtered
        h: `int` half-size of the filter

    Returns:
        `sp.coo_matrix` filterd matrix
    """
    assert h > 0, "meanFilterSparse half-size must be greater than 0"
    assert sp.issparse(a) and a.getformat() == 'coo',\
        "meanFilterSparse input matrix is not scipy.sparse.coo_matrix"
    assert a.shape[0] == a.shape[1],\
        "meanFilterSparse cannot handle non-square matrix"
    fSize = 2 * h + 1
    # filter is a square matrix of constant 1 of shape (fSize, fSize)
    shapeOut = np.array(a.shape) + fSize - 1
    mToeplitz = sp.diags(np.ones(fSize),
                         np.arange(-fSize + 1, 1),
                         shape=(shapeOut[1], a.shape[1]),
                         format='csr')
    ans = sp.coo_matrix((mToeplitz @ a) @ mToeplitz.T)
    # remove the edges since we don't care about them if we are smoothing
    # the matrix itself
    ansNoEdge = ans.tocsr()[h:(h + a.shape[0]), h:(h + a.shape[1])].tocoo()
    # Assign different number of neighbors to the edge to better
    # match what the original R implementation of HiCRep does
    rowDist2Edge = np.minimum(ansNoEdge.row,
                              ansNoEdge.shape[0] - 1 - ansNoEdge.row)
    nDim1 = h + 1 + np.minimum(rowDist2Edge, h)
    colDist2Edge = np.minimum(ansNoEdge.col,
                              ansNoEdge.shape[1] - 1 - ansNoEdge.col)
    nDim2 = h + 1 + np.minimum(colDist2Edge, h)
    nNeighbors = nDim1 * nDim2
    ansNoEdge.data /= nNeighbors
    return ansNoEdge
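A usage sketch (numpy/scipy only): a 3x3 mean filter over a 4x4 matrix, with the edge-count correction described in the comments above:

import numpy as np
import scipy.sparse as sp

a = sp.coo_matrix(np.arange(16, dtype=float).reshape(4, 4))
smoothed = meanFilterSparse(a, h=1)
print(smoothed.toarray())  # same 4x4 shape, each entry a local mean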
Example 24
def crop_roi_mask(roi_mask: coo_matrix) -> coo_matrix:
    """Crop ROI mask into smallest rectangle that fits all nonzero elements

    Parameters
    ----------
    roi_mask : coo_matrix

    Returns
    -------
    coo_matrix
        A cropped ROI mask or None if coo_matrix is empty

    """

    bounds = roi_bounds(roi_mask)
    if bounds is None:
        return None

    # Convert coo to csr matrix so we can take advantage of indexing
    cropped_mask = roi_mask.tocsr()[bounds[0]:bounds[1], bounds[2]:bounds[3]]

    return cropped_mask.tocoo()
Example 25
def get_lshash(text: coo_matrix) -> List[str]:
    """
    Return list of cosine LSHs encoding text.
    """
    def cosine_LSH(vector, planes):
        """
        Return a single cosine LSH for a particular record and given planes.
        """
        sig = 0
        for plane in planes:
            sig <<= 1
            if vector.dot(plane) >= 0:
                sig |= 1
        return str(sig)

    bits = 512
    random_projections = np.random.randn(bits, text.shape[1])
    hashes = [
        cosine_LSH(text.getrow(idx), random_projections)
        for idx in range(text.shape[0])
    ]
    return hashes
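A usage sketch, assuming numpy, scipy.sparse.coo_matrix, and typing.List imports; note that the random hyperplanes are re-drawn on every call, so hashes are only comparable within a single invocation:

import numpy as np
from scipy.sparse import coo_matrix

docs = coo_matrix(np.random.rand(3, 8))  # e.g. 3 documents, 8 tf-idf terms
print(get_lshash(docs))  # three 512-bit signatures as decimal strings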
Example 26
def truncate_unrated_movies(um: coo_matrix,
                            row2uid: np.ndarray,
                            col2mid: np.ndarray) -> Tuple[coo_matrix, np.ndarray, np.ndarray]:
    """Drop movies that have not been rated at all.

    Arguments:
        um {coo_matrix} -- U*M rating table where U is the number of users
            and M is the number of movies.
        row2uid {np.ndarray} -- mapping from row index to USER_ID.
        col2mid {np.ndarray} -- mapping from col index to MOVIE_ID.

    Returns:
        Tuple[coo_matrix, np.ndarray, np.ndarray] -- the U*M_trunc sparse
            rating table, the unchanged USER_ID mapping, and the MOVIE_ID
            mapping after truncation.
    """
    movie_ratings = um.tocsc()
    num_ratings_per_movie = np.sum(movie_ratings != 0, axis=0)
    movies_with_ratings = np.squeeze(np.asarray(num_ratings_per_movie != 0))
    return movie_ratings[:, movies_with_ratings].tocoo(), \
           row2uid, \
           col2mid[movies_with_ratings]
Example 27
    def getDegrees(mtx: sp.coo_matrix) -> np.ndarray:
        return np.array(mtx.sum(axis=0)).squeeze()
Example 28
def stage_data(
        spots: pd.DataFrame,
        coo: coo_matrix) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Reads the spots and the label image that are passed in and calculates which cell (if any) encircles any
    given spot within its boundaries. It also retrieves the coordinates of the cell boundaries, the cell
    centroids and the cell area
    """
    logger.info(' Number of spots passed-in: %d' % spots.shape[0])
    logger.info(' Number of segmented cells: %d' % len(set(coo.data)))
    logger.info(
        ' Segmentation array implies that image has width: %dpx and height: %dpx'
        % (coo.shape[1], coo.shape[0]))
    mask_x = (spots.x >= 0) & (spots.x <= coo.shape[1])
    mask_y = (spots.y >= 0) & (spots.y <= coo.shape[0])
    spots = spots[mask_x & mask_y]

    # Debugging code!
    # resuffle
    # spots = spots.sample(frac=1).reset_index(drop=True)

    # _point = [5471-14, 110]
    # logger.info('label at (y, x): (%d, %d) is %d' % (_point[0], _point[1], coo.toarray()[_point[0], _point[1]]))

    # coo = remap_labels(coo)
    # logger.info('remapped label at (y, x): (%d, %d) is %d' % (_point[0], _point[1], coo.toarray()[_point[0], _point[1]]))

    # 1. Find which cell the spots lie within
    yx_coords = spots[['y', 'x']].values.T
    inc = inside_cell(coo.tocsr(), yx_coords)
    spots = spots.assign(label=inc)

    # 2. Get cell centroids and area
    props = skmeas.regionprops(coo.toarray().astype(np.int32))
    props_df = pd.DataFrame(data=[
        (d.label, d.area, d.centroid[1], d.centroid[0]) for d in props
    ],
                            columns=['label', 'area', 'x_cell', 'y_cell'])

    # 3. Get the cell boundaries
    cell_boundaries = extract_borders_dip(coo.toarray().astype(np.uint32), 0,
                                          0, [0])

    assert props_df.shape[0] == cell_boundaries.shape[0] == coo.data.max()
    assert set(spots.label[spots.label > 0]) <= set(props_df.label)

    cells = props_df.merge(cell_boundaries)
    cells = cells.sort_values(by=['label', 'x_cell', 'y_cell'])
    assert cells.shape[0] == cell_boundaries.shape[0] == props_df.shape[0]

    # join spots and cells on the cell label so you can get the x,y coords of the cell for any given spot
    spots = spots.merge(cells, how='left', on=['label'])

    _cells = cells[['label', 'area', 'x_cell', 'y_cell']].rename(columns={
        'x_cell': 'x',
        'y_cell': 'y'
    })
    _cell_boundaries = cells[['label', 'coords']]
    _spots = spots[['x', 'y', 'label', 'Gene', 'x_cell',
                    'y_cell']].rename(columns={
                        'Gene': 'target',
                        'x': 'x_global',
                        'y': 'y_global'
                    })

    return _cells, _cell_boundaries, _spots
Example 29
    def explicit_to_implicit(rating_mtx: coo_matrix):
        rating_mtx.data = np.ones(shape=(rating_mtx.nnz, ), dtype=np.float64)
Example 30
def get_entity_corr_coef(interactions: coo_matrix,
                         entity_id: int,
                         entity_type: str,
                         embeddings: dict,
                         ignore_sparse_zeros=True,
                         use_zero_mean=False,
                         corr_type='pearson',
                         neg_sampling=False,
                         check_normal_dist=True):
    """
    Assumes a rating matrix with rows for users and columns for items
    """
    p = embeddings['user'].shape[1]
    cov_for_p_variables = []

    if entity_type == 'user':
        embed = embeddings['user'][entity_id]
        # embedding used for covariance computation
        cov_embed = embeddings['item']
        # ratings used for covariance computation
        ratings = np.squeeze(
            np.asarray(interactions.tocsr()[entity_id, :].todense()))
    elif entity_type == 'item':
        embed = embeddings['item'][entity_id]
        # embedding used for covariance computation
        cov_embed = embeddings['user']
        # ratings used for covariance computation
        ratings = np.squeeze(
            np.asarray(interactions.tocsr()[:, entity_id].todense()))

    if ignore_sparse_zeros:
        idx = np.where(ratings != 0)[0]
        ratings = ratings[idx]
    else:
        idx = np.arange(len(ratings))  # keep `idx` defined for the indexing below

    # TODO: Use `sample_items` method
    # Use this for BPR
    if neg_sampling:
        if entity_type == 'user':
            n_sample = interactions.shape[1]
        else:
            n_sample = interactions.shape[0]
        neg_idx = np.random.randint(n_sample, size=len(idx))
        # neg_idx = np.random.choice(np.setdiff1d(np.arange(interactions.n_items),
        #                                         pos_idx), size=len(pos_idx),
        #                            replace=False)
        neg_ratings = [0] * len(ratings)
        idx = np.concatenate([idx, neg_idx])
        ratings = np.concatenate([ratings, neg_ratings])

    cov_embed = cov_embed[idx]

    for k in range(p):
        cov_embed_latent_variables_at_k = cov_embed[:, k]
        cov_mat_for_k = get_cov(ratings,
                                cov_embed_latent_variables_at_k,
                                use_zero_mean=use_zero_mean)
        cov_for_k = cov_mat_for_k[0, 1]
        cov_for_p_variables.append(cov_for_k)

    # TODO: Change from printing back to logging
    if check_normal_dist:
        alpha = 1e-3
        p_embed = normaltest(embed)[1]
        p_cov_for_p_variables = normaltest(cov_for_p_variables)[1]
        if p_embed < alpha:
            print(
                f"{entity_type}-{entity_id}: Entity Embeddings are unlikely normally distributed."
            )
        if p_cov_for_p_variables < alpha:
            print(
                f"{entity_type}-{entity_id}: Covariances are unlikely normally distributed."
            )

    cov_for_p_variables = np.array(cov_for_p_variables)
    corr_coef = get_corr_coef(embed, cov_for_p_variables, corr_type=corr_type)

    return corr_coef