Example #1
import numpy as np
from scipy import sparse

def similarity_from_sparse(matrix_a: sparse.csr_matrix,
                           matrix_b: sparse.csr_matrix):
    """Pairwise generalized Jaccard (Tanimoto) similarity between the rows of two sparse matrices."""
    intersection = matrix_a.dot(matrix_b.transpose()).toarray()
    norm_1 = np.array(matrix_a.multiply(matrix_a).sum(axis=1))
    norm_2 = np.array(matrix_b.multiply(matrix_b).sum(axis=1))
    union = norm_1 + norm_2.T - intersection
    return intersection / union
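A minimal usage sketch (the data below is illustrative, not from the original source): the function returns sim[i, j] = <a_i, b_j> / (||a_i||^2 + ||b_j||^2 - <a_i, b_j>) for every row pair.

import numpy as np
from scipy import sparse

a = sparse.csr_matrix(np.array([[1., 0., 2.],
                                [0., 3., 0.]]))
b = sparse.csr_matrix(np.array([[1., 0., 2.],
                                [4., 0., 0.]]))
sim = similarity_from_sparse(a, b)
print(sim)  # sim[0, 0] == 1.0: identical rows have similarity 1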
Example #2
import numpy as np
from scipy import sparse

def _compute_sparse_minimizer(hat_vect_matrix: sparse.csr_matrix,
                              X: sparse.csr_matrix) -> np.ndarray:
    # numerator of the minimizer; yields a dense column vector (size of u or v)
    minimizer = (hat_vect_matrix.multiply(X)).sum(axis=1)
    # divide by the squared norm of each masked vector
    vector_of_norms = (hat_vect_matrix.multiply(hat_vect_matrix)).sum(axis=1)
    # if a norm is 0 the numerator is 0 too, so substitute 1 to avoid 0/0
    vector_of_norms[vector_of_norms < 1e-8] = 1
    minimizer = minimizer / vector_of_norms
    # return an np.ndarray (not np.matrix) representation
    return minimizer.A
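A quick usage sketch with random data (illustrative only): for each row i this computes the closed-form least-squares coefficient <hat_v_i, x_i> / ||hat_v_i||^2 without ever densifying the matrices.

import numpy as np
from scipy import sparse

hat = sparse.random(5, 8, density=0.3, format='csr', random_state=0)
X = sparse.random(5, 8, density=0.3, format='csr', random_state=1)
u = _compute_sparse_minimizer(hat, X)
print(u.shape)  # (5, 1): one coefficient per row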
Example #3
	def fit(self, X: np.ndarray, knn: sparse.csr_matrix, k: int) -> np.ndarray:
		"""
		Args:
			X	Input vector of expression values, shape (n_cells,)
			knn	KNN connectivity matrix, shape (n_cells, n_cells)
			k	Number of neighbors per cell

		Remarks:
			knn is assumed to use a single k for all cells (if not, the
			enrichment will only be approximate).
		"""
		n_cells = X.shape[0]
		nonzeros = (X > 0).astype('int')
		nz_bycell = knn.multiply(nonzeros).sum(axis=1)
		sum_bycell = knn.multiply(X).sum(axis=1)

		total_nz = nonzeros.sum()
		total_sum = X.sum()

		# enrichment = (rate within the k neighbors + eps) / (rate in the rest of the dataset + eps)
		nz_enrichment = (nz_bycell / k + self.epsilon) / ((total_nz - nz_bycell) / (n_cells - k) + self.epsilon)
		mean_enrichment = (sum_bycell / k + self.epsilon) / ((total_sum - sum_bycell) / (n_cells - k) + self.epsilon)

		return (nz_enrichment.A * mean_enrichment.A).T[0]
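The knn input can be built, for instance, with scikit-learn's kneighbors_graph. The sketch below is illustrative only; the enclosing class and its epsilon attribute are not shown in the snippet above, so the final call is left as a hypothetical comment.

import numpy as np
from sklearn.neighbors import kneighbors_graph

coords = np.random.rand(100, 2)   # per-cell embedding (illustrative)
k = 10
# binary connectivity matrix with the same k for every cell, as the Remarks assume
knn = kneighbors_graph(coords, n_neighbors=k, include_self=True)
X = np.random.poisson(1.0, size=100).astype(float)  # one gene's expression vector
# scores = SomeEnrichment(epsilon=0.1).fit(X, knn, k)  # hypothetical wrapper class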
Example #4
import numpy as np
import scipy.sparse as sp
import torch

def normalize_adj(adj: sp.csr_matrix):
    """Symmetrically normalize an adjacency matrix and convert it to a sparse tensor."""
    if sp.isspmatrix(adj):
        adj = adj.tolil()
        adj.setdiag(1)  # add self-loops
        adj = adj.tocsr()
        deg = np.ravel(adj.sum(1))
        deg_sqrt_inv = 1 / np.sqrt(deg)
        # D^{-1/2} (A + I) D^{-1/2}
        adj_norm = adj.multiply(deg_sqrt_inv[:, None]).multiply(deg_sqrt_inv[None, :])
    elif torch.is_tensor(adj):
        deg = adj.sum(1)
        deg_sqrt_inv = 1 / torch.sqrt(deg)
        adj_norm = adj * deg_sqrt_inv[:, None] * deg_sqrt_inv[None, :]
    else:
        raise TypeError(f'Unsupported adjacency type: {type(adj)}')
    # to_sparse_tensor is an external helper from the surrounding project
    return to_sparse_tensor(adj_norm)
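A small sanity check of the scipy branch against a dense reference (illustrative; to_sparse_tensor is an external helper not shown here, so the normalization is recomputed inline):

import numpy as np
import scipy.sparse as sp

A = sp.csr_matrix(np.array([[0., 1., 0.],
                            [1., 0., 1.],
                            [0., 1., 0.]]))
A = A.tolil()
A.setdiag(1)  # add self-loops
A = A.tocsr()
deg = np.ravel(A.sum(1))
d = 1 / np.sqrt(deg)
A_norm = A.multiply(d[:, None]).multiply(d[None, :])
ref = np.diag(d) @ A.toarray() @ np.diag(d)  # dense D^{-1/2}(A + I)D^{-1/2}
assert np.allclose(A_norm.toarray(), ref)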
Example #5
import numpy as np
from scipy import sparse

def _compute_sparse_gradient(hat_vect_matrix: sparse.csr_matrix,
                             X: sparse.csr_matrix, z: np.ndarray,
                             y: np.ndarray) -> np.ndarray:
    # Compact but memory-inefficient alternative (z @ y.T is dense):
    #   grad_z = (hat_vect_matrix.multiply(z @ y.T - X)).sum(axis=1)
    #   return grad_z.A
    # A sparse matrix is represented by its data, row and column indices.
    sparse_X_tuple = (X.data, *X.nonzero())
    # Build the difference matrix in sparse form so that z @ y.T is never
    # materialized as a full dense matrix.
    diff_matrix = sparse.csr_matrix(
        _compute_sparse_difference_matrix(sparse_X_tuple, z, y),
        shape=X.shape,
        dtype=z.dtype)
    # sum over rows (axis=1) and return an ndarray, matching the annotation
    return hat_vect_matrix.multiply(diff_matrix).sum(axis=1).A
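The helper _compute_sparse_difference_matrix is not shown in the snippet. A plausible sketch (hypothetical, not the original implementation) evaluates z @ y.T - X only at the stored entries of X, which matches the masked dense expression whenever the support of hat_vect_matrix lies inside the support of X:

import numpy as np

def _compute_sparse_difference_matrix(sparse_X_tuple, z, y):
    # sparse_X_tuple = (data, rows, cols) for the stored entries of X
    data, rows, cols = sparse_X_tuple
    # (z @ y.T)[r, c] - X[r, c], computed entry-wise without densifying
    diff_data = z[rows].ravel() * y[cols].ravel() - data
    # returned in the (data, (rows, cols)) form that csr_matrix accepts
    return diff_data, (rows, cols)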
Example #6

import numpy as np
from scipy.sparse import csr_matrix as SM  # SM is an unexplained alias in the original; csr_matrix is assumed

def Lloyd_iteration2(A, P, w, Q):
    # squaredis is an external helper: it returns squared distances from the
    # rows of P to the rows of Q and the index of each row's nearest center.
    dists, Tags, _ = squaredis(P, Q)
    Qjl = SM((Q.shape[0], A.shape[1]))
    wq = np.zeros((Q.shape[0], 1))
    w = np.reshape(w, (len(w), 1))
    for i in range(Qjl.shape[0]):
        # rows of A assigned to center i
        inds = np.where(Tags == i)[0]
        wi = w[inds, :]
        # weighted sum of the assigned rows
        Qjl[i, :] = (A[inds, :].multiply(wi)).sum(0)
        wq[i, :] = np.sum(wi, 0)
    # guard against empty clusters before dividing
    wq[wq == 0] = 1
    # divide each row by its total weight to obtain the weighted mean
    Qjl = Qjl.multiply(1 / wq)
    return SM(Qjl)
Example #7
File: plt.py Project: LennartKeller/exmlc
    def _assign_train_indices(self, tree: HuffmanTree, y: csr_matrix) -> HuffmanTree:
        """
        Assigns the indices of the training data to the nodes of the label tree.
        :param tree: label tree
        :param y: label matrix
        :return: tree with data assigned
        """

        if self.verbose:
            print('Assigning train data to tree nodes')

        for node in tree.bfs_traverse():
            compare_row = np.zeros(y[0].shape).ravel()
            compare_row[node.label_idx] = 1
            compare_row = csr_matrix(compare_row)
            # element-wise multiplication masks out every label except the node's own
            node.train_idx = y.multiply(compare_row)
            node.y = node.train_idx.max(axis=1).astype('int8').toarray().ravel()

        return tree
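A standalone illustration of the masking trick used in the loop (the data is illustrative): multiplying the label matrix by a one-hot indicator row keeps only the columns that belong to a node, and the row-wise max then flags which samples carry any of those labels.

import numpy as np
from scipy.sparse import csr_matrix

y = csr_matrix(np.array([[1, 0, 1],
                         [0, 1, 0],
                         [1, 1, 0]], dtype='int8'))
compare_row = np.zeros(y[0].shape).ravel()
compare_row[[0, 2]] = 1                   # labels owned by a hypothetical node
mask = y.multiply(csr_matrix(compare_row))
node_y = mask.max(axis=1).astype('int8').toarray().ravel()
print(node_y)  # [1 0 1]: samples 0 and 2 carry at least one of the node's labels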
Example #8
    def __call__(self, matrix: csr_matrix):
        total = matrix.sum()
        row_sum = matrix.sum(axis=1)
        col_sum = matrix.sum(axis=0)

        # invert each marginal (division by zero is handled by _safe_divide)
        row_sum = _safe_divide(1, row_sum)
        col_sum = _safe_divide(1, col_sum)

        row_sum *= total

        if self.smooth:
            row_sum = np.power(row_sum, SMOOTH_POWER)

        # each stored entry becomes total * n_ij / (row_i * col_j)
        res = matrix.multiply(row_sum).multiply(col_sum).tocsr()

        res.data = np.log(res.data)
        res.eliminate_zeros()

        return res
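A self-contained check of the weighting on a tiny co-occurrence matrix (illustrative; _safe_divide and SMOOTH_POWER are external names in the original, so the smoothing branch is skipped and the marginals are assumed nonzero here). Each stored entry becomes log(N * n_ij / (row_i * col_j)), i.e. pointwise mutual information.

import numpy as np
from scipy.sparse import csr_matrix

counts = csr_matrix(np.array([[4., 0.],
                              [2., 2.]]))
N = counts.sum()
row = np.asarray(counts.sum(axis=1))  # shape (2, 1)
col = np.asarray(counts.sum(axis=0))  # shape (1, 2)

pmi = counts.multiply(N / row).multiply(1 / col).tocsr()
pmi.data = np.log(pmi.data)
print(pmi.toarray())  # e.g. entry (1, 1): log(8 * 2 / (4 * 2)) = log(2)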
Example #9
def sparse_average_precision_at_k(y_true: csr_matrix, y_scores: csr_matrix, k: int = 5) -> float:
    """
    Computes the average precision at k for sparse binary matrices.
    :param y_true: ground truth in binary format (n_samples, n_labels)
    :param y_scores: predictions in a representation that can be ranked (e.g. probabilities)
    :param k: top k labels to check
    :return: average precision at k score
    """
    if y_true.shape != y_scores.shape:
        raise ValueError('y_true and y_scores must have the same shape')
    if y_true.shape[1] < k:
        raise ValueError('Fewer labels than k')

    # get the indices of the k top-scoring labels of y_scores
    top_idx = top_n_idx_sparse(y_scores, k)
    # build a binary matrix of shape y_true.shape containing only the top-ranked labels
    y_pred_binary_only_top = lil_matrix(y_true.shape, dtype='int8')
    for index, idx_row in enumerate(top_idx):
        y_pred_binary_only_top[index, idx_row] = 1
    y_pred_binary_only_top = y_pred_binary_only_top.tocsr()

    # correctly predicted labels
    correct_labelled = y_true.multiply(y_pred_binary_only_top)
    summed_precision = []

    for index, (row, score_row) in enumerate(zip(correct_labelled, y_scores)):
        # special case: the corresponding y_true row is empty => unlabeled instance
        if y_true[index].count_nonzero() == 0:
            # if no labels were predicted either, count the instance as correct
            if score_row.count_nonzero() == 0:
                summed_precision.append(1.0)
            else:
                summed_precision.append(0.0)
        else:
            summed_precision.append(row.count_nonzero() / k)

    return sum(summed_precision) / len(summed_precision)
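top_n_idx_sparse is an external helper not shown above. A plausible sketch (hypothetical, not the original implementation) returns, for each row, the column indices of the (at most) n largest stored values:

import numpy as np
from scipy.sparse import csr_matrix

def top_n_idx_sparse(matrix: csr_matrix, n: int):
    top_idx = []
    for start, end in zip(matrix.indptr[:-1], matrix.indptr[1:]):
        values = matrix.data[start:end]
        cols = matrix.indices[start:end]
        # column indices of the n largest stored values in this row
        top_idx.append(cols[np.argsort(values)[::-1][:n]])
    return top_idx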
Example #10
import numpy as np
from scipy import sparse

def compute_tf_idf(doc_matrix: sparse.csr_matrix) -> sparse.csr_matrix:

    # Assume each row of doc_matrix already sums to 1 (term frequencies),
    # so only the idf factor needs to be computed here.

    # First, find the coordinates of all nonzero entries.
    i, nonzero_cols, v = sparse.find(doc_matrix)

    # Then count the number of nonzero entries in each column.
    nonzero_cols, nonzero_col_appearences = np.unique(nonzero_cols,
                                                      return_counts=True)

    # Some columns may be all zeros, so start from a zero-filled indicator.
    indicator = np.zeros(shape=(
        1,
        doc_matrix.shape[1],
    ), dtype=np.float32)
    indicator[0, nonzero_cols] = nonzero_col_appearences[:]
    n_articles = doc_matrix.shape[0]
    indicator = np.log(n_articles / indicator)

    # Element-wise multiplication of tf by idf.
    tf_idf = sparse.csr_matrix(doc_matrix.multiply(indicator))

    return tf_idf
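Illustrative usage (the matrix below is made up): rows are documents whose term frequencies already sum to 1, matching the assumption in the first comment.

import numpy as np
from scipy import sparse

tf = sparse.csr_matrix(np.array([[0.5, 0.5],
                                 [1.0, 0.0]]))
tf_idf = compute_tf_idf(tf)
# term 0 occurs in both documents, so its idf is log(2/2) = 0 and its
# entries become zero; term 1 occurs in one document and keeps 0.5 * log(2)
print(tf_idf.toarray())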
Example #11
import numpy as np
from scipy import sparse

def compute_norms(matrix: sparse.csr_matrix) -> np.ndarray:
    """Computes the l2 norm of each row."""
    return np.sqrt(matrix.multiply(matrix).sum(axis=1).A).flatten()
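A quick equivalence check against a dense reference (illustrative data):

import numpy as np
from scipy import sparse

m = sparse.random(4, 6, density=0.5, format='csr', random_state=0)
assert np.allclose(compute_norms(m), np.linalg.norm(m.toarray(), axis=1))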
Example #12
from scipy.sparse import csr_matrix

def sparse_pos_clip(a: csr_matrix):
    # keep only the positive entries; the boolean mask (a > 0) stays sparse
    # TODO: optimize
    return a.multiply(a > 0)
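Illustrative check: multiplying by the sparse boolean mask zeroes out negative entries without densifying.

import numpy as np
from scipy.sparse import csr_matrix

a = csr_matrix(np.array([[1., -2.],
                         [-3., 4.]]))
print(sparse_pos_clip(a).toarray())  # [[1. 0.]
                                     #  [0. 4.]]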
Example #13
import numpy as np
from scipy.sparse import csr_matrix

def sparse_l2_norm(*, matrix: csr_matrix) -> np.ndarray:
    """
    Return the l2 norm of each row of a CSR sparse matrix.
    This is significantly faster and less memory-intensive than converting
    the matrix to dense and using numpy directly.
    """
    return np.asarray(np.sqrt(matrix.multiply(matrix).sum(axis=1)))