def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
    """Embed new nodes described by their adjacency vectors.

    Parameters
    ----------
    adjacency_vectors :
        Adjacency vectors of the new nodes.
        Array of shape (n_col,) (single vector) or (n_vectors, n_col).

    Returns
    -------
    embedding_vectors : np.ndarray
        Embedding of the nodes (flattened when a single vector is given).
    """
    self._check_fitted()
    basis = self.eigenvectors_
    spectrum = self.eigenvalues_

    adjacency_vectors = check_adjacency_vector(adjacency_vectors, basis.shape[0])
    check_nonnegative(adjacency_vectors)

    # Regularize the new vectors with the value stored at fit time.
    if self.regularization_:
        adjacency_vectors = RegularizedAdjacency(adjacency_vectors, self.regularization_)

    # Project the L1-normalized vectors onto the eigenvectors.
    embedding_vectors = normalize(adjacency_vectors, p=1).dot(basis)

    if not self.barycenter:
        if self.normalized_laplacian:
            factors = 1 - spectrum
        else:
            # to be modified
            factors = 1 - spectrum / (adjacency_vectors.sum() + 1e-9)
        embedding_vectors = diag_pinv(factors).dot(embedding_vectors.T).T

    if self.equalize:
        inv_sqrt = diag_pinv(np.sqrt(spectrum))
        embedding_vectors = inv_sqrt.dot(embedding_vectors.T).T

    if self.normalized:
        embedding_vectors = normalize(embedding_vectors, p=2)

    # A single input vector yields a 1-D output.
    if embedding_vectors.shape[0] == 1:
        return embedding_vectors.ravel()
    return embedding_vectors
def _secondary_outputs(self, biadjacency): """Compute different variables from labels_.""" if self.return_membership: membership_row = membership_matrix(self.labels_row_) membership_col = membership_matrix(self.labels_col_) self.membership_row_ = normalize(biadjacency.dot(membership_col)) self.membership_col_ = normalize(biadjacency.T.dot(membership_row)) if self.return_aggregate: membership_row = membership_matrix(self.labels_row_) membership_col = membership_matrix(self.labels_col_) biadjacency_ = sparse.csr_matrix(membership_row.T.dot(biadjacency)) biadjacency_ = biadjacency_.dot(membership_col) self.biadjacency_ = biadjacency_ return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'LaplacianEmbedding':
    """Fit the embedding from the eigenvectors of the graph Laplacian.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph (symmetric matrix).

    Returns
    -------
    self: :class:`LaplacianEmbedding`
    """
    adjacency = check_format(adjacency).asfptype()
    check_square(adjacency)
    check_symmetry(adjacency)
    n = adjacency.shape[0]

    regularize = self.regularization is not None and not self.regularization == 0.
    check_scaling(self.scaling, adjacency, regularize)

    # With regularization the Laplacian is an operator, handled by the Lanczos solver.
    solver = LanczosEig() if regularize else set_solver(self.solver, adjacency)
    # One extra component: the first eigenpair is dropped below.
    n_components = 1 + check_n_components(self.n_components, n - 2)

    weights = adjacency.dot(np.ones(n))
    regularization = self.regularization
    if not regularization:
        laplacian = sparse.diags(weights, format='csr') - adjacency
    else:
        if self.relative_regularization:
            regularization = regularization * weights.sum() / n**2
        weights += regularization * n
        laplacian = LaplacianOperator(adjacency, regularization)

    solver.which = 'SM'
    solver.fit(matrix=laplacian, n_components=n_components)
    eigenvalues = solver.eigenvalues_[1:]
    eigenvectors = solver.eigenvectors_[:, 1:]

    embedding = eigenvectors.copy()
    if self.scaling:
        rescale = diag_pinv(eigenvalues**self.scaling)
        embedding = rescale.dot(embedding.T).T
    if self.normalized:
        embedding = normalize(embedding, p=2)

    self.embedding_ = embedding
    self.eigenvalues_ = eigenvalues
    self.eigenvectors_ = eigenvectors
    self.regularization_ = regularization

    return self
def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
    """Embed new rows as the normalized combination of the fitted embedding.

    Parameters
    ----------
    adjacency_vectors :
        Adjacency vectors of the new rows.
        Array of shape (n_col,) (single vector) or (n_vectors, n_col).

    Returns
    -------
    embedding_vectors : np.ndarray
        Embedding of the nodes (flattened when a single vector is given).
    """
    self._check_fitted()
    fitted = self.embedding_

    vectors = check_adjacency_vector(adjacency_vectors, fitted.shape[0])
    projected = normalize(vectors).dot(fitted)

    # A single input vector yields a 1-D output.
    return projected.ravel() if projected.shape[0] == 1 else projected
def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
    """Embed new nodes described by their adjacency vectors.

    Parameters
    ----------
    adjacency_vectors :
        Adjacency vectors of the new nodes.
        Array of shape (n_col,) (single vector) or (n_vectors, n_col).

    Returns
    -------
    embedding_vectors : np.ndarray
        Embedding of the nodes (flattened when a single vector is given).
    """
    self._check_fitted()
    basis = self.eigenvectors_
    spectrum = self.eigenvalues_

    adjacency_vectors = check_adjacency_vector(adjacency_vectors, basis.shape[0])
    check_nonnegative(adjacency_vectors)

    # Regularize the new vectors with the value stored at fit time.
    if self.regularization_:
        adjacency_vectors = RegularizedAdjacency(adjacency_vectors, self.regularization_)

    # Project the L1-normalized vectors onto the eigenvectors,
    # then divide each coordinate by its eigenvalue (pseudo-inverse).
    projected = normalize(adjacency_vectors, p=1).dot(basis)
    embedding_vectors = diag_pinv(spectrum).dot(projected.T).T

    if self.scaling:
        rescale = diag_pinv((1 - spectrum)**self.scaling)
        embedding_vectors = rescale.dot(embedding_vectors.T).T

    if self.normalized:
        embedding_vectors = normalize(embedding_vectors, p=2)

    # A single input vector yields a 1-D output.
    if embedding_vectors.shape[0] == 1:
        return embedding_vectors.ravel()
    return embedding_vectors
def _secondary_outputs(self, adjacency): """Compute different variables from labels_.""" if self.return_membership or self.return_aggregate: membership = membership_matrix(self.labels_) if self.return_membership: self.membership_ = normalize(adjacency.dot(membership)) if self.return_aggregate: self.adjacency_ = sparse.csr_matrix( membership.T.dot(adjacency.dot(membership))) return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Union[np.ndarray, dict] = None) \
        -> 'Propagation':
    """Node classification by label propagation.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    seeds :
        Seed nodes. Can be a dict {node: label} or an array where "-1" means no label.

    Returns
    -------
    self: :class:`Propagation`
    """
    adjacency = check_format(adjacency)
    n = adjacency.shape[0]
    index_seed, index_remain, labels_seed = self._instanciate_vars(adjacency, seeds)

    if self.node_order == 'random':
        np.random.shuffle(index_remain)
    elif self.node_order in ('decreasing', 'increasing'):
        # Reorder the unlabeled nodes themselves by in-weight. Indexing the global
        # ranking with index_remain would select arbitrary nodes (possibly seeds)
        # instead of a permutation of the unlabeled nodes.
        weights_remain = adjacency.T.dot(np.ones(n))[index_remain]
        if self.node_order == 'decreasing':
            weights_remain = -weights_remain
        index_remain = index_remain[np.argsort(weights_remain)].astype(np.int32)

    # -1 marks nodes without label.
    labels = -np.ones(n, dtype=np.int32)
    labels[index_seed] = labels_seed
    labels_remain = np.zeros_like(index_remain, dtype=np.int32)

    indptr = adjacency.indptr.astype(np.int32)
    indices = adjacency.indices.astype(np.int32)
    if self.weighted:
        data = adjacency.data.astype(np.float32)
    else:
        # NOTE(review): unit votes are sized by the number of nodes, not the number
        # of edges; confirm against how vote_update indexes `data`.
        data = np.ones(n, dtype=np.float32)

    # Iterate until the labels of the unlabeled nodes are stable or n_iter is reached.
    t = 0
    while t < self.n_iter and not np.array_equal(labels_remain, labels[index_remain]):
        t += 1
        labels_remain = labels[index_remain].copy()
        labels = vote_update(indptr, indices, data, labels, index_remain)

    membership = membership_matrix(labels)
    membership = normalize(adjacency.dot(membership))

    self.labels_ = labels
    self.membership_ = membership

    return self
def _secondary_outputs(self, adjacency): """Compute different variables from labels_.""" if self.return_membership or self.return_aggregate: if np.issubdtype(adjacency.data.dtype, np.bool_): adjacency = adjacency.astype(float) membership = membership_matrix(self.labels_) if self.return_membership: self.membership_ = normalize(adjacency.dot(membership)) if self.return_aggregate: self.adjacency_ = sparse.csr_matrix( membership.T.dot(adjacency.dot(membership))) return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'RandomProjection':
    """Fit the embedding by iterated products with an orthogonalized random matrix.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.

    Returns
    -------
    self: :class:`RandomProjection`
    """
    adjacency = check_format(adjacency).asfptype()
    check_square(adjacency)
    n = adjacency.shape[0]

    rng = check_random_state(self.random_state)
    # Gaussian matrix, orthogonalized through its QR decomposition.
    term, _ = np.linalg.qr(rng.normal(size=(n, self.n_components)))
    embedding = term.copy()

    transition = normalize(adjacency) if self.random_walk else adjacency

    # Accumulate the terms alpha^t * transition^t * Q for t = 1..n_iter.
    for _ in range(self.n_iter):
        term = self.alpha * transition.dot(term)
        embedding += term

    if self.normalized:
        embedding = normalize(embedding, p=2)

    self.embedding_ = embedding
    return self
def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
    """Embed new rows described by their adjacency vectors.

    Parameters
    ----------
    adjacency_vectors :
        Adjacency vectors of the new rows.
        Array of shape (n_col,) (single vector) or (n_vectors, n_col).

    Returns
    -------
    embedding_vectors : np.ndarray
        Embedding of the nodes (flattened when a single vector is given).
    """
    self._check_fitted()
    right_vectors = self.singular_vectors_right_
    singular_values = self.singular_values_
    n_col = self.embedding_col_.shape[0]

    adjacency_vectors = check_adjacency_vector(adjacency_vectors, n_col)
    self._check_adj_vector(adjacency_vectors)

    # Regularize the new vectors with the value stored at fit time.
    if self.regularization_:
        adjacency_vectors = RegularizedAdjacency(adjacency_vectors, self.regularization_)

    # Row weights come from the new vectors, column weights from the fit.
    weights_row = adjacency_vectors.dot(np.ones(n_col))
    scale_row = diag_pinv(np.power(weights_row, self.factor_row))
    scale_col = diag_pinv(np.power(self.weights_col_, self.factor_col))
    weighted = safe_sparse_dot(scale_row, safe_sparse_dot(adjacency_vectors, scale_col))

    # Project on the right singular vectors; the row scaling is applied a second time here.
    embedding_vectors = scale_row.dot(weighted.dot(right_vectors))

    # Scale by the singular values.
    embedding_vectors /= np.power(singular_values, self.factor_singular)

    if self.normalized:
        embedding_vectors = normalize(embedding_vectors, p=2)

    # A single input vector yields a 1-D output.
    if embedding_vectors.shape[0] == 1:
        return embedding_vectors.ravel()
    return embedding_vectors
def test_formats(self):
    """normalize must accept dense arrays, sparse matrices and CoNeighbor operators."""
    n = 5
    dense = normalize(np.eye(n))
    sparse_mat = normalize(sparse.eye(n))
    operator = normalize(CoNeighbor(sparse_mat))

    # Each normalized identity-based input must leave x unchanged.
    x = np.random.randn(n)
    for matrix in (dense, sparse_mat, operator):
        self.assertAlmostEqual(np.linalg.norm(matrix.dot(x) - x), 0)

    # Dense and sparse inputs must agree under p=2 normalization.
    values = np.random.rand(n**2).reshape((n, n))
    values_sparse = sparse.csr_matrix(values)
    dense = normalize(values, p=2)
    sparse_mat = normalize(values_sparse, p=2)
    self.assertAlmostEqual(np.linalg.norm(dense.dot(x) - sparse_mat.dot(x)), 0)

    # Unsupported combinations must raise.
    with self.assertRaises(NotImplementedError):
        normalize(operator, p=2)
    with self.assertRaises(NotImplementedError):
        normalize(dense, p=3)
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Union[np.ndarray, dict]) \
        -> 'Propagation':
    """Node classification by label propagation (majority vote among neighbors).

    Parameters
    ----------
    adjacency :
        Adjacency or biadjacency matrix of the graph.
    seeds :
        Seed nodes. Can be a dict {node: label} or an array where "-1" means no label.

    Returns
    -------
    self: :class:`Propagation`
    """
    adjacency = check_format(adjacency)
    n = adjacency.shape[0]
    index_seed, index_remain, labels_seed = self._instanciate_vars(adjacency, seeds)

    # -1 marks nodes without label.
    labels = -np.ones(n, dtype=int)
    labels[index_seed] = labels_seed
    previous = np.zeros_like(index_remain, dtype=int)

    n_passes = 0
    # Stop when the labels of the unlabeled nodes are stable or after n_iter passes.
    while n_passes < self.n_iter and not np.array_equal(previous, labels[index_remain]):
        n_passes += 1
        previous = labels[index_remain].copy()
        for node in index_remain:
            start, end = adjacency.indptr[node], adjacency.indptr[node + 1]
            votes = labels[adjacency.indices[start:end]]
            votes = votes[votes >= 0]
            if len(votes):
                candidates, counts = np.unique(votes, return_counts=True)
                labels[node] = candidates[np.argmax(counts)]

    membership = normalize(adjacency.dot(membership_matrix(labels)))

    self.labels_ = labels
    self.membership_ = membership

    return self
def test_coneighbors(self):
    """Check normalization, negation, scaling and transposition of the CoNeighbor operator."""
    biadjacency = test_bigraph()
    operator = CoNeighbor(biadjacency)
    transition = normalize(operator)
    x = transition.dot(np.ones(transition.shape[1]))

    # The normalized operator applied to the all-ones vector must return the all-ones vector.
    self.assertAlmostEqual(np.linalg.norm(x - np.ones(operator.shape[0])), 0)
    # The results of these two calls are not checked; they only need to run without raising.
    operator.astype('float')
    operator.right_sparse_dot(sparse.eye(operator.shape[1], format='csr'))

    operator1 = CoNeighbor(biadjacency, normalized=False)
    operator2 = CoNeighbor(biadjacency, normalized=False)
    x = np.random.randn(operator.shape[1])
    x1 = (-operator1).dot(x)
    x2 = (operator2 * -1).dot(x)
    # NOTE(review): x3 is asserted equal to the negated product x2, although no negation
    # is applied on this line; presumably the negation above modifies operator1 in place.
    # Confirm before reordering these statements.
    x3 = operator1.T.dot(x)
    self.assertAlmostEqual(np.linalg.norm(x1 - x2), 0)
    self.assertAlmostEqual(np.linalg.norm(x2 - x3), 0)
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Spectral':
    """Compute the graph embedding.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph (symmetric matrix).

    Returns
    -------
    self: :class:`Spectral`

    Raises
    ------
    ValueError
        If ``equalize`` is set, no regularization is used and the graph is not connected.
    """
    adjacency = check_format(adjacency).asfptype()
    check_square(adjacency)
    check_symmetry(adjacency)
    n = adjacency.shape[0]

    # Replace the 'auto' placeholder by a solver instance chosen from the number of non-zero entries.
    if self.solver == 'auto':
        solver = auto_solver(adjacency.nnz)
        if solver == 'lanczos':
            self.solver: EigSolver = LanczosEig()
        else:  # pragma: no cover
            self.solver: EigSolver = HalkoEig()

    # One extra component is requested because the first eigenpair is dropped below.
    n_components = check_n_components(self.n_components, n - 2)
    n_components += 1

    # NOTE(review): the two message literals below are concatenated without a separating space.
    if self.equalize and (self.regularization is None or self.regularization == 0.) and not is_connected(adjacency):
        raise ValueError(
            "The option 'equalize' is valid only if the graph is connected or with regularization."
            "Call 'fit' either with 'equalize' = False or positive 'regularization'."
        )

    weights = adjacency.dot(np.ones(n))
    regularization = self.regularization
    if regularization:
        if self.relative_regularization:
            # Scale the regularization by the total weight divided by n^2.
            regularization = regularization * weights.sum() / n**2
        weights += regularization * n

    if self.normalized_laplacian:
        # Finding the largest eigenvalues of the normalized adjacency is easier for the solver than finding the
        # smallest eigenvalues of the normalized laplacian.
        weights_inv_sqrt_diag = diag_pinv(np.sqrt(weights))
        if regularization:
            norm_adjacency = NormalizedAdjacencyOperator(
                adjacency, regularization)
        else:
            norm_adjacency = weights_inv_sqrt_diag.dot(
                adjacency.dot(weights_inv_sqrt_diag))

        self.solver.which = 'LA'
        self.solver.fit(matrix=norm_adjacency, n_components=n_components)
        eigenvalues = 1 - self.solver.eigenvalues_
        # eigenvalues of the Laplacian in increasing order
        index = np.argsort(eigenvalues)[1:]  # skip first eigenvalue
        eigenvalues = eigenvalues[index]
        # eigenvectors of the Laplacian, skip first eigenvector
        eigenvectors = np.array(
            weights_inv_sqrt_diag.dot(self.solver.eigenvectors_[:, index]))
    else:
        if regularization:
            laplacian = LaplacianOperator(adjacency, regularization)
        else:
            weight_diag = sparse.diags(weights, format='csr')
            laplacian = weight_diag - adjacency

        self.solver.which = 'SM'
        self.solver.fit(matrix=laplacian, n_components=n_components)
        # Drop the first eigenpair returned by the solver.
        eigenvalues = self.solver.eigenvalues_[1:]
        eigenvectors = self.solver.eigenvectors_[:, 1:]

    embedding = eigenvectors.copy()

    if self.equalize:
        # Divide each component by the square root of its eigenvalue (pseudo-inverse).
        eigenvalues_sqrt_inv_diag = diag_pinv(np.sqrt(eigenvalues))
        embedding = eigenvalues_sqrt_inv_diag.dot(embedding.T).T

    if self.barycenter:
        # Subtract the embedding scaled by the eigenvalues (and by the inverse weights
        # when the Laplacian is not normalized).
        eigenvalues_diag = sparse.diags(eigenvalues)
        subtract = eigenvalues_diag.dot(embedding.T).T
        if not self.normalized_laplacian:
            weights_inv_diag = diag_pinv(weights)
            subtract = weights_inv_diag.dot(subtract)
        embedding -= subtract

    if self.normalized:
        embedding = normalize(embedding, p=2)

    self.embedding_ = embedding
    self.eigenvalues_ = eigenvalues
    self.eigenvectors_ = eigenvectors
    self.regularization_ = regularization

    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'GSVD':
    """Fit the row and column embeddings from the generalized SVD of the graph.

    Parameters
    ----------
    adjacency :
        Adjacency or biadjacency matrix of the graph.

    Returns
    -------
    self: :class:`GSVD`
    """
    adjacency = check_format(adjacency).asfptype()
    n_row, n_col = adjacency.shape
    n_components = check_n_components(self.n_components, min(n_row, n_col) - 1)

    # A solver given by name is replaced by a solver instance.
    if isinstance(self.solver, str):
        self.solver = set_svd_solver(self.solver, adjacency)

    regularization = self.regularization
    matrix = adjacency
    if regularization:
        if self.relative_regularization:
            regularization = regularization * np.sum(adjacency.data) / (n_row * n_col)
        matrix = RegularizedAdjacency(adjacency, regularization)

    # Weight rows and columns by (pseudo-inverse) powers of their total weights.
    weights_row = matrix.dot(np.ones(n_col))
    weights_col = matrix.T.dot(np.ones(n_row))
    scale_row = diag_pinv(np.power(weights_row, self.factor_row))
    scale_col = diag_pinv(np.power(weights_col, self.factor_col))
    weighted = safe_sparse_dot(scale_row, safe_sparse_dot(matrix, scale_col))
    self.solver.fit(weighted, n_components)

    # Sort the decomposition by decreasing singular value.
    singular_values = self.solver.singular_values_
    order = np.argsort(-singular_values)
    singular_values = singular_values[order]
    singular_vectors_left = self.solver.singular_vectors_left_[:, order]
    singular_vectors_right = self.solver.singular_vectors_right_[:, order]

    power_left = sparse.diags(np.power(singular_values, 1 - self.factor_singular))
    power_right = sparse.diags(np.power(singular_values, self.factor_singular))

    embedding_row = power_left.dot(scale_row.dot(singular_vectors_left).T).T
    embedding_col = power_right.dot(scale_col.dot(singular_vectors_right).T).T

    if self.normalized:
        embedding_row = normalize(embedding_row, p=2)
        embedding_col = normalize(embedding_col, p=2)

    self.embedding_row_ = embedding_row
    self.embedding_col_ = embedding_col
    self.embedding_ = embedding_row
    self.singular_values_ = singular_values
    self.singular_vectors_left_ = singular_vectors_left
    self.singular_vectors_right_ = singular_vectors_right
    self.regularization_ = regularization
    self.weights_col_ = weights_col

    return self
def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
    """Predict the embedding of new nodes, when possible (otherwise return 0).

    Each new node is defined by its adjacency row vector.

    Parameters
    ----------
    adjacency_vectors :
        Adjacency vectors of nodes.
        Array of shape (n_col,) (single vector) or (n_vectors, n_col)

    Returns
    -------
    embedding_vectors : np.ndarray
        Embedding of the nodes.

    Example
    -------
    >>> from sknetwork.embedding import Spectral
    >>> from sknetwork.data import karate_club
    >>> spectral = Spectral(n_components=3)
    >>> adjacency = karate_club()
    >>> adjacency_vector = np.arange(34) < 5
    >>> _ = spectral.fit(adjacency)
    >>> len(spectral.predict(adjacency_vector))
    3
    """
    self._check_fitted()

    # Expected length of the adjacency vectors.
    n = len(self.embedding_col_) if self.bipartite else len(self.embedding_)
    adjacency_vectors = check_adjacency_vector(adjacency_vectors, n)
    check_nonnegative(adjacency_vectors)

    if self.bipartite:
        # Prepend empty columns, one per fitted row.
        padding_shape = (adjacency_vectors.shape[0], self.embedding_row_.shape[0])
        adjacency_vectors = sparse.csr_matrix(adjacency_vectors)
        adjacency_vectors = sparse.hstack([sparse.csr_matrix(padding_shape), adjacency_vectors], format='csr')

    eigenvectors = self.eigenvectors_
    eigenvalues = self.eigenvalues_

    regularization = np.abs(self.regularization) if self.regularized else 0
    normalizer = Normalizer(adjacency_vectors, regularization)

    embedding_vectors = normalizer.dot(eigenvectors)
    if self.decomposition == 'rw':
        # Divide by the stored eigenvalues, leaving coordinates with a zero eigenvalue untouched.
        divisors = eigenvalues.copy()
        divisors[divisors == 0] = 1
        embedding_vectors /= divisors
    else:
        # Entry-wise inverse of 1 - norm * eigenvalue; the sparse format keeps zero entries at zero.
        inverse = sparse.csr_matrix(1 - np.outer(normalizer.norm_diag.data, eigenvalues))
        inverse.data = 1 / inverse.data
        embedding_vectors *= inverse.toarray()

    if self.normalized:
        embedding_vectors = normalize(embedding_vectors, p=2)

    # A single input vector yields a 1-D output.
    if len(embedding_vectors) == 1:
        embedding_vectors = embedding_vectors.ravel()

    return embedding_vectors
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Spectral':
    """Fit the embedding from the spectral decomposition of the normalized adjacency matrix.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph (symmetric matrix).

    Returns
    -------
    self: :class:`Spectral`
    """
    adjacency = check_format(adjacency).asfptype()
    check_square(adjacency)
    check_symmetry(adjacency)
    n = adjacency.shape[0]

    solver = set_solver(self.solver, adjacency)
    # One extra component: the first eigenpair is dropped below.
    n_components = 1 + check_n_components(self.n_components, n - 2)

    regularize = self.regularization is not None and not self.regularization == 0.
    check_scaling(self.scaling, adjacency, regularize)

    weights = adjacency.dot(np.ones(n))
    regularization = self.regularization
    if regularization:
        if self.relative_regularization:
            regularization = regularization * weights.sum() / n**2
        weights += regularization * n

    # Spectral decomposition of the normalized adjacency matrix.
    inv_sqrt_weights = diag_pinv(np.sqrt(weights))
    if regularization:
        norm_adjacency = NormalizedAdjacencyOperator(adjacency, regularization)
    else:
        norm_adjacency = inv_sqrt_weights.dot(adjacency.dot(inv_sqrt_weights))

    solver.which = 'LA'
    solver.fit(matrix=norm_adjacency, n_components=n_components)

    # Decreasing order of eigenvalues, without the first one.
    eigenvalues = solver.eigenvalues_
    order = np.argsort(-eigenvalues)[1:]
    eigenvalues = eigenvalues[order]
    eigenvectors = inv_sqrt_weights.dot(solver.eigenvectors_[:, order])

    embedding = eigenvectors.copy()
    if self.scaling:
        rescale = diag_pinv((1 - eigenvalues)**self.scaling)
        embedding = rescale.dot(embedding.T).T
    if self.normalized:
        embedding = normalize(embedding, p=2)

    self.embedding_ = embedding
    self.eigenvalues_ = eigenvalues
    self.eigenvectors_ = eigenvectors
    self.regularization_ = regularization

    return self
def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Spectral':
    """Compute the graph embedding.

    If the input matrix :math:`B` is not square (e.g., biadjacency matrix of a bipartite graph) or not symmetric
    (e.g., adjacency matrix of a directed graph), use the adjacency matrix

    :math:`A  = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`

    and return the embedding for both rows and columns of the input matrix :math:`B`.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    force_bipartite : bool (default = ``False``)
        If ``True``, force the input matrix to be considered as a biadjacency matrix.

    Returns
    -------
    self: :class:`Spectral`
    """
    adjacency, self.bipartite = get_adjacency(input_matrix, allow_directed=False,
                                              force_bipartite=force_bipartite)
    n = adjacency.shape[0]

    regularization = self._get_regularization(self.regularization, adjacency)
    self.regularized = regularization > 0

    use_random_walk = self.decomposition == 'rw'
    laplacian = Laplacian(adjacency, regularization, use_random_walk)

    # One extra component: the first eigenpair is dropped below.
    n_components = check_n_components(self.n_components, n - 2) + 1
    solver = LanczosEig(which='SM')
    solver.fit(matrix=laplacian, n_components=n_components)

    # Increasing order of eigenvalues, without the first one.
    order = np.argsort(solver.eigenvalues_)[1:]
    eigenvalues = solver.eigenvalues_[order]
    eigenvectors = solver.eigenvectors_[:, order]

    if use_random_walk:
        eigenvectors = laplacian.norm_diag.dot(eigenvectors)
        eigenvalues = 1 - eigenvalues

    embedding = eigenvectors.copy()
    if self.normalized:
        embedding = normalize(embedding, p=2)

    self.embedding_ = embedding
    self.eigenvalues_ = eigenvalues
    self.eigenvectors_ = eigenvectors
    if self.bipartite:
        self._split_vars(input_matrix.shape)

    return self
def cosine_modularity(adjacency, embedding: np.ndarray, embedding_col=None, resolution=1., weights='degree',
                      return_all: bool = False):
    """Quality metric of an embedding :math:`x` defined by:

    :math:`Q = \\sum_{ij}\\left(\\dfrac{A_{ij}}{w} - \\gamma \\dfrac{w^+_iw^-_j}{w^2}\\right)
    \\left(\\dfrac{1 + \\cos(x_i, x_j)}{2}\\right)`

    where

    * :math:`w^+_i, w^-_i` are the out-weight, in-weight of node :math:`i` (for digraphs),\n
    * :math:`w = 1^TA1` is the total weight of the graph.

    For bipartite graphs with column embedding :math:`y`, the metric is

    :math:`Q = \\sum_{ij}\\left(\\dfrac{B_{ij}}{w} - \\gamma \\dfrac{w_{1,i}w_{2,j}}{w^2}\\right)
    \\left(\\dfrac{1 + \\cos(x_i, y_j)}{2}\\right)`

    where

    * :math:`w_{1,i}, w_{2,j}` are the weights of nodes :math:`i` (row) and :math:`j` (column),\n
    * :math:`w = 1^TB1` is the total weight of the graph.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    embedding :
        Embedding of the nodes.
    embedding_col :
        Embedding of the columns (for bipartite graphs).
        If ``None``, the adjacency matrix must be square and a copy of ``embedding`` is used.
    resolution :
        Resolution parameter.
    weights : ``'degree'`` or ``'uniform'``
        Weights of the nodes.
    return_all :
        If ``True``, return the tuple ``(fit, diversity, modularity)`` instead of the modularity alone.

    Returns
    -------
    modularity : float
        Equal to ``fit - resolution * diversity``. Returned alone if ``return_all`` is ``False``,
        as the last element of the tuple otherwise.
    fit : float, optional
        First element of the tuple if ``return_all`` is ``True``.
    diversity : float, optional
        Second element of the tuple if ``return_all`` is ``True``.

    Example
    -------
    >>> from sknetwork.embedding import cosine_modularity
    >>> from sknetwork.data import karate_club
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> embedding = graph.position
    >>> np.round(cosine_modularity(adjacency, embedding), 2)
    0.35
    """
    adjacency = check_format(adjacency)
    total_weight: float = adjacency.data.sum()

    # Without a column embedding, the same embedding is used for rows and columns.
    if embedding_col is None:
        check_square(adjacency)
        embedding_col = embedding.copy()

    embedding_row_norm = normalize(embedding, p=2)
    embedding_col_norm = normalize(embedding_col, p=2)

    probs_row = check_probs(weights, adjacency)
    probs_col = check_probs(weights, adjacency.T)

    # Sparse embeddings use the sparse element-wise product, other types use np.multiply.
    if isinstance(embedding_row_norm, sparse.csr_matrix) and isinstance(embedding_col_norm, sparse.csr_matrix):
        fit: float = 0.5 * (1 + (embedding_row_norm.multiply(adjacency.dot(embedding_col_norm))).sum() / total_weight)
    else:
        fit: float = 0.5 * (
            1 + (np.multiply(embedding_row_norm, adjacency.dot(embedding_col_norm))).sum() / total_weight)
    # NOTE(review): the diversity term is computed from the raw embeddings, not from the
    # normalized ones used for the fit term; confirm this matches the cosine-based formula.
    div: float = 0.5 * (1 + (embedding.T.dot(probs_row)).dot(embedding_col.T.dot(probs_col)))

    if return_all:
        return fit, div, fit - resolution * div
    else:
        return fit - resolution * div
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'GSVD':
    """Compute the GSVD of the adjacency or biadjacency matrix.

    Parameters
    ----------
    adjacency :
        Adjacency or biadjacency matrix of the graph.

    Returns
    -------
    self: :class:`GSVD`
    """
    adjacency = check_format(adjacency).asfptype()
    n_row, n_col = adjacency.shape

    # Cap the number of components at min(n_row, n_col) - 1, with a warning.
    if self.n_components > min(n_row, n_col) - 1:
        n_components = min(n_row, n_col) - 1
        warnings.warn(
            Warning(
                "The dimension of the embedding must be strictly less than the number of rows "
                "and the number of columns. Changed accordingly."))
    else:
        n_components = self.n_components

    # Replace the 'auto' placeholder by a solver instance chosen from the number of non-zero entries.
    if self.solver == 'auto':
        solver = auto_solver(adjacency.nnz)
        if solver == 'lanczos':
            self.solver: SVDSolver = LanczosSVD()
        else:
            self.solver: SVDSolver = HalkoSVD()

    regularization = self.regularization
    if regularization:
        if self.relative_regularization:
            # Scale the regularization by the total weight divided by n_row * n_col.
            regularization = regularization * np.sum(
                adjacency.data) / (n_row * n_col)
        # Sparse matrix plus a rank-one term (constant vector times ones vector).
        adjacency_reg = SparseLR(
            adjacency, [(regularization * np.ones(n_row), np.ones(n_col))])
    else:
        adjacency_reg = adjacency

    # Row and column weights, then (pseudo-inverse) powers of these weights as diagonal scalings.
    weights_row = adjacency_reg.dot(np.ones(n_col))
    weights_col = adjacency_reg.T.dot(np.ones(n_row))
    diag_row = diag_pinv(np.power(weights_row, self.factor_row))
    diag_col = diag_pinv(np.power(weights_col, self.factor_col))
    self.solver.fit(
        safe_sparse_dot(diag_row, safe_sparse_dot(adjacency_reg, diag_col)),
        n_components)

    # Sort the decomposition by decreasing singular value.
    singular_values = self.solver.singular_values_
    index = np.argsort(-singular_values)
    singular_values = singular_values[index]
    singular_vectors_left = self.solver.singular_vectors_left_[:, index]
    singular_vectors_right = self.solver.singular_vectors_right_[:, index]
    # Rows are scaled by singular_values^(1 - factor_singular), columns by singular_values^factor_singular.
    singular_left_diag = sparse.diags(
        np.power(singular_values, 1 - self.factor_singular))
    singular_right_diag = sparse.diags(
        np.power(singular_values, self.factor_singular))

    embedding_row = diag_row.dot(singular_vectors_left)
    embedding_col = diag_col.dot(singular_vectors_right)

    embedding_row = singular_left_diag.dot(embedding_row.T).T
    embedding_col = singular_right_diag.dot(embedding_col.T).T

    if self.normalized:
        embedding_row = normalize(embedding_row, p=2)
        embedding_col = normalize(embedding_col, p=2)

    self.embedding_row_ = embedding_row
    self.embedding_col_ = embedding_col
    self.embedding_ = embedding_row
    self.singular_values_ = singular_values
    self.singular_vectors_left_ = singular_vectors_left
    self.singular_vectors_right_ = singular_vectors_right
    self.regularization_ = regularization
    self.weights_col_ = weights_col

    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'BiKMeans':
    """Embed the graph, then cluster the embedding with k-means.

    Parameters
    ----------
    biadjacency:
        Biadjacency matrix of the graph.

    Returns
    -------
    self: :class:`BiKMeans`
    """
    n_row, _ = biadjacency.shape
    check_n_clusters(self.n_clusters, n_row)

    embedder = self.embedding_method
    embedder.fit(biadjacency)

    # Co-clustering stacks the row and column embeddings.
    if self.co_cluster:
        points = np.vstack((embedder.embedding_row_, embedder.embedding_col_))
    else:
        points = embedder.embedding_

    kmeans = KMeansDense(self.n_clusters)
    kmeans.fit(points)

    labels = reindex_labels(kmeans.labels_) if self.sort_clusters else kmeans.labels_

    self.labels_ = labels
    if self.co_cluster:
        self._split_vars(n_row)
    else:
        self.labels_row_ = labels

    if self.return_membership:
        rows = membership_matrix(self.labels_row_, n_labels=self.n_clusters)
        if self.labels_col_ is None:
            # Without column labels, rows are related through their common columns.
            self.membership_row_ = normalize(biadjacency.dot(biadjacency.T.dot(rows)))
        else:
            cols = membership_matrix(self.labels_col_, n_labels=self.n_clusters)
            self.membership_row_ = normalize(biadjacency.dot(cols))
            self.membership_col_ = normalize(biadjacency.T.dot(rows))
        self.membership_ = self.membership_row_

    if self.return_aggregate:
        rows = membership_matrix(self.labels_row_, n_labels=self.n_clusters)
        aggregate = sparse.csr_matrix(rows.T.dot(biadjacency))
        if self.labels_col_ is not None:
            cols = membership_matrix(self.labels_col_, n_labels=self.n_clusters)
            aggregate = aggregate.dot(cols)
        self.biadjacency_ = aggregate

    return self