def test_lanczos(self):
    """Check the rank-2 decomposition produced by ``LanczosSVD``.

    The same solver instance is fitted first on ``self.biadjacency`` and then
    on ``self.slr``. After each fit, exactly two singular values must be
    available and the value returned by ``svd_err`` for the fitted factors
    must be almost equal to 0.
    """
    solver = LanczosSVD()

    def check_decomposition(matrix):
        # Fit with 2 components, then validate both the count and the error.
        solver.fit(matrix, 2)
        self.assertEqual(len(solver.singular_values_), 2)
        error = svd_err(
            matrix,
            solver.singular_vectors_left_,
            solver.singular_vectors_right_,
            solver.singular_values_,
        )
        self.assertAlmostEqual(error, 0)

    check_decomposition(self.biadjacency)
    check_decomposition(self.slr)
def test_compare_solvers(self):
    """Check that ``LanczosSVD`` and ``HalkoSVD`` agree on singular values.

    Both solvers are fitted with 2 components on ``self.biadjacency`` and
    then on ``self.slr``. For each input, the Euclidean norm of the
    difference between the two vectors of singular values must be almost
    equal to 0.
    """
    lanczos = LanczosSVD()
    halko = HalkoSVD()

    def assert_same_singular_values(matrix):
        # Lanczos is fitted before Halko, as both are reused across inputs.
        lanczos.fit(matrix, 2)
        halko.fit(matrix, 2)
        gap = np.linalg.norm(lanczos.singular_values_ - halko.singular_values_)
        self.assertAlmostEqual(gap, 0.)

    assert_same_singular_values(self.biadjacency)
    assert_same_singular_values(self.slr)
class BiSpectral:
    """Joint embedding of the rows and columns of a biadjacency matrix.

    The input matrix is wrapped with a rank-one regularization term, scaled
    on both sides by diagonal matrices built from the square roots of its row
    and column weights, and decomposed with ``LanczosSVD``. The leading
    singular triplet is discarded and the remaining ones form the embedding.

    Parameters
    ----------
    embedding_dimension : int (default = 2)
        Number of singular triplets kept in the embedding.
    regularization : float (default = 0.001)
        Constant used to fill the row vector of the rank-one term passed to
        ``SparseLR``.

    Attributes
    ----------
    solver : LanczosSVD
        Solver instance created by :meth:`fit`.
    normalized_adj :
        Normalized matrix given to the solver.
    diag_row, diag_col :
        Diagonal scaling matrices returned by ``diag_pinv``.
    singular_values_ : np.ndarray
        Singular values in decreasing order, without the largest one.
    row_embedding_ : np.ndarray
        Left singular vectors associated with ``singular_values_``.
    col_embedding_ : np.ndarray
        Right singular vectors associated with ``singular_values_``.
    embedding_ : np.ndarray
        ``row_embedding_`` stacked on top of ``col_embedding_``.
    """

    def __init__(self, embedding_dimension=2, regularization=0.001):
        self.embedding_dimension = embedding_dimension
        self.regularization = regularization

    def returnNormalized(self, adjacency):
        """Return the regularized, two-sided normalized version of ``adjacency``.

        Side effect: stores the scaling matrices in ``self.diag_row`` and
        ``self.diag_col``.

        Parameters
        ----------
        adjacency :
            Matrix with a 2-D ``shape``, accepted by ``SparseLR``.

        Returns
        -------
        normalized_adj :
            Product ``diag_row * adjacency * diag_col`` computed with
            ``safe_sparse_dot``.
        """
        n1, n2 = adjacency.shape
        # Total weight heuristic stated in De Lara (2019).
        # NOTE(review): SparseLR presumably represents
        # adjacency + regularization * 1 1^T without densifying - confirm.
        adjacency = SparseLR(
            adjacency, [(self.regularization * np.ones(n1), np.ones(n2))])
        # Left side of the normalized Laplacian (square root taken below).
        w_row = adjacency.dot(np.ones(adjacency.shape[1]))
        # Right side of the normalized Laplacian (square root taken below).
        w_col = (adjacency.T).dot(np.ones(adjacency.shape[0]))
        self.diag_row = diag_pinv(np.sqrt(w_row))
        self.diag_col = diag_pinv(np.sqrt(w_col))
        normalized_adj = safe_sparse_dot(
            self.diag_row, safe_sparse_dot(adjacency, self.diag_col))
        return normalized_adj

    def fit(self, adjacency):
        """Compute the embedding of the rows and columns of ``adjacency``.

        Parameters
        ----------
        adjacency :
            Biadjacency matrix, passed to :meth:`returnNormalized`.

        Returns
        -------
        self : BiSpectral
        """
        self.solver = LanczosSVD()
        # One extra component is requested because the first singular
        # vector/value carries no information and is dropped below.
        n_components = self.embedding_dimension + 1
        self.normalized_adj = self.returnNormalized(adjacency)
        self.solver.fit(self.normalized_adj, n_components)

        # Sort by decreasing singular value and skip the leading triplet.
        index = np.argsort(-self.solver.singular_values_)
        kept = index[1:]
        self.singular_values_ = self.solver.singular_values_[kept]
        # Attribute names match the ones read from LanczosSVD everywhere
        # else in this file (singular_vectors_left_ / singular_vectors_right_).
        self.row_embedding_ = self.solver.singular_vectors_left_[:, kept]
        self.col_embedding_ = self.solver.singular_vectors_right_[:, kept]
        self.embedding_ = np.vstack((self.row_embedding_, self.col_embedding_))
        return self
class GSVD(BaseEmbedding):
    """Graph embedding by Generalized Singular Value Decomposition of the adjacency or
    biadjacency matrix :math:`A`.

    This amounts to the Singular Value Decomposition of the matrix
    :math:`D_1^{- \\alpha_1}AD_2^{- \\alpha_2}`, where :math:`D_1, D_2` are the diagonal
    matrices of row weights and column weights, respectively, and
    :math:`\\alpha_1, \\alpha_2` are parameters.

    Parameters
    -----------
    n_components : int
        Dimension of the embedding.
    regularization : ``None`` or float (default = ``None``)
        Regularization factor :math:`\\alpha` so that the matrix is
        :math:`A + \\alpha \\frac{11^T}{n}`. A value of 0 is stored as ``None``.
    factor_row : float (default = 0.5)
        Power factor :math:`\\alpha_1` applied to the diagonal matrix of row weights.
    factor_col : float (default = 0.5)
        Power factor :math:`\\alpha_2` applied to the diagonal matrix of column weights.
    factor_singular : float (default = 0.)
        Parameter :math:`\\alpha` applied to the singular values on right singular vectors.
        The embeddings of rows and columns are respectively
        :math:`D_1^{- \\alpha_1}U \\Sigma^{1-\\alpha}` and
        :math:`D_2^{- \\alpha_2}V \\Sigma^\\alpha` where:

        * :math:`U` is the matrix of left singular vectors, shape (n_row, n_components)
        * :math:`V` is the matrix of right singular vectors, shape (n_col, n_components)
        * :math:`\\Sigma` is the diagonal matrix of singular values,
          shape (n_components, n_components)

    normalized : bool (default = ``True``)
        If ``True``, normalize the embedding so that each vector has norm 1 in the
        embedding space, i.e., each vector lies on the unit sphere.
    solver : ``'lanczos'`` (Lanczos algorithm, default) or :class:`SVDSolver` (custom solver)
        Which solver to use. Any string is replaced by a ``LanczosSVD`` instance in
        :meth:`fit`.

    Attributes
    ----------
    embedding_ : array, shape = (n, n_components)
        Embedding of the nodes (set to the row embedding by :meth:`fit`).
    embedding_row_ : array, shape = (n_row, n_components)
        Embedding of the rows, for bipartite graphs.
    embedding_col_ : array, shape = (n_col, n_components)
        Embedding of the columns, for bipartite graphs.
    singular_values_ : np.ndarray, shape = (n_components)
        Singular values.
    singular_vectors_left_ : np.ndarray, shape = (n_row, n_components)
        Left singular vectors.
    singular_vectors_right_ : np.ndarray, shape = (n_col, n_components)
        Right singular vectors.
    weights_col_ : np.ndarray, shape = (n2)
        Weights applied to columns.

    Example
    -------
    >>> from sknetwork.embedding import GSVD
    >>> from sknetwork.data import karate_club
    >>> gsvd = GSVD()
    >>> adjacency = karate_club()
    >>> embedding = gsvd.fit_transform(adjacency)
    >>> embedding.shape
    (34, 2)

    References
    ----------
    Abdi, H. (2007).
    `Singular value decomposition (SVD) and generalized singular value decomposition.
    <https://www.cs.cornell.edu/cv/ResearchPDF/Generalizing%20The%20Singular%20Value%20Decomposition.pdf>`_
    Encyclopedia of measurement and statistics, 907-912.
    """

    def __init__(self, n_components=2, regularization: Union[None, float] = None,
                 factor_row: float = 0.5, factor_col: float = 0.5, factor_singular: float = 0.,
                 normalized: bool = True, solver: Union[str, SVDSolver] = 'lanczos'):
        super().__init__()

        # Hyper-parameters; a zero regularization is treated as "no regularization".
        self.n_components = n_components
        self.regularization = None if regularization == 0 else regularization
        self.factor_row = factor_row
        self.factor_col = factor_col
        self.factor_singular = factor_singular
        self.normalized = normalized
        self.solver = solver

        # Fitted state; ``regularization_`` is only initialized within this class.
        self.singular_values_ = None
        self.singular_vectors_left_ = None
        self.singular_vectors_right_ = None
        self.regularization_ = None
        self.weights_col_ = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'GSVD':
        """Compute the embedding of the graph.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`GSVD`
        """
        self._init_vars()

        matrix = check_format(input_matrix).asfptype()
        n_row, n_col = matrix.shape
        n_components = check_n_components(self.n_components, min(n_row, n_col) - 1)

        # A solver given by name is replaced by a Lanczos solver instance.
        if isinstance(self.solver, str):
            self.solver = LanczosSVD()

        matrix_reg = Regularizer(matrix, self.regularization) if self.regularization else matrix

        # Row / column weights of the (possibly regularized) matrix and their scalings.
        weights_row = matrix_reg.dot(np.ones(n_col))
        weights_col = matrix_reg.T.dot(np.ones(n_row))
        diag_row = diag_pinv(np.power(weights_row, self.factor_row))
        diag_col = diag_pinv(np.power(weights_col, self.factor_col))

        scaled_matrix = safe_sparse_dot(diag_row, safe_sparse_dot(matrix_reg, diag_col))
        self.solver.fit(scaled_matrix, n_components)

        # Reorder the decomposition by decreasing singular value.
        order = np.argsort(-self.solver.singular_values_)
        sigma = self.solver.singular_values_[order]
        vectors_left = self.solver.singular_vectors_left_[:, order]
        vectors_right = self.solver.singular_vectors_right_[:, order]

        # Rows are scaled by sigma^(1 - factor_singular), columns by sigma^factor_singular.
        scale_left = sparse.diags(np.power(sigma, 1 - self.factor_singular))
        scale_right = sparse.diags(np.power(sigma, self.factor_singular))
        embedding_row = scale_left.dot(diag_row.dot(vectors_left).T).T
        embedding_col = scale_right.dot(diag_col.dot(vectors_right).T).T

        if self.normalized:
            embedding_row = normalize(embedding_row, p=2)
            embedding_col = normalize(embedding_col, p=2)

        self.singular_values_ = sigma
        self.singular_vectors_left_ = vectors_left
        self.singular_vectors_right_ = vectors_right
        self.weights_col_ = weights_col
        self.embedding_row_ = embedding_row
        self.embedding_col_ = embedding_col
        self.embedding_ = embedding_row

        return self

    @staticmethod
    def _check_adj_vector(adjacency_vectors):
        """Validate adjacency vectors by delegating to ``check_nonnegative``."""
        check_nonnegative(adjacency_vectors)

    def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
        """Predict the embedding of new rows, defined by their adjacency vectors.

        Parameters
        ----------
        adjacency_vectors :
            Adjacency vectors of nodes.
            Array of shape (n_col,) (single vector) or (n_vectors, n_col)

        Returns
        -------
        embedding_vectors : np.ndarray
            Embedding of the nodes.
        """
        self._check_fitted()

        # n_row is not used below; the unpacking is kept as in the fitted-shape checks.
        n_row, _ = self.embedding_row_.shape
        n_col, _ = self.embedding_col_.shape

        vectors = check_adjacency_vector(adjacency_vectors, n_col)
        self._check_adj_vector(vectors)

        # Same regularization as in fit, when enabled.
        if self.regularization:
            vectors = Regularizer(vectors, self.regularization)

        # Weighting: rows use the weights of the new vectors, columns reuse the fitted weights.
        weights_row = vectors.dot(np.ones(n_col))
        diag_row = diag_pinv(np.power(weights_row, self.factor_row))
        diag_col = diag_pinv(np.power(self.weights_col_, self.factor_col))
        vectors = safe_sparse_dot(diag_row, safe_sparse_dot(vectors, diag_col))

        # Projection in the embedding space, then scaling by the singular values.
        embedding_vectors = diag_row.dot(vectors.dot(self.singular_vectors_right_))
        embedding_vectors /= np.power(self.singular_values_, self.factor_singular)

        if self.normalized:
            embedding_vectors = normalize(embedding_vectors, p=2)

        # A single input vector yields a flat array.
        if len(embedding_vectors) == 1:
            embedding_vectors = embedding_vectors.ravel()

        return embedding_vectors