コード例 #1
0
    def fit(self, adjacency):
        """Decompose the normalized adjacency with a Lanczos SVD and store the embeddings.

        Parameters
        ----------
        adjacency :
            Matrix passed to ``self.returnNormalized`` before the decomposition.

        Returns
        -------
        self
        """
        self.solver = LanczosSVD()

        # One extra component is requested because the leading singular pair is discarded below.
        n_components = 1 + self.embedding_dimension
        self.normalized_adj = self.returnNormalized(adjacency)
        self.solver.fit(self.normalized_adj, n_components)

        # Sort by decreasing singular value and drop the first (largest) entry.
        kept = np.argsort(-self.solver.singular_values_)[1:]
        self.singular_values_ = self.solver.singular_values_[kept]
        self.row_embedding_ = self.solver.left_singular_vectors_[:, kept]
        self.col_embedding_ = self.solver.right_singular_vectors_[:, kept]

        # Rows first, then columns, stacked into a single embedding matrix.
        self.embedding_ = np.vstack((self.row_embedding_, self.col_embedding_))
        return self
コード例 #2
0
ファイル: svd.py プロジェクト: zhongkailv/scikit-network
    def __init__(self,
                 n_components=2,
                 regularization: Union[None, float] = None,
                 relative_regularization: bool = True,
                 factor_row: float = 0.5,
                 factor_col: float = 0.5,
                 factor_singular: float = 0.,
                 normalized: bool = True,
                 solver: Union[str, SVDSolver] = 'auto'):
        """Store the GSVD hyper-parameters and resolve the solver keyword.

        Parameters
        ----------
        n_components :
            Requested dimension of the embedding.
        regularization :
            Regularization value; a value equal to 0 is stored as ``None``.
        relative_regularization :
            Flag stored as given.
        factor_row, factor_col, factor_singular :
            Power factors stored as given.
        normalized :
            Flag stored as given.
        solver :
            ``'halko'`` or ``'lanczos'`` create the matching solver immediately; any other
            value (e.g. ``'auto'`` or a solver instance) is stored unchanged.
        """
        super(GSVD, self).__init__()

        self.n_components = n_components
        # A zero regularization is treated the same way as no regularization.
        self.regularization = None if regularization == 0 else regularization
        self.relative_regularization = relative_regularization
        self.factor_row = factor_row
        self.factor_col = factor_col
        self.factor_singular = factor_singular
        self.normalized = normalized

        if solver == 'halko':
            chosen = HalkoSVD()
        elif solver == 'lanczos':
            chosen = LanczosSVD()
        else:
            chosen = solver
        self.solver = chosen

        # Fitted attributes start out empty.
        self.singular_values_ = None
        self.singular_vectors_left_ = None
        self.singular_vectors_right_ = None
        self.regularization_ = None
        self.weights_col_ = None
コード例 #3
0
ファイル: hits.py プロジェクト: vishalbelsare/scikit-network
    def __init__(self, solver: Union[str, SVDSolver] = 'lanczos'):
        """Create the HITS estimator and bind its SVD solver.

        Parameters
        ----------
        solver :
            Any string keyword selects a fresh :class:`LanczosSVD`; any other object is
            stored unchanged and used as the solver.
        """
        super(HITS, self).__init__()

        # isinstance (instead of an exact type comparison) also recognises str subclasses,
        # which would otherwise be stored as-is and fail later when used as a solver.
        if isinstance(solver, str):
            self.solver: SVDSolver = LanczosSVD()
        else:
            self.solver = solver
コード例 #4
0
    def __init__(self, embedding_dimension=2, weights='degree', col_weights=None,
                 regularization: Union[None, float] = 0.01, relative_regularization: bool = True,
                 scaling: Union[None, str] = 'multiply', solver: Union[str, SVDSolver] = 'auto'):
        """Store the BiSpectral hyper-parameters and resolve the solver keyword.

        Parameters
        ----------
        embedding_dimension :
            Requested dimension of the embedding.
        weights :
            Row weights specification.
        col_weights :
            Column weights specification; falls back to ``weights`` when ``None``.
        regularization, relative_regularization :
            Regularization settings stored as given.
        scaling :
            Scaling keyword. ``'divide'`` is dropped (with a warning) unless both weight
            specifications are ``'degree'``.
        solver :
            ``'halko'`` or ``'lanczos'`` create the matching solver immediately; any other
            value is stored unchanged.
        """
        super(BiSpectral, self).__init__()

        self.embedding_dimension = embedding_dimension
        self.weights = weights
        # Column weights default to the row weights specification.
        self.col_weights = weights if col_weights is None else col_weights
        col_weights = self.col_weights
        self.regularization = regularization
        self.relative_regularization = relative_regularization
        self.scaling = scaling

        if scaling == 'divide' and (weights != 'degree' or col_weights != 'degree'):
            self.scaling = None
            warnings.warn(Warning("The scaling 'divide' is valid only with ``weights = 'degree'`` and "
                                  "``col_weights = 'degree'``. It will be ignored."))

        if solver == 'halko':
            chosen = HalkoSVD()
        elif solver == 'lanczos':
            chosen = LanczosSVD()
        else:
            chosen = solver
        self.solver = chosen

        # Fitted attributes start out empty.
        self.row_embedding_ = None
        self.col_embedding_ = None
        self.embedding_ = None
        self.singular_values_ = None
コード例 #5
0
ファイル: hits.py プロジェクト: vintasoftware/scikit-network
    def __init__(self, solver: Union[str, SVDSolver] = 'auto', **kwargs):
        """Create the HITS estimator and resolve the solver keyword.

        Parameters
        ----------
        solver :
            ``'halko'`` or ``'lanczos'`` create the matching solver; any other value
            (e.g. ``'auto'`` or a solver instance) is stored unchanged.
        **kwargs :
            Forwarded to the solver constructor, only when the solver is built here
            from one of the two keywords.
        """
        super(HITS, self).__init__()

        if solver == 'halko':
            chosen = HalkoSVD(**kwargs)
        elif solver == 'lanczos':
            chosen = LanczosSVD(**kwargs)
        else:
            chosen = solver
        self.solver = chosen
コード例 #6
0
ファイル: svd.py プロジェクト: valeeraZ/scikit-network
def set_svd_solver(solver: str, adjacency):
    """Return an SVD solver instance for the given keyword.

    Parameters
    ----------
    solver :
        Solver keyword. ``'auto'`` is first resolved through ``auto_solver`` using the
        number of non-zero entries of ``adjacency``.
    adjacency :
        Matrix whose ``nnz`` attribute is read when ``solver`` is ``'auto'``.

    Returns
    -------
    A :class:`LanczosSVD` when the resolved keyword is ``'lanczos'``, otherwise a
    :class:`HalkoSVD`.
    """
    keyword = auto_solver(adjacency.nnz) if solver == 'auto' else solver
    if keyword == 'lanczos':
        return LanczosSVD()
    return HalkoSVD()  # pragma: no cover
コード例 #7
0
ファイル: svd.py プロジェクト: vishalbelsare/scikit-network
 def __init__(self,
              n_components=2,
              normalized: bool = False,
              solver: Union[str, SVDSolver] = 'lanczos'):
     """Store the PCA hyper-parameters and bind the SVD solver.

     Parameters
     ----------
     n_components :
         Requested dimension of the embedding.
     normalized :
         Flag stored as given.
     solver :
         Any string selects a fresh :class:`LanczosSVD`; any other object is used as is.
     """
     super(PCA, self).__init__()
     self.n_components = n_components
     self.normalized = normalized
     # Every string keyword maps to the Lanczos solver in this version.
     self.solver = LanczosSVD() if isinstance(solver, str) else solver
コード例 #8
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'HITS':
        """
        Compute hub and authority scores from the leading singular vectors.

        Parameters
        ----------
        adjacency :
            Adjacency or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`HITS`

        Raises
        ------
        ValueError
            If ``self.mode`` is neither ``'hubs'`` nor ``'authorities'``.
        """
        adjacency = check_format(adjacency)

        # Turn the 'auto' keyword into a concrete solver, based on the number of non-zeros.
        if self.solver == 'auto':
            if auto_solver(adjacency.nnz) == 'lanczos':
                self.solver = LanczosSVD()
            else:
                self.solver = HalkoSVD()

        self.solver.fit(adjacency, 1)
        hubs = self.solver.left_singular_vectors_.reshape(-1)
        authorities = self.solver.right_singular_vectors_.reshape(-1)

        # Singular vectors are defined up to sign: flip each vector unless it has strictly
        # more positive than negative entries, then zero out the remaining negatives.
        if (hubs > 0).sum() <= (hubs < 0).sum():
            hubs = -hubs
        if (authorities > 0).sum() <= (authorities < 0).sum():
            authorities = -authorities
        hubs = np.clip(hubs, a_min=0., a_max=None)
        authorities = np.clip(authorities, a_min=0., a_max=None)

        if self.mode == 'hubs':
            self.scores_, self.col_scores_ = hubs, authorities
        elif self.mode == 'authorities':
            self.scores_, self.col_scores_ = authorities, hubs
        else:
            raise ValueError('Mode should be "hubs" or "authorities".')

        return self
コード例 #9
0
    def __init__(self,
                 mode: str = 'hubs',
                 solver: Union[str, SVDSolver] = 'auto'):
        """Store the scoring mode and resolve the solver keyword.

        Parameters
        ----------
        mode :
            Scoring mode, stored as given.
        solver :
            ``'halko'`` or ``'lanczos'`` create the matching solver immediately; any other
            value (e.g. ``'auto'`` or a solver instance) is stored unchanged.
        """
        super(HITS, self).__init__()

        self.mode = mode

        if solver == 'halko':
            chosen = HalkoSVD()
        elif solver == 'lanczos':
            chosen = LanczosSVD()
        else:
            chosen = solver
        self.solver = chosen

        # Fitted attribute, empty until the model is fitted.
        self.col_scores_ = None
コード例 #10
0
class BiSpectral:
    """Spectral co-embedding of the rows and columns of a matrix via a Lanczos SVD.

    Parameters
    ----------
    embedding_dimension :
        Number of singular pairs kept in the embedding.
    regularization :
        Constant used to build the rank-one term added to the matrix before normalization.
    """

    def __init__(self, embedding_dimension=2, regularization=0.001):
        self.embedding_dimension = embedding_dimension
        self.regularization = regularization

    def returnNormalized(self, adjacency):
        """Regularize ``adjacency`` and rescale it on both sides.

        The diagonal scaling matrices are kept on the instance as ``diag_row`` and
        ``diag_col``.

        Parameters
        ----------
        adjacency :
            Input matrix of shape (n1, n2).

        Returns
        -------
        The product ``diag_row * regularized_adjacency * diag_col``.
        """
        n_rows, n_cols = adjacency.shape

        # Rank-one regularization term (reference given by the author: De Lara, 2019).
        low_rank_term = [(self.regularization * np.ones(n_rows), np.ones(n_cols))]
        regularized = SparseLR(adjacency, low_rank_term)

        # Row and column sums of the regularized matrix; their square roots feed the scaling.
        row_weights = regularized.dot(np.ones(regularized.shape[1]))
        col_weights = regularized.T.dot(np.ones(regularized.shape[0]))
        self.diag_row = diag_pinv(np.sqrt(row_weights))
        self.diag_col = diag_pinv(np.sqrt(col_weights))

        return safe_sparse_dot(self.diag_row, safe_sparse_dot(regularized, self.diag_col))

    def fit(self, adjacency):
        """Decompose the normalized matrix and store the row and column embeddings.

        Parameters
        ----------
        adjacency :
            Input matrix, normalized through :meth:`returnNormalized`.

        Returns
        -------
        self
        """
        self.solver = LanczosSVD()

        # One extra component is requested because the leading singular pair is discarded below.
        n_components = 1 + self.embedding_dimension
        self.normalized_adj = self.returnNormalized(adjacency)
        self.solver.fit(self.normalized_adj, n_components)

        # Sort by decreasing singular value and drop the first (largest) entry.
        kept = np.argsort(-self.solver.singular_values_)[1:]
        self.singular_values_ = self.solver.singular_values_[kept]
        self.row_embedding_ = self.solver.left_singular_vectors_[:, kept]
        self.col_embedding_ = self.solver.right_singular_vectors_[:, kept]

        # Rows first, then columns, stacked into a single embedding matrix.
        self.embedding_ = np.vstack((self.row_embedding_, self.col_embedding_))
        return self
コード例 #11
0
    def test_lanczos(self):
        """The Lanczos solver returns two singular values and a near-zero SVD error."""
        solver = LanczosSVD()

        # Same checks on the plain biadjacency matrix and on the sparse + low-rank one.
        for matrix in (self.biadjacency, self.slr):
            solver.fit(matrix, 2)
            self.assertEqual(len(solver.singular_values_), 2)
            error = svd_err(matrix, solver.singular_vectors_left_, solver.singular_vectors_right_,
                            solver.singular_values_)
            self.assertAlmostEqual(error, 0)
コード例 #12
0
    def test_compare_solvers(self):
        """Lanczos and Halko solvers agree on the two leading singular values."""
        lanczos, halko = LanczosSVD(), HalkoSVD()

        # Same comparison on the plain biadjacency matrix and on the sparse + low-rank one.
        for matrix in (self.biadjacency, self.slr):
            lanczos.fit(matrix, 2)
            halko.fit(matrix, 2)
            gap = np.linalg.norm(lanczos.singular_values_ - halko.singular_values_)
            self.assertAlmostEqual(gap, 0.)
コード例 #13
0
    def test_options(self):
        """Exercise solver and regularization options of GSVD, PCA and SVD."""
        biadjacency = star_wars(metadata=False)
        n_row, n_col = biadjacency.shape
        largest_dim = min(n_row, n_col) - 1

        # Requesting too many components must warn and fall back to the largest valid size.
        oversized = GSVD(n_components=5, regularization=0., solver='halko')
        with self.assertWarns(Warning):
            oversized.fit(biadjacency)
        self.assertEqual(oversized.embedding_row_.shape, (n_row, largest_dim))
        self.assertEqual(oversized.embedding_col_.shape, (n_col, largest_dim))

        # Regularized model: fitting and predicting on a random vector must not raise.
        regularized = GSVD(n_components=1, regularization=0.1, solver='lanczos')
        regularized.fit(biadjacency)
        regularized.predict(np.random.rand(n_col))

        # Solver given as a keyword.
        pca = PCA(n_components=largest_dim, solver='lanczos')
        pca.fit(biadjacency)
        self.assertEqual(pca.embedding_row_.shape, (n_row, largest_dim))

        # Solver given as an instance.
        svd = SVD(n_components=largest_dim, solver=LanczosSVD())
        svd.fit(biadjacency)
        self.assertEqual(svd.embedding_row_.shape, (n_row, largest_dim))
コード例 #14
0
ファイル: svd.py プロジェクト: zhongkailv/scikit-network
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'GSVD':
        """Fit the generalized SVD embedding on an adjacency or biadjacency matrix.

        The matrix is optionally regularized with a rank-one term, multiplied on both sides
        by diagonal matrices built from powers of its row and column weights, and decomposed
        by the configured solver. Row and column embeddings are derived from the singular
        vectors.

        Parameters
        ----------
        adjacency :
            Adjacency or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`GSVD`
        """
        adjacency = check_format(adjacency).asfptype()
        n_row, n_col = adjacency.shape

        # The number of components is capped at min(n_row, n_col) - 1.
        max_components = min(n_row, n_col) - 1
        n_components = self.n_components
        if n_components > max_components:
            n_components = max_components
            warnings.warn(Warning("The dimension of the embedding must be strictly less than the number of rows "
                                  "and the number of columns. Changed accordingly."))

        # Turn the 'auto' keyword into a concrete solver, based on the number of non-zeros.
        if self.solver == 'auto':
            self.solver = LanczosSVD() if auto_solver(adjacency.nnz) == 'lanczos' else HalkoSVD()

        reg = self.regularization
        matrix = adjacency
        if reg:
            if self.relative_regularization:
                # Scale the regularization by the average entry of the matrix.
                reg = reg * np.sum(adjacency.data) / (n_row * n_col)
            matrix = SparseLR(adjacency, [(reg * np.ones(n_row), np.ones(n_col))])

        row_weights = matrix.dot(np.ones(n_col))
        col_weights = matrix.T.dot(np.ones(n_row))
        row_scaling = diag_pinv(np.power(row_weights, self.factor_row))
        col_scaling = diag_pinv(np.power(col_weights, self.factor_col))
        scaled = safe_sparse_dot(row_scaling, safe_sparse_dot(matrix, col_scaling))
        self.solver.fit(scaled, n_components)

        # Reorder everything by decreasing singular value.
        order = np.argsort(-self.solver.singular_values_)
        sigma = self.solver.singular_values_[order]
        left = self.solver.singular_vectors_left_[:, order]
        right = self.solver.singular_vectors_right_[:, order]

        # Singular values are split between rows (power 1 - factor) and columns (power factor).
        row_power = sparse.diags(np.power(sigma, 1 - self.factor_singular))
        col_power = sparse.diags(np.power(sigma, self.factor_singular))
        rows = row_power.dot(row_scaling.dot(left).T).T
        cols = col_power.dot(col_scaling.dot(right).T).T

        if self.normalized:
            rows = normalize(rows, p=2)
            cols = normalize(cols, p=2)

        self.embedding_row_ = rows
        self.embedding_col_ = cols
        self.embedding_ = rows
        self.singular_values_ = sigma
        self.singular_vectors_left_ = left
        self.singular_vectors_right_ = right
        self.regularization_ = reg
        self.weights_col_ = col_weights

        return self
コード例 #15
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'BiSpectral':
        """
        Fit the spectral co-embedding of rows and columns.

        The matrix is optionally regularized, rescaled on both sides using the square roots
        of the row and column weights, and decomposed by the configured solver. The leading
        singular pair is discarded and the remaining vectors are optionally rescaled.

        Parameters
        ----------
        adjacency: array-like, shape = (n1, n2)
            Adjacency matrix, where n1 = n2 is the number of nodes for a standard graph,
            n1, n2 are the number of nodes in each part for a bipartite graph.

        Returns
        -------
        self: :class:`BiSpectral`
        """
        adjacency = check_format(adjacency).asfptype()
        n1, n2 = adjacency.shape

        # Turn the 'auto' keyword into a concrete solver, based on the number of non-zeros.
        if self.solver == 'auto':
            self.solver = LanczosSVD() if auto_solver(adjacency.nnz) == 'lanczos' else HalkoSVD()

        total_weight = adjacency.dot(np.ones(n2)).sum()
        reg = self.regularization
        if reg:
            if self.relative_regularization:
                # Scale the regularization by the average entry of the matrix.
                reg = reg * total_weight / (n1 * n2)
            adjacency = SparseLR(adjacency, [(reg * np.ones(n1), np.ones(n2))])

        row_weights = check_weights(self.weights, adjacency)
        col_weights = check_weights(self.col_weights, adjacency.T)
        diag_row = diag_pinv(np.sqrt(row_weights))
        diag_col = diag_pinv(np.sqrt(col_weights))
        normalized_adj = safe_sparse_dot(diag_row, safe_sparse_dot(adjacency, diag_col))

        # One extra component is needed because the leading singular pair is dropped;
        # the request is capped at min(n1, n2) - 1.
        max_components = min(n1, n2) - 1
        if self.embedding_dimension >= max_components:
            n_components = max_components
            warnings.warn(Warning("The dimension of the embedding must be less than the number of rows "
                                  "and the number of columns. Changed accordingly."))
        else:
            n_components = self.embedding_dimension + 1
        self.solver.fit(normalized_adj, n_components)

        # Sort by decreasing singular value and drop the first (largest) entry.
        kept = np.argsort(-self.solver.singular_values_)[1:]
        self.singular_values_ = self.solver.singular_values_[kept]
        self.row_embedding_ = diag_row.dot(self.solver.left_singular_vectors_[:, kept])
        self.col_embedding_ = diag_col.dot(self.solver.right_singular_vectors_[:, kept])

        scaling = self.scaling
        if scaling:
            if scaling == 'multiply':
                root = np.sqrt(self.singular_values_)
                self.row_embedding_ *= root
                self.col_embedding_ *= root
            elif scaling == 'divide':
                energy_levels: np.ndarray = np.sqrt(1 - np.clip(self.singular_values_, 0, 1) ** 2)
                # Invert only the strictly positive levels to avoid dividing by zero.
                positive = energy_levels > 0
                energy_levels[positive] = 1 / energy_levels[positive]
                self.row_embedding_ *= energy_levels
                self.col_embedding_ *= energy_levels
            elif scaling == 'barycenter':
                # Only the row embedding is rescaled in this mode.
                self.row_embedding_ *= self.singular_values_
            else:
                warnings.warn(Warning("The scaling must be 'multiply' or 'divide' or 'barycenter'. No scaling done."))

        self.embedding_ = np.vstack((self.row_embedding_, self.col_embedding_))
        return self
コード例 #16
0
ファイル: svd.py プロジェクト: vishalbelsare/scikit-network
    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'GSVD':
        """Fit the generalized SVD embedding on the input matrix.

        The matrix is optionally regularized, multiplied on both sides by diagonal matrices
        built from powers of its row and column weights, and decomposed by the solver.
        Row and column embeddings are derived from the singular vectors.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`GSVD`
        """
        self._init_vars()

        adjacency = check_format(input_matrix).asfptype()
        n_row, n_col = adjacency.shape
        n_components = check_n_components(self.n_components, min(n_row, n_col) - 1)

        # Any string keyword is replaced by a Lanczos solver instance.
        if isinstance(self.solver, str):
            self.solver = LanczosSVD()

        reg = self.regularization
        matrix = Regularizer(adjacency, reg) if reg else adjacency

        row_weights = matrix.dot(np.ones(n_col))
        col_weights = matrix.T.dot(np.ones(n_row))
        row_scaling = diag_pinv(np.power(row_weights, self.factor_row))
        col_scaling = diag_pinv(np.power(col_weights, self.factor_col))
        scaled = safe_sparse_dot(row_scaling, safe_sparse_dot(matrix, col_scaling))
        self.solver.fit(scaled, n_components)

        # Reorder everything by decreasing singular value.
        order = np.argsort(-self.solver.singular_values_)
        sigma = self.solver.singular_values_[order]
        left = self.solver.singular_vectors_left_[:, order]
        right = self.solver.singular_vectors_right_[:, order]

        # Singular values are split between rows (power 1 - factor) and columns (power factor).
        row_power = sparse.diags(np.power(sigma, 1 - self.factor_singular))
        col_power = sparse.diags(np.power(sigma, self.factor_singular))
        rows = row_power.dot(row_scaling.dot(left).T).T
        cols = col_power.dot(col_scaling.dot(right).T).T

        if self.normalized:
            rows = normalize(rows, p=2)
            cols = normalize(cols, p=2)

        self.embedding_row_ = rows
        self.embedding_col_ = cols
        self.embedding_ = rows
        self.singular_values_ = sigma
        self.singular_vectors_left_ = left
        self.singular_vectors_right_ = right
        self.weights_col_ = col_weights

        return self
コード例 #17
0
ファイル: svd.py プロジェクト: vishalbelsare/scikit-network
class GSVD(BaseEmbedding):
    """Graph embedding by Generalized Singular Value Decomposition of the adjacency or biadjacency matrix :math:`A`.
    This is equivalent to the Singular Value Decomposition of the matrix :math:`D_1^{- \\alpha_1}AD_2^{- \\alpha_2}`
    where :math:`D_1, D_2` are the diagonal matrices of row weights and column weights, respectively, and
    :math:`\\alpha_1, \\alpha_2` are parameters.

    Parameters
    -----------
    n_components : int (default = 2)
        Dimension of the embedding.
    regularization : ``None`` or float (default = ``None``)
        Regularization factor :math:`\\alpha` so that the matrix is :math:`A + \\alpha \\frac{11^T}{n}`.
        A value of 0 is treated as ``None`` (no regularization).
    factor_row : float (default = 0.5)
        Power factor :math:`\\alpha_1` applied to the diagonal matrix of row weights.
    factor_col : float (default = 0.5)
        Power factor :math:`\\alpha_2` applied to the diagonal matrix of column weights.
    factor_singular : float (default = 0.)
        Parameter :math:`\\alpha` applied to the singular values on right singular vectors.
        The embeddings of rows and columns are respectively :math:`D_1^{- \\alpha_1}U \\Sigma^{1-\\alpha}` and
        :math:`D_2^{- \\alpha_2}V \\Sigma^\\alpha` where:

        * :math:`U` is the matrix of left singular vectors, shape (n_row, n_components)
        * :math:`V` is the matrix of right singular vectors, shape (n_col, n_components)
        * :math:`\\Sigma` is the diagonal matrix of singular values, shape (n_components, n_components)

    normalized : bool (default = ``True``)
        If ``True``, normalize the embedding so that each vector has norm 1 in the embedding space, i.e.,
        each vector lies on the unit sphere.
    solver : ``'lanczos'`` (Lanczos algorithm, default) or :class:`SVDSolver` (custom solver)
        Which solver to use. Any string value is replaced by a :class:`LanczosSVD` instance when fitting.

    Attributes
    ----------
    embedding_ : array, shape = (n, n_components)
        Embedding of the nodes (same array as ``embedding_row_``).
    embedding_row_ : array, shape = (n_row, n_components)
        Embedding of the rows, for bipartite graphs.
    embedding_col_ : array, shape = (n_col, n_components)
        Embedding of the columns, for bipartite graphs.
    singular_values_ : np.ndarray, shape = (n_components)
        Singular values, sorted in decreasing order.
    singular_vectors_left_ : np.ndarray, shape = (n_row, n_components)
        Left singular vectors.
    singular_vectors_right_ : np.ndarray, shape = (n_col, n_components)
        Right singular vectors.
    weights_col_ : np.ndarray, shape = (n_col)
        Weights applied to columns.

    Example
    -------
    >>> from sknetwork.embedding import GSVD
    >>> from sknetwork.data import karate_club
    >>> gsvd = GSVD()
    >>> adjacency = karate_club()
    >>> embedding = gsvd.fit_transform(adjacency)
    >>> embedding.shape
    (34, 2)

    References
    ----------
    Abdi, H. (2007).
    `Singular value decomposition (SVD) and generalized singular value decomposition.
    <https://www.cs.cornell.edu/cv/ResearchPDF/Generalizing%20The%20Singular%20Value%20Decomposition.pdf>`_
    Encyclopedia of measurement and statistics, 907-912.
    """
    def __init__(self,
                 n_components=2,
                 regularization: Union[None, float] = None,
                 factor_row: float = 0.5,
                 factor_col: float = 0.5,
                 factor_singular: float = 0.,
                 normalized: bool = True,
                 solver: Union[str, SVDSolver] = 'lanczos'):
        """Store the hyper-parameters; no computation happens until :meth:`fit`."""
        super(GSVD, self).__init__()

        self.n_components = n_components
        # A zero regularization is stored as None so that later truthiness checks skip it.
        if regularization == 0:
            self.regularization = None
        else:
            self.regularization = regularization
        self.factor_row = factor_row
        self.factor_col = factor_col
        self.factor_singular = factor_singular
        self.normalized = normalized
        # Stored as given; a string keyword is turned into a solver instance in fit().
        self.solver = solver

        # Fitted attributes, empty until fit() is called.
        self.singular_values_ = None
        self.singular_vectors_left_ = None
        self.singular_vectors_right_ = None
        # NOTE(review): regularization_ is not assigned in fit() below; unless _init_vars()
        # sets it, it stays None — confirm whether it is still needed.
        self.regularization_ = None
        self.weights_col_ = None

    def fit(self, input_matrix: Union[sparse.csr_matrix,
                                      np.ndarray]) -> 'GSVD':
        """Compute the embedding of the graph.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`GSVD`
        """
        self._init_vars()

        adjacency = check_format(input_matrix).asfptype()
        n_row, n_col = adjacency.shape
        # The upper bound passed here is min(n_row, n_col) - 1.
        n_components = check_n_components(self.n_components,
                                          min(n_row, n_col) - 1)

        # Any string keyword is replaced by a Lanczos solver instance.
        if isinstance(self.solver, str):
            self.solver = LanczosSVD()
        regularization = self.regularization
        if regularization:
            adjacency_reg = Regularizer(adjacency, regularization)
        else:
            adjacency_reg = adjacency

        # Row and column sums of the (possibly regularized) matrix.
        weights_row = adjacency_reg.dot(np.ones(n_col))
        weights_col = adjacency_reg.T.dot(np.ones(n_row))
        # Diagonal scaling matrices built from powers of the weights.
        diag_row = diag_pinv(np.power(weights_row, self.factor_row))
        diag_col = diag_pinv(np.power(weights_col, self.factor_col))
        self.solver.fit(
            safe_sparse_dot(diag_row, safe_sparse_dot(adjacency_reg,
                                                      diag_col)), n_components)

        # Reorder singular values and vectors by decreasing singular value.
        singular_values = self.solver.singular_values_
        index = np.argsort(-singular_values)
        singular_values = singular_values[index]
        singular_vectors_left = self.solver.singular_vectors_left_[:, index]
        singular_vectors_right = self.solver.singular_vectors_right_[:, index]
        # Singular values are split between rows (power 1 - factor_singular)
        # and columns (power factor_singular).
        singular_left_diag = sparse.diags(
            np.power(singular_values, 1 - self.factor_singular))
        singular_right_diag = sparse.diags(
            np.power(singular_values, self.factor_singular))

        embedding_row = diag_row.dot(singular_vectors_left)
        embedding_col = diag_col.dot(singular_vectors_right)
        # Multiply each embedding column by the matching power of its singular value.
        embedding_row = singular_left_diag.dot(embedding_row.T).T
        embedding_col = singular_right_diag.dot(embedding_col.T).T

        if self.normalized:
            embedding_row = normalize(embedding_row, p=2)
            embedding_col = normalize(embedding_col, p=2)

        self.embedding_row_ = embedding_row
        self.embedding_col_ = embedding_col
        # The main embedding is the row embedding.
        self.embedding_ = embedding_row
        self.singular_values_ = singular_values
        self.singular_vectors_left_ = singular_vectors_left
        self.singular_vectors_right_ = singular_vectors_right
        # Kept for predict(), which rescales new vectors with the fitted column weights.
        self.weights_col_ = weights_col

        return self

    @staticmethod
    def _check_adj_vector(adjacency_vectors):
        """Validate adjacency vectors by delegating to ``check_nonnegative``."""
        check_nonnegative(adjacency_vectors)

    def predict(
            self, adjacency_vectors: Union[sparse.csr_matrix,
                                           np.ndarray]) -> np.ndarray:
        """Predict the embedding of new rows, defined by their adjacency vectors.

        Parameters
        ----------
        adjacency_vectors :
            Adjacency vectors of nodes.
            Array of shape (n_col,) (single vector) or (n_vectors, n_col)

        Returns
        -------
        embedding_vectors : np.ndarray
            Embedding of the nodes. A single resulting vector is returned flattened.
        """
        self._check_fitted()
        singular_vectors_right = self.singular_vectors_right_
        singular_values = self.singular_values_

        n_row, _ = self.embedding_row_.shape
        n_col, _ = self.embedding_col_.shape

        adjacency_vectors = check_adjacency_vector(adjacency_vectors, n_col)
        self._check_adj_vector(adjacency_vectors)

        # regularization
        if self.regularization:
            adjacency_vectors = Regularizer(adjacency_vectors,
                                            self.regularization)

        # weighting: row weights come from the new vectors, column weights from the fit
        weights_row = adjacency_vectors.dot(np.ones(n_col))
        diag_row = diag_pinv(np.power(weights_row, self.factor_row))
        diag_col = diag_pinv(np.power(self.weights_col_, self.factor_col))
        adjacency_vectors = safe_sparse_dot(
            diag_row, safe_sparse_dot(adjacency_vectors, diag_col))

        # projection in the embedding space
        # NOTE(review): diag_row is applied here a second time, on top of the row scaling
        # already applied to adjacency_vectors above — presumably intentional; confirm.
        averaging = adjacency_vectors
        embedding_vectors = diag_row.dot(averaging.dot(singular_vectors_right))

        # scaling
        embedding_vectors /= np.power(singular_values, self.factor_singular)

        if self.normalized:
            embedding_vectors = normalize(embedding_vectors, p=2)

        # A single embedded vector is returned as a flat array.
        if len(embedding_vectors) == 1:
            embedding_vectors = embedding_vectors.ravel()

        return embedding_vectors
コード例 #18
0
ファイル: svd.py プロジェクト: mynameisvinn/scikit-network
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'GSVD':
        """Fit the generalized SVD embedding on an adjacency or biadjacency matrix.

        The matrix is optionally regularized, multiplied on both sides by diagonal matrices
        built from powers of its row and column weights, and decomposed by the configured
        solver. Row and column embeddings are derived from the singular vectors.

        Parameters
        ----------
        adjacency :
            Adjacency or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`GSVD`
        """
        adjacency = check_format(adjacency).asfptype()
        n_row, n_col = adjacency.shape
        n_components = check_n_components(self.n_components, min(n_row, n_col) - 1)

        # Turn the 'auto' keyword into a concrete solver, based on the number of non-zeros.
        if self.solver == 'auto':
            if auto_solver(adjacency.nnz) == 'lanczos':
                self.solver = LanczosSVD()
            else:  # pragma: no cover
                self.solver = HalkoSVD()

        reg = self.regularization
        matrix = adjacency
        if reg:
            if self.relative_regularization:
                # Scale the regularization by the average entry of the matrix.
                reg = reg * np.sum(adjacency.data) / (n_row * n_col)
            matrix = RegularizedAdjacency(adjacency, reg)

        row_weights = matrix.dot(np.ones(n_col))
        col_weights = matrix.T.dot(np.ones(n_row))
        row_scaling = diag_pinv(np.power(row_weights, self.factor_row))
        col_scaling = diag_pinv(np.power(col_weights, self.factor_col))
        scaled = safe_sparse_dot(row_scaling, safe_sparse_dot(matrix, col_scaling))
        self.solver.fit(scaled, n_components)

        # Reorder everything by decreasing singular value.
        order = np.argsort(-self.solver.singular_values_)
        sigma = self.solver.singular_values_[order]
        left = self.solver.singular_vectors_left_[:, order]
        right = self.solver.singular_vectors_right_[:, order]

        # Singular values are split between rows (power 1 - factor) and columns (power factor).
        row_power = sparse.diags(np.power(sigma, 1 - self.factor_singular))
        col_power = sparse.diags(np.power(sigma, self.factor_singular))
        rows = row_power.dot(row_scaling.dot(left).T).T
        cols = col_power.dot(col_scaling.dot(right).T).T

        if self.normalized:
            rows = normalize(rows, p=2)
            cols = normalize(cols, p=2)

        self.embedding_row_ = rows
        self.embedding_col_ = cols
        self.embedding_ = rows
        self.singular_values_ = sigma
        self.singular_vectors_left_ = left
        self.singular_vectors_right_ = right
        self.regularization_ = reg
        self.weights_col_ = col_weights

        return self