Ejemplo n.º 1
0
    def predict(
            self, adjacency_vectors: Union[sparse.csr_matrix,
                                           np.ndarray]) -> np.ndarray:
        """Predict the embedding of new nodes, defined by their adjacency vectors.

        Parameters
        ----------
        adjacency_vectors :
            Adjacency vectors of nodes.
            Array of shape (n_col,) (single vector) or (n_vectors, n_col)

        Returns
        -------
        embedding_vectors : np.ndarray
            Embedding of the nodes.
        """
        self._check_fitted()
        eigenvectors = self.eigenvectors_
        eigenvalues = self.eigenvalues_
        n = eigenvectors.shape[0]

        adjacency_vectors = check_adjacency_vector(adjacency_vectors, n)
        check_nonnegative(adjacency_vectors)

        # regularization
        if self.regularization_:
            adjacency_vectors = RegularizedAdjacency(adjacency_vectors,
                                                     self.regularization_)

        # projection in the embedding space
        averaging = normalize(adjacency_vectors, p=1)
        embedding_vectors = averaging.dot(eigenvectors)

        if not self.barycenter:
            if self.normalized_laplacian:
                factors = 1 - eigenvalues
            else:
                # to be modified
                factors = 1 - eigenvalues / (adjacency_vectors.sum() + 1e-9)
            factors_inv_diag = diag_pinv(factors)
            embedding_vectors = factors_inv_diag.dot(embedding_vectors.T).T

        if self.equalize:
            embedding_vectors = diag_pinv(np.sqrt(eigenvalues)).dot(
                embedding_vectors.T).T

        if self.normalized:
            embedding_vectors = normalize(embedding_vectors, p=2)

        if embedding_vectors.shape[0] == 1:
            embedding_vectors = embedding_vectors.ravel()

        return embedding_vectors
Ejemplo n.º 2
0
    def _secondary_outputs(self, biadjacency):
        """Compute different variables from labels_."""
        if self.return_membership:
            membership_row = membership_matrix(self.labels_row_)
            membership_col = membership_matrix(self.labels_col_)
            self.membership_row_ = normalize(biadjacency.dot(membership_col))
            self.membership_col_ = normalize(biadjacency.T.dot(membership_row))

        if self.return_aggregate:
            membership_row = membership_matrix(self.labels_row_)
            membership_col = membership_matrix(self.labels_col_)
            biadjacency_ = sparse.csr_matrix(membership_row.T.dot(biadjacency))
            biadjacency_ = biadjacency_.dot(membership_col)
            self.biadjacency_ = biadjacency_
        return self
Ejemplo n.º 3
0
    def fit(
        self, adjacency: Union[sparse.csr_matrix,
                               np.ndarray]) -> 'LaplacianEmbedding':
        """Compute the graph embedding.

        Parameters
        ----------
        adjacency :
              Adjacency matrix of the graph (symmetric matrix).

        Returns
        -------
        self: :class:`LaplacianEmbedding`
        """
        adjacency = check_format(adjacency).asfptype()
        check_square(adjacency)
        check_symmetry(adjacency)
        n = adjacency.shape[0]

        regularize: bool = not (self.regularization is None
                                or self.regularization == 0.)
        check_scaling(self.scaling, adjacency, regularize)

        if regularize:
            solver: EigSolver = LanczosEig()
        else:
            solver = set_solver(self.solver, adjacency)
        n_components = 1 + check_n_components(self.n_components, n - 2)

        weights = adjacency.dot(np.ones(n))
        regularization = self.regularization
        if regularization:
            if self.relative_regularization:
                regularization = regularization * weights.sum() / n**2
            weights += regularization * n
            laplacian = LaplacianOperator(adjacency, regularization)
        else:
            weight_diag = sparse.diags(weights, format='csr')
            laplacian = weight_diag - adjacency

        solver.which = 'SM'
        solver.fit(matrix=laplacian, n_components=n_components)
        eigenvalues = solver.eigenvalues_[1:]
        eigenvectors = solver.eigenvectors_[:, 1:]

        embedding = eigenvectors.copy()

        if self.scaling:
            eigenvalues_inv_diag = diag_pinv(eigenvalues**self.scaling)
            embedding = eigenvalues_inv_diag.dot(embedding.T).T

        if self.normalized:
            embedding = normalize(embedding, p=2)

        self.embedding_ = embedding
        self.eigenvalues_ = eigenvalues
        self.eigenvectors_ = eigenvectors
        self.regularization_ = regularization

        return self
Ejemplo n.º 4
0
    def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
        """Predict the embedding of new rows, defined by their adjacency vectors.

        Parameters
        ----------
        adjacency_vectors :
            Adjacency vectors of nodes.
            Array of shape (n_col,) (single vector) or (n_vectors, n_col)

        Returns
        -------
        embedding_vectors : np.ndarray
            Embedding of the nodes.
        """
        self._check_fitted()
        embedding = self.embedding_
        n = embedding.shape[0]

        adjacency_vectors = check_adjacency_vector(adjacency_vectors, n)
        embedding_vectors = normalize(adjacency_vectors).dot(embedding)

        if embedding_vectors.shape[0] == 1:
            embedding_vectors = embedding_vectors.ravel()

        return embedding_vectors
Ejemplo n.º 5
0
    def predict(
            self, adjacency_vectors: Union[sparse.csr_matrix,
                                           np.ndarray]) -> np.ndarray:
        """Predict the embedding of new nodes, defined by their adjacency vectors.

        Parameters
        ----------
        adjacency_vectors :
            Adjacency vectors of nodes.
            Array of shape (n_col,) (single vector) or (n_vectors, n_col)

        Returns
        -------
        embedding_vectors : np.ndarray
            Embedding of the nodes.
        """
        self._check_fitted()
        eigenvectors = self.eigenvectors_
        eigenvalues = self.eigenvalues_
        n = eigenvectors.shape[0]

        adjacency_vectors = check_adjacency_vector(adjacency_vectors, n)
        check_nonnegative(adjacency_vectors)

        # regularization
        if self.regularization_:
            adjacency_vectors = RegularizedAdjacency(adjacency_vectors,
                                                     self.regularization_)

        # projection in the embedding space
        averaging = normalize(adjacency_vectors, p=1)
        embedding_vectors = averaging.dot(eigenvectors)
        embedding_vectors = diag_pinv(eigenvalues).dot(embedding_vectors.T).T

        if self.scaling:
            eigenvalues_inv_diag = diag_pinv((1 - eigenvalues)**self.scaling)
            embedding_vectors = eigenvalues_inv_diag.dot(embedding_vectors.T).T

        if self.normalized:
            embedding_vectors = normalize(embedding_vectors, p=2)

        if embedding_vectors.shape[0] == 1:
            embedding_vectors = embedding_vectors.ravel()

        return embedding_vectors
Ejemplo n.º 6
0
 def _secondary_outputs(self, adjacency):
     """Compute different variables from labels_."""
     if self.return_membership or self.return_aggregate:
         membership = membership_matrix(self.labels_)
         if self.return_membership:
             self.membership_ = normalize(adjacency.dot(membership))
         if self.return_aggregate:
             self.adjacency_ = sparse.csr_matrix(
                 membership.T.dot(adjacency.dot(membership)))
     return self
Ejemplo n.º 7
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Union[np.ndarray, dict] = None) \
            -> 'Propagation':
        """Node classification by label propagation.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        seeds :
            Seed nodes. Can be a dict {node: label} or an array where "-1" means no label.

        Returns
        -------
        self: :class:`Propagation`
        """
        adjacency = check_format(adjacency)
        n = adjacency.shape[0]
        index_seed, index_remain, labels_seed = self._instanciate_vars(
            adjacency, seeds)

        if self.node_order == 'random':
            np.random.shuffle(index_remain)
        elif self.node_order == 'decreasing':
            index = np.argsort(-adjacency.T.dot(np.ones(n))).astype(np.int32)
            index_remain = index[index_remain]
        elif self.node_order == 'increasing':
            index = np.argsort(adjacency.T.dot(np.ones(n))).astype(np.int32)
            index_remain = index[index_remain]

        labels = -np.ones(n, dtype=np.int32)
        labels[index_seed] = labels_seed
        labels_remain = np.zeros_like(index_remain, dtype=np.int32)

        indptr = adjacency.indptr.astype(np.int32)
        indices = adjacency.indices.astype(np.int32)
        if self.weighted:
            data = adjacency.data.astype(np.float32)
        else:
            data = np.ones(n, dtype=np.float32)

        t = 0
        while t < self.n_iter and not np.array_equal(labels_remain,
                                                     labels[index_remain]):
            t += 1
            labels_remain = labels[index_remain].copy()
            labels = vote_update(indptr, indices, data, labels, index_remain)

        membership = membership_matrix(labels)
        membership = normalize(adjacency.dot(membership))

        self.labels_ = labels
        self.membership_ = membership

        return self
Ejemplo n.º 8
0
 def _secondary_outputs(self, adjacency):
     """Compute different variables from labels_."""
     if self.return_membership or self.return_aggregate:
         if np.issubdtype(adjacency.data.dtype, np.bool_):
             adjacency = adjacency.astype(float)
         membership = membership_matrix(self.labels_)
         if self.return_membership:
             self.membership_ = normalize(adjacency.dot(membership))
         if self.return_aggregate:
             self.adjacency_ = sparse.csr_matrix(
                 membership.T.dot(adjacency.dot(membership)))
     return self
Ejemplo n.º 9
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'RandomProjection':
        """Compute the graph embedding.

        Parameters
        ----------
        adjacency :
              Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`RandomProjection`
        """
        adjacency = check_format(adjacency).asfptype()
        check_square(adjacency)
        n = adjacency.shape[0]

        random_generator = check_random_state(self.random_state)
        random_matrix = random_generator.normal(size=(n, self.n_components))

        # make the matrix orthogonal
        random_matrix, _ = np.linalg.qr(random_matrix)

        factor = random_matrix
        embedding = factor.copy()

        if self.random_walk:
            transition = normalize(adjacency)
        else:
            transition = adjacency

        for t in range(self.n_iter):
            factor = self.alpha * transition.dot(factor)
            embedding += factor

        if self.normalized:
            embedding = normalize(embedding, p=2)

        self.embedding_ = embedding

        return self
Ejemplo n.º 10
0
    def predict(
            self, adjacency_vectors: Union[sparse.csr_matrix,
                                           np.ndarray]) -> np.ndarray:
        """Predict the embedding of new rows, defined by their adjacency vectors.

        Parameters
        ----------
        adjacency_vectors :
            Adjacency vectors of nodes.
            Array of shape (n_col,) (single vector) or (n_vectors, n_col)

        Returns
        -------
        embedding_vectors : np.ndarray
            Embedding of the nodes.
        """
        self._check_fitted()
        singular_vectors_right = self.singular_vectors_right_
        singular_values = self.singular_values_

        n_row, _ = self.embedding_row_.shape
        n_col, _ = self.embedding_col_.shape

        adjacency_vectors = check_adjacency_vector(adjacency_vectors, n_col)
        self._check_adj_vector(adjacency_vectors)

        # regularization
        if self.regularization_:
            adjacency_vectors = RegularizedAdjacency(adjacency_vectors,
                                                     self.regularization_)

        # weighting
        weights_row = adjacency_vectors.dot(np.ones(n_col))
        diag_row = diag_pinv(np.power(weights_row, self.factor_row))
        diag_col = diag_pinv(np.power(self.weights_col_, self.factor_col))
        adjacency_vectors = safe_sparse_dot(
            diag_row, safe_sparse_dot(adjacency_vectors, diag_col))

        # projection in the embedding space
        averaging = adjacency_vectors
        embedding_vectors = diag_row.dot(averaging.dot(singular_vectors_right))

        # scaling
        embedding_vectors /= np.power(singular_values, self.factor_singular)

        if self.normalized:
            embedding_vectors = normalize(embedding_vectors, p=2)

        if embedding_vectors.shape[0] == 1:
            embedding_vectors = embedding_vectors.ravel()

        return embedding_vectors
Ejemplo n.º 11
0
    def test_formats(self):
        n = 5
        mat1 = normalize(np.eye(n))
        mat2 = normalize(sparse.eye(n))
        mat3 = normalize(CoNeighbor(mat2))

        x = np.random.randn(n)
        self.assertAlmostEqual(np.linalg.norm(mat1.dot(x) - x), 0)
        self.assertAlmostEqual(np.linalg.norm(mat2.dot(x) - x), 0)
        self.assertAlmostEqual(np.linalg.norm(mat3.dot(x) - x), 0)

        mat1 = np.random.rand(n**2).reshape((n, n))
        mat2 = sparse.csr_matrix(mat1)
        mat1 = normalize(mat1, p=2)
        mat2 = normalize(mat2, p=2)
        self.assertAlmostEqual(np.linalg.norm(mat1.dot(x) - mat2.dot(x)), 0)

        with self.assertRaises(NotImplementedError):
            normalize(mat3, p=2)
        with self.assertRaises(NotImplementedError):
            normalize(mat1, p=3)
Ejemplo n.º 12
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Union[np.ndarray, dict]) \
            -> 'Propagation':
        """Node classification by label propagation.

        Parameters
        ----------
        adjacency :
            Adjacency or biadjacency matrix of the graph.
        seeds :
            Seed nodes. Can be a dict {node: label} or an array where "-1" means no label.

        Returns
        -------
        self: :class:`Propagation`
        """
        adjacency = check_format(adjacency)
        n = adjacency.shape[0]
        index_seed, index_remain, labels_seed = self._instanciate_vars(
            adjacency, seeds)

        labels = -np.ones(n, dtype=int)
        labels[index_seed] = labels_seed
        labels_remain = np.zeros_like(index_remain, dtype=int)
        t = 0
        while t < self.n_iter and not np.array_equal(labels_remain,
                                                     labels[index_remain]):
            t += 1
            labels_remain = labels[index_remain].copy()
            for i in index_remain:
                labels_ = labels[
                    adjacency.indices[adjacency.indptr[i]:adjacency.indptr[i +
                                                                           1]]]
                labels_ = labels_[labels_ >= 0]
                if len(labels_):
                    labels_unique, counts = np.unique(labels_,
                                                      return_counts=True)
                    labels[i] = labels_unique[np.argmax(counts)]

        membership = membership_matrix(labels)
        membership = normalize(adjacency.dot(membership))

        self.labels_ = labels
        self.membership_ = membership

        return self
Ejemplo n.º 13
0
    def test_coneighbors(self):
        biadjacency = test_bigraph()
        operator = CoNeighbor(biadjacency)
        transition = normalize(operator)
        x = transition.dot(np.ones(transition.shape[1]))

        self.assertAlmostEqual(np.linalg.norm(x - np.ones(operator.shape[0])), 0)
        operator.astype('float')
        operator.right_sparse_dot(sparse.eye(operator.shape[1], format='csr'))

        operator1 = CoNeighbor(biadjacency, normalized=False)
        operator2 = CoNeighbor(biadjacency, normalized=False)
        x = np.random.randn(operator.shape[1])
        x1 = (-operator1).dot(x)
        x2 = (operator2 * -1).dot(x)
        x3 = operator1.T.dot(x)
        self.assertAlmostEqual(np.linalg.norm(x1 - x2), 0)
        self.assertAlmostEqual(np.linalg.norm(x2 - x3), 0)
Ejemplo n.º 14
0
    def fit(self, adjacency: Union[sparse.csr_matrix,
                                   np.ndarray]) -> 'Spectral':
        """Compute the graph embedding.

        Parameters
        ----------
        adjacency :
              Adjacency matrix of the graph (symmetric matrix).

        Returns
        -------
        self: :class:`Spectral`
        """
        adjacency = check_format(adjacency).asfptype()
        check_square(adjacency)
        check_symmetry(adjacency)
        n = adjacency.shape[0]

        if self.solver == 'auto':
            solver = auto_solver(adjacency.nnz)
            if solver == 'lanczos':
                self.solver: EigSolver = LanczosEig()
            else:  # pragma: no cover
                self.solver: EigSolver = HalkoEig()

        n_components = check_n_components(self.n_components, n - 2)
        n_components += 1

        if self.equalize and (self.regularization is None
                              or self.regularization
                              == 0.) and not is_connected(adjacency):
            raise ValueError(
                "The option 'equalize' is valid only if the graph is connected or with regularization."
                "Call 'fit' either with 'equalize' = False or positive 'regularization'."
            )

        weights = adjacency.dot(np.ones(n))
        regularization = self.regularization
        if regularization:
            if self.relative_regularization:
                regularization = regularization * weights.sum() / n**2
            weights += regularization * n

        if self.normalized_laplacian:
            # Finding the largest eigenvalues of the normalized adjacency is easier for the solver than finding the
            # smallest eigenvalues of the normalized laplacian.
            weights_inv_sqrt_diag = diag_pinv(np.sqrt(weights))

            if regularization:
                norm_adjacency = NormalizedAdjacencyOperator(
                    adjacency, regularization)
            else:
                norm_adjacency = weights_inv_sqrt_diag.dot(
                    adjacency.dot(weights_inv_sqrt_diag))

            self.solver.which = 'LA'
            self.solver.fit(matrix=norm_adjacency, n_components=n_components)
            eigenvalues = 1 - self.solver.eigenvalues_
            # eigenvalues of the Laplacian in increasing order
            index = np.argsort(eigenvalues)[1:]
            # skip first eigenvalue
            eigenvalues = eigenvalues[index]
            # eigenvectors of the Laplacian, skip first eigenvector
            eigenvectors = np.array(
                weights_inv_sqrt_diag.dot(self.solver.eigenvectors_[:, index]))

        else:
            if regularization:
                laplacian = LaplacianOperator(adjacency, regularization)
            else:
                weight_diag = sparse.diags(weights, format='csr')
                laplacian = weight_diag - adjacency

            self.solver.which = 'SM'
            self.solver.fit(matrix=laplacian, n_components=n_components)
            eigenvalues = self.solver.eigenvalues_[1:]
            eigenvectors = self.solver.eigenvectors_[:, 1:]

        embedding = eigenvectors.copy()

        if self.equalize:
            eigenvalues_sqrt_inv_diag = diag_pinv(np.sqrt(eigenvalues))
            embedding = eigenvalues_sqrt_inv_diag.dot(embedding.T).T

        if self.barycenter:
            eigenvalues_diag = sparse.diags(eigenvalues)
            subtract = eigenvalues_diag.dot(embedding.T).T
            if not self.normalized_laplacian:
                weights_inv_diag = diag_pinv(weights)
                subtract = weights_inv_diag.dot(subtract)
            embedding -= subtract

        if self.normalized:
            embedding = normalize(embedding, p=2)

        self.embedding_ = embedding
        self.eigenvalues_ = eigenvalues
        self.eigenvectors_ = eigenvectors
        self.regularization_ = regularization

        return self
Ejemplo n.º 15
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'GSVD':
        """Compute the embedding of the graph.

        Parameters
        ----------
        adjacency :
            Adjacency or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`GSVD`
        """
        adjacency = check_format(adjacency).asfptype()
        n_row, n_col = adjacency.shape
        n_components = check_n_components(self.n_components,
                                          min(n_row, n_col) - 1)

        if isinstance(self.solver, str):
            self.solver = set_svd_solver(self.solver, adjacency)
        regularization = self.regularization
        if regularization:
            if self.relative_regularization:
                regularization = regularization * np.sum(
                    adjacency.data) / (n_row * n_col)
            adjacency_reg = RegularizedAdjacency(adjacency, regularization)
        else:
            adjacency_reg = adjacency

        weights_row = adjacency_reg.dot(np.ones(n_col))
        weights_col = adjacency_reg.T.dot(np.ones(n_row))
        diag_row = diag_pinv(np.power(weights_row, self.factor_row))
        diag_col = diag_pinv(np.power(weights_col, self.factor_col))
        self.solver.fit(
            safe_sparse_dot(diag_row, safe_sparse_dot(adjacency_reg,
                                                      diag_col)), n_components)

        singular_values = self.solver.singular_values_
        index = np.argsort(-singular_values)
        singular_values = singular_values[index]
        singular_vectors_left = self.solver.singular_vectors_left_[:, index]
        singular_vectors_right = self.solver.singular_vectors_right_[:, index]
        singular_left_diag = sparse.diags(
            np.power(singular_values, 1 - self.factor_singular))
        singular_right_diag = sparse.diags(
            np.power(singular_values, self.factor_singular))

        embedding_row = diag_row.dot(singular_vectors_left)
        embedding_col = diag_col.dot(singular_vectors_right)
        embedding_row = singular_left_diag.dot(embedding_row.T).T
        embedding_col = singular_right_diag.dot(embedding_col.T).T

        if self.normalized:
            embedding_row = normalize(embedding_row, p=2)
            embedding_col = normalize(embedding_col, p=2)

        self.embedding_row_ = embedding_row
        self.embedding_col_ = embedding_col
        self.embedding_ = embedding_row
        self.singular_values_ = singular_values
        self.singular_vectors_left_ = singular_vectors_left
        self.singular_vectors_right_ = singular_vectors_right
        self.regularization_ = regularization
        self.weights_col_ = weights_col

        return self
Ejemplo n.º 16
0
    def predict(
            self, adjacency_vectors: Union[sparse.csr_matrix,
                                           np.ndarray]) -> np.ndarray:
        """Predict the embedding of new nodes, when possible (otherwise return 0).

        Each new node is defined by its adjacency row vector.

        Parameters
        ----------
        adjacency_vectors :
            Adjacency vectors of nodes.
            Array of shape (n_col,) (single vector) or (n_vectors, n_col)

        Returns
        -------
        embedding_vectors : np.ndarray
            Embedding of the nodes.

        Example
        -------
        >>> from sknetwork.embedding import Spectral
        >>> from sknetwork.data import karate_club
        >>> spectral = Spectral(n_components=3)
        >>> adjacency = karate_club()
        >>> adjacency_vector = np.arange(34) < 5
        >>> _ = spectral.fit(adjacency)
        >>> len(spectral.predict(adjacency_vector))
        3
        """
        self._check_fitted()

        # input
        if self.bipartite:
            n = len(self.embedding_col_)
        else:
            n = len(self.embedding_)
        adjacency_vectors = check_adjacency_vector(adjacency_vectors, n)
        check_nonnegative(adjacency_vectors)

        if self.bipartite:
            shape = (adjacency_vectors.shape[0], self.embedding_row_.shape[0])
            adjacency_vectors = sparse.csr_matrix(adjacency_vectors)
            adjacency_vectors = sparse.hstack(
                [sparse.csr_matrix(shape), adjacency_vectors], format='csr')
        eigenvectors = self.eigenvectors_
        eigenvalues = self.eigenvalues_

        # regularization
        if self.regularized:
            regularization = np.abs(self.regularization)
        else:
            regularization = 0
        normalizer = Normalizer(adjacency_vectors, regularization)

        # prediction
        embedding_vectors = normalizer.dot(eigenvectors)
        normalized_laplacian = self.decomposition == 'rw'
        if normalized_laplacian:
            norm_vect = eigenvalues.copy()
            norm_vect[norm_vect == 0] = 1
            embedding_vectors /= norm_vect
        else:
            norm_matrix = sparse.csr_matrix(
                1 - np.outer(normalizer.norm_diag.data, eigenvalues))
            norm_matrix.data = 1 / norm_matrix.data
            embedding_vectors *= norm_matrix.toarray()

        # normalization
        if self.normalized:
            embedding_vectors = normalize(embedding_vectors, p=2)

        # shape
        if len(embedding_vectors) == 1:
            embedding_vectors = embedding_vectors.ravel()

        return embedding_vectors
Ejemplo n.º 17
0
    def fit(self, adjacency: Union[sparse.csr_matrix,
                                   np.ndarray]) -> 'Spectral':
        """Compute the graph embedding.

        Parameters
        ----------
        adjacency :
              Adjacency matrix of the graph (symmetric matrix).

        Returns
        -------
        self: :class:`Spectral`
        """
        adjacency = check_format(adjacency).asfptype()
        check_square(adjacency)
        check_symmetry(adjacency)
        n = adjacency.shape[0]

        solver = set_solver(self.solver, adjacency)
        n_components = 1 + check_n_components(self.n_components, n - 2)

        regularize: bool = not (self.regularization is None
                                or self.regularization == 0.)
        check_scaling(self.scaling, adjacency, regularize)

        weights = adjacency.dot(np.ones(n))
        regularization = self.regularization
        if regularization:
            if self.relative_regularization:
                regularization = regularization * weights.sum() / n**2
            weights += regularization * n

        # Spectral decomposition of the normalized adjacency matrix
        weights_inv_sqrt_diag = diag_pinv(np.sqrt(weights))

        if regularization:
            norm_adjacency = NormalizedAdjacencyOperator(
                adjacency, regularization)
        else:
            norm_adjacency = weights_inv_sqrt_diag.dot(
                adjacency.dot(weights_inv_sqrt_diag))

        solver.which = 'LA'
        solver.fit(matrix=norm_adjacency, n_components=n_components)
        eigenvalues = solver.eigenvalues_
        index = np.argsort(-eigenvalues)[1:]  # skip first eigenvalue
        eigenvalues = eigenvalues[index]
        eigenvectors = weights_inv_sqrt_diag.dot(solver.eigenvectors_[:,
                                                                      index])

        embedding = eigenvectors.copy()

        if self.scaling:
            eigenvalues_inv_diag = diag_pinv((1 - eigenvalues)**self.scaling)
            embedding = eigenvalues_inv_diag.dot(embedding.T).T

        if self.normalized:
            embedding = normalize(embedding, p=2)

        self.embedding_ = embedding
        self.eigenvalues_ = eigenvalues
        self.eigenvectors_ = eigenvectors
        self.regularization_ = regularization

        return self
Ejemplo n.º 18
0
    def fit(self,
            input_matrix: Union[sparse.csr_matrix, np.ndarray],
            force_bipartite: bool = False) -> 'Spectral':
        """Compute the graph embedding.

        If the input matrix :math:`B` is not square (e.g., biadjacency matrix of a bipartite graph) or not symmetric
        (e.g., adjacency matrix of a directed graph), use the adjacency matrix

        :math:`A  = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`

        and return the embedding for both rows and columns of the input matrix :math:`B`.

        Parameters
        ----------
        input_matrix :
              Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite : bool (default = ``False``)
            If ``True``, force the input matrix to be considered as a biadjacency matrix.

        Returns
        -------
        self: :class:`Spectral`
        """
        # input
        adjacency, self.bipartite = get_adjacency(
            input_matrix,
            allow_directed=False,
            force_bipartite=force_bipartite)
        n = adjacency.shape[0]

        # regularization
        regularization = self._get_regularization(self.regularization,
                                                  adjacency)
        self.regularized = regularization > 0

        # laplacian
        normalized_laplacian = self.decomposition == 'rw'
        laplacian = Laplacian(adjacency, regularization, normalized_laplacian)

        # spectral decomposition
        n_components = check_n_components(self.n_components, n - 2) + 1
        solver = LanczosEig(which='SM')
        solver.fit(matrix=laplacian, n_components=n_components)
        index = np.argsort(
            solver.eigenvalues_)[1:]  # increasing order, skip first

        eigenvalues = solver.eigenvalues_[index]
        eigenvectors = solver.eigenvectors_[:, index]

        if normalized_laplacian:
            eigenvectors = laplacian.norm_diag.dot(eigenvectors)
            eigenvalues = 1 - eigenvalues

        # embedding
        embedding = eigenvectors.copy()
        if self.normalized:
            embedding = normalize(embedding, p=2)

        # output
        self.embedding_ = embedding
        self.eigenvalues_ = eigenvalues
        self.eigenvectors_ = eigenvectors
        if self.bipartite:
            self._split_vars(input_matrix.shape)

        return self
Ejemplo n.º 19
0
def cosine_modularity(adjacency, embedding: np.ndarray, embedding_col=None, resolution=1., weights='degree',
                      return_all: bool = False):
    """Quality metric of an embedding :math:`x` defined by:

    :math:`Q = \\sum_{ij}\\left(\\dfrac{A_{ij}}{w} - \\gamma \\dfrac{w^+_iw^-_j}{w^2}\\right)
    \\left(\\dfrac{1 + \\cos(x_i, x_j)}{2}\\right)`

    where

    * :math:`w^+_i, w^-_i` are the out-weight, in-weight of node :math:`i` (for digraphs),\n
    * :math:`w = 1^TA1` is the total weight of the graph.

    For bipartite graphs with column embedding :math:`y`, the metric is

    :math:`Q = \\sum_{ij}\\left(\\dfrac{B_{ij}}{w} - \\gamma \\dfrac{w_{1,i}w_{2,j}}{w^2}\\right)
    \\left(\\dfrac{1 + \\cos(x_i, y_j)}{2}\\right)`

    where

    * :math:`w_{1,i}, w_{2,j}` are the weights of nodes :math:`i` (row) and :math:`j` (column),\n
    * :math:`w = 1^TB1` is the total weight of the graph.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    embedding :
        Embedding of the nodes.
    embedding_col :
        Embedding of the columns (for bipartite graphs).
    resolution :
        Resolution parameter.
    weights : ``'degree'`` or ``'uniform'``
        Weights of the nodes.
    return_all :
        If ``True``, also return fit and diversity

    Returns
    -------
    modularity : float
    fit: float, optional
    diversity: float, optional

    Example
    -------
    >>> from sknetwork.embedding import cosine_modularity
    >>> from sknetwork.data import karate_club
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> embedding = graph.position
    >>> np.round(cosine_modularity(adjacency, embedding), 2)
    0.35
    """
    adjacency = check_format(adjacency)
    total_weight: float = adjacency.data.sum()

    if embedding_col is None:
        check_square(adjacency)
        embedding_col = embedding.copy()

    embedding_row_norm = normalize(embedding, p=2)
    embedding_col_norm = normalize(embedding_col, p=2)

    probs_row = check_probs(weights, adjacency)
    probs_col = check_probs(weights, adjacency.T)

    if isinstance(embedding_row_norm, sparse.csr_matrix) and isinstance(embedding_col_norm, sparse.csr_matrix):
        fit: float = 0.5 * (1 + (embedding_row_norm.multiply(adjacency.dot(embedding_col_norm))).sum() / total_weight)
    else:
        fit: float = 0.5 * (
            1 + (np.multiply(embedding_row_norm, adjacency.dot(embedding_col_norm))).sum() / total_weight)
    div: float = 0.5 * (1 + (embedding.T.dot(probs_row)).dot(embedding_col.T.dot(probs_col)))

    if return_all:
        return fit, div, fit - resolution * div
    else:
        return fit - resolution * div
Ejemplo n.º 20
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'GSVD':
        """Compute the GSVD of the adjacency or biadjacency matrix.

        Parameters
        ----------
        adjacency :
            Adjacency or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`GSVD`
        """
        adjacency = check_format(adjacency).asfptype()
        n_row, n_col = adjacency.shape

        if self.n_components > min(n_row, n_col) - 1:
            n_components = min(n_row, n_col) - 1
            warnings.warn(
                Warning(
                    "The dimension of the embedding must be strictly less than the number of rows "
                    "and the number of columns. Changed accordingly."))
        else:
            n_components = self.n_components

        if self.solver == 'auto':
            solver = auto_solver(adjacency.nnz)
            if solver == 'lanczos':
                self.solver: SVDSolver = LanczosSVD()
            else:
                self.solver: SVDSolver = HalkoSVD()

        regularization = self.regularization
        if regularization:
            if self.relative_regularization:
                regularization = regularization * np.sum(
                    adjacency.data) / (n_row * n_col)
            adjacency_reg = SparseLR(
                adjacency, [(regularization * np.ones(n_row), np.ones(n_col))])
        else:
            adjacency_reg = adjacency

        weights_row = adjacency_reg.dot(np.ones(n_col))
        weights_col = adjacency_reg.T.dot(np.ones(n_row))
        diag_row = diag_pinv(np.power(weights_row, self.factor_row))
        diag_col = diag_pinv(np.power(weights_col, self.factor_col))
        self.solver.fit(
            safe_sparse_dot(diag_row, safe_sparse_dot(adjacency_reg,
                                                      diag_col)), n_components)

        singular_values = self.solver.singular_values_
        index = np.argsort(-singular_values)
        singular_values = singular_values[index]
        singular_vectors_left = self.solver.singular_vectors_left_[:, index]
        singular_vectors_right = self.solver.singular_vectors_right_[:, index]
        singular_left_diag = sparse.diags(
            np.power(singular_values, 1 - self.factor_singular))
        singular_right_diag = sparse.diags(
            np.power(singular_values, self.factor_singular))

        embedding_row = diag_row.dot(singular_vectors_left)
        embedding_col = diag_col.dot(singular_vectors_right)
        embedding_row = singular_left_diag.dot(embedding_row.T).T
        embedding_col = singular_right_diag.dot(embedding_col.T).T

        if self.normalized:
            embedding_row = normalize(embedding_row, p=2)
            embedding_col = normalize(embedding_col, p=2)

        self.embedding_row_ = embedding_row
        self.embedding_col_ = embedding_col
        self.embedding_ = embedding_row
        self.singular_values_ = singular_values
        self.singular_vectors_left_ = singular_vectors_left
        self.singular_vectors_right_ = singular_vectors_right
        self.regularization_ = regularization
        self.weights_col_ = weights_col

        return self
Ejemplo n.º 21
0
    def fit(self, biadjacency: Union[sparse.csr_matrix,
                                     np.ndarray]) -> 'BiKMeans':
        """Apply embedding method followed by clustering to the graph.

        Parameters
        ----------
        biadjacency:
            Biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`BiKMeans`
        """
        n_row, n_col = biadjacency.shape
        check_n_clusters(self.n_clusters, n_row)

        method = self.embedding_method
        method.fit(biadjacency)

        if self.co_cluster:
            embedding = np.vstack(
                (method.embedding_row_, method.embedding_col_))
        else:
            embedding = method.embedding_

        kmeans = KMeansDense(self.n_clusters)
        kmeans.fit(embedding)

        if self.sort_clusters:
            labels = reindex_labels(kmeans.labels_)
        else:
            labels = kmeans.labels_

        self.labels_ = labels
        if self.co_cluster:
            self._split_vars(n_row)
        else:
            self.labels_row_ = labels

        if self.return_membership:
            membership_row = membership_matrix(self.labels_row_,
                                               n_labels=self.n_clusters)
            if self.labels_col_ is not None:
                membership_col = membership_matrix(self.labels_col_,
                                                   n_labels=self.n_clusters)
                self.membership_row_ = normalize(
                    biadjacency.dot(membership_col))
                self.membership_col_ = normalize(
                    biadjacency.T.dot(membership_row))
            else:
                self.membership_row_ = normalize(
                    biadjacency.dot(biadjacency.T.dot(membership_row)))
            self.membership_ = self.membership_row_

        if self.return_aggregate:
            membership_row = membership_matrix(self.labels_row_,
                                               n_labels=self.n_clusters)
            biadjacency_ = sparse.csr_matrix(membership_row.T.dot(biadjacency))
            if self.labels_col_ is not None:
                membership_col = membership_matrix(self.labels_col_,
                                                   n_labels=self.n_clusters)
                biadjacency_ = biadjacency_.dot(membership_col)
            self.biadjacency_ = biadjacency_

        return self