Beispiel #1
0
    def predict(
            self, adjacency_vectors: Union[sparse.csr_matrix,
                                           np.ndarray]) -> np.ndarray:
        """Predict the embedding of new nodes, defined by their adjacency vectors.

        Each adjacency vector is L1-normalized and projected on the fitted
        eigenvectors. The projection is then rescaled according to the options
        of the estimator (``barycenter``, ``normalized_laplacian``,
        ``equalize``, ``normalized``).

        Parameters
        ----------
        adjacency_vectors :
            Adjacency vectors of nodes.
            Array of shape (n_col,) (single vector) or (n_vectors, n_col)

        Returns
        -------
        embedding_vectors : np.ndarray
            Embedding of the nodes. Flattened to one dimension when a single
            vector is embedded.
        """
        self._check_fitted()
        eigenvectors = self.eigenvectors_
        eigenvalues = self.eigenvalues_
        n = eigenvectors.shape[0]

        adjacency_vectors = check_adjacency_vector(adjacency_vectors, n)
        check_nonnegative(adjacency_vectors)

        # regularization
        if self.regularization_:
            adjacency_vectors = RegularizedAdjacency(adjacency_vectors,
                                                     self.regularization_)

        # projection in the embedding space
        averaging = normalize(adjacency_vectors, p=1)
        embedding_vectors = averaging.dot(eigenvectors)

        if not self.barycenter:
            if self.normalized_laplacian:
                factors_inv_diag = diag_pinv(1 - eigenvalues)
                embedding_vectors = factors_inv_diag.dot(embedding_vectors.T).T
            else:
                # Regular Laplacian: the factor of a vector depends on its own
                # total weight, so it is computed per vector. Using the sum over
                # the whole batch would make the embedding of one vector depend
                # on the other vectors passed in the same call.
                weights = np.asarray(adjacency_vectors.dot(np.ones(n))).ravel()
                factors = 1 - eigenvalues[np.newaxis, :] / (
                    weights[:, np.newaxis] + 1e-9)
                # Element-wise pseudo-inverse: null factors are mapped to 0.
                # NOTE(review): assumes diag_pinv has the same convention -- confirm.
                factors_inv = np.zeros_like(factors)
                nonzero = factors != 0
                factors_inv[nonzero] = 1 / factors[nonzero]
                embedding_vectors = np.asarray(embedding_vectors) * factors_inv

        if self.equalize:
            embedding_vectors = diag_pinv(np.sqrt(eigenvalues)).dot(
                embedding_vectors.T).T

        if self.normalized:
            embedding_vectors = normalize(embedding_vectors, p=2)

        if embedding_vectors.shape[0] == 1:
            embedding_vectors = embedding_vectors.ravel()

        return embedding_vectors
Beispiel #2
0
    def predict(
            self, adjacency_vectors: Union[sparse.csr_matrix,
                                           np.ndarray]) -> np.ndarray:
        """Embed new rows from their adjacency vectors.

        The vectors are weighted with the row/column factors of the fitted
        model, projected on the right singular vectors and rescaled by the
        singular values.

        Parameters
        ----------
        adjacency_vectors :
            Adjacency vectors of nodes.
            Array of shape (n_col,) (single vector) or (n_vectors, n_col)

        Returns
        -------
        embedding_vectors : np.ndarray
            Embedding of the nodes (flattened when there is a single vector).
        """
        self._check_fitted()
        right_vectors = self.singular_vectors_right_
        sigma = self.singular_values_

        n_row, _ = self.embedding_row_.shape
        n_col, _ = self.embedding_col_.shape

        vectors = check_adjacency_vector(adjacency_vectors, n_col)
        self._check_adj_vector(vectors)

        # Apply the regularization used at fit time, if any.
        if self.regularization_:
            vectors = RegularizedAdjacency(vectors, self.regularization_)

        # Row weights come from the new vectors, column weights from the fit.
        row_weights = vectors.dot(np.ones(n_col))
        row_scaling = diag_pinv(np.power(row_weights, self.factor_row))
        col_scaling = diag_pinv(np.power(self.weights_col_, self.factor_col))
        col_scaled = safe_sparse_dot(vectors, col_scaling)
        weighted = safe_sparse_dot(row_scaling, col_scaled)

        # Project on the right singular vectors; the row scaling is applied
        # a second time on the projection.
        projection = weighted.dot(right_vectors)
        embedding_vectors = row_scaling.dot(projection)

        # Rescale by the singular values.
        embedding_vectors /= np.power(sigma, self.factor_singular)

        if self.normalized:
            embedding_vectors = normalize(embedding_vectors, p=2)

        single_vector = embedding_vectors.shape[0] == 1
        return embedding_vectors.ravel() if single_vector else embedding_vectors
Beispiel #3
0
    def fit(
        self, adjacency: Union[sparse.csr_matrix,
                               np.ndarray]) -> 'LaplacianEmbedding':
        """Fit the embedding from the spectral decomposition of the Laplacian.

        Parameters
        ----------
        adjacency :
              Adjacency matrix of the graph (symmetric matrix).

        Returns
        -------
        self: :class:`LaplacianEmbedding`
        """
        adjacency = check_format(adjacency).asfptype()
        check_square(adjacency)
        check_symmetry(adjacency)
        n_nodes = adjacency.shape[0]

        has_regularization: bool = (self.regularization is not None
                                    and self.regularization != 0.)
        check_scaling(self.scaling, adjacency, has_regularization)

        # The Lanczos solver is forced when the graph is regularized.
        solver: EigSolver = (LanczosEig() if has_regularization else
                             set_solver(self.solver, adjacency))
        # One extra component: the first one is discarded after the fit.
        n_components = check_n_components(self.n_components, n_nodes - 2) + 1

        degrees = adjacency.dot(np.ones(n_nodes))
        regularization = self.regularization
        if not regularization:
            laplacian = sparse.diags(degrees, format='csr') - adjacency
        else:
            if self.relative_regularization:
                regularization = regularization * degrees.sum() / n_nodes**2
            degrees += regularization * n_nodes
            laplacian = LaplacianOperator(adjacency, regularization)

        solver.which = 'SM'
        solver.fit(matrix=laplacian, n_components=n_components)
        # Drop the first eigenpair returned by the solver.
        eigenvalues = solver.eigenvalues_[1:]
        eigenvectors = solver.eigenvectors_[:, 1:]

        embedding = eigenvectors.copy()
        if self.scaling:
            scaling_diag = diag_pinv(eigenvalues**self.scaling)
            embedding = scaling_diag.dot(embedding.T).T
        if self.normalized:
            embedding = normalize(embedding, p=2)

        self.embedding_ = embedding
        self.eigenvalues_ = eigenvalues
        self.eigenvectors_ = eigenvectors
        self.regularization_ = regularization

        return self
Beispiel #4
0
    def returnNormalized(self, adjacency):
        """Return the regularized adjacency, rescaled on both sides.

        The adjacency is wrapped in a ``SparseLR`` with a constant term built
        from ``self.regularization``, then multiplied on the left and on the
        right by diagonal matrices built from the square roots of its row and
        column weights. These diagonal matrices are stored in ``self.diag_row``
        and ``self.diag_col``.

        Parameters
        ----------
        adjacency :
            Adjacency or biadjacency matrix (two-dimensional).

        Returns
        -------
        normalized_adj :
            Result of ``diag_row * adjacency * diag_col``.
        """
        n_rows, n_cols = adjacency.shape
        # Total weight heuristic stated in De Lara (2019).
        low_rank_term = (self.regularization * np.ones(n_rows),
                         np.ones(n_cols))
        regularized = SparseLR(adjacency, [low_rank_term])

        # Row and column weights of the regularized matrix.
        row_weights = regularized.dot(np.ones(regularized.shape[1]))
        col_weights = (regularized.T).dot(np.ones(regularized.shape[0]))

        # Left and right factors of the normalization.
        self.diag_row = diag_pinv(np.sqrt(row_weights))
        self.diag_col = diag_pinv(np.sqrt(col_weights))

        col_normalized = safe_sparse_dot(regularized, self.diag_col)
        return safe_sparse_dot(self.diag_row, col_normalized)
Beispiel #5
0
    def predict(
            self, adjacency_vectors: Union[sparse.csr_matrix,
                                           np.ndarray]) -> np.ndarray:
        """Embed new nodes from their adjacency vectors.

        Each vector is L1-normalized, projected on the fitted eigenvectors and
        divided by the eigenvalues; the optional scaling and normalization of
        the estimator are then applied.

        Parameters
        ----------
        adjacency_vectors :
            Adjacency vectors of nodes.
            Array of shape (n_col,) (single vector) or (n_vectors, n_col)

        Returns
        -------
        embedding_vectors : np.ndarray
            Embedding of the nodes (flattened when there is a single vector).
        """
        self._check_fitted()
        basis = self.eigenvectors_
        spectrum = self.eigenvalues_
        n_nodes = basis.shape[0]

        vectors = check_adjacency_vector(adjacency_vectors, n_nodes)
        check_nonnegative(vectors)

        # Apply the regularization used at fit time, if any.
        if self.regularization_:
            vectors = RegularizedAdjacency(vectors, self.regularization_)

        # Average of the eigenvectors over the neighbors, divided by the eigenvalues.
        neighbor_average = normalize(vectors, p=1)
        projected = neighbor_average.dot(basis)
        embedding_vectors = diag_pinv(spectrum).dot(projected.T).T

        if self.scaling:
            rescaling = diag_pinv((1 - spectrum)**self.scaling)
            embedding_vectors = rescaling.dot(embedding_vectors.T).T

        if self.normalized:
            embedding_vectors = normalize(embedding_vectors, p=2)

        single_vector = embedding_vectors.shape[0] == 1
        return embedding_vectors.ravel() if single_vector else embedding_vectors
 def __init__(self,
              adjacency: Union[sparse.csr_matrix, np.ndarray],
              regularization: float = 0):
     """Store the adjacency and the diagonal matrix used to normalize its rows.

     Parameters
     ----------
     adjacency :
         Adjacency matrix, or a single adjacency vector (reshaped to one row).
     regularization :
         Constant added to each row sum before inversion.
     """
     # A single vector is promoted to a one-row matrix.
     adjacency = adjacency.reshape(1, -1) if adjacency.ndim == 1 else adjacency
     super(Normalizer, self).__init__(dtype=float, shape=adjacency.shape)
     row_sums = adjacency.dot(np.ones(adjacency.shape[1]))
     self.regularization = regularization
     self.adjacency = adjacency
     self.norm_diag = diag_pinv(row_sums + regularization)
 def __init__(self,
              adjacency: Union[sparse.csr_matrix, np.ndarray],
              regularization: float = 0,
              normalized_laplacian: bool = False):
     """Build the Laplacian ``D - A`` of the graph and store its parameters.

     Parameters
     ----------
     adjacency :
         Adjacency matrix of the graph.
     regularization :
         Constant added to the weights in the normalization diagonal.
     normalized_laplacian :
         If ``True``, also store ``norm_diag``, built from the square roots
         of the regularized weights.
     """
     super(Laplacian, self).__init__(dtype=float, shape=adjacency.shape)
     self.regularization = regularization
     self.normalized_laplacian = normalized_laplacian

     n_nodes = adjacency.shape[0]
     degrees = adjacency.dot(np.ones(n_nodes))
     self.weights = degrees
     self.laplacian = sparse.diags(degrees, format='csr') - adjacency

     if normalized_laplacian:
         self.norm_diag = diag_pinv(np.sqrt(degrees + regularization))
def transition_matrix(adjacency: Union[sparse.csr_matrix, np.ndarray]):
    """Return the transition matrix of the random walk on the graph:

    :math:`P = D^+A`,

    with :math:`D^+` the pseudo-inverse of the diagonal matrix of row weights.

    Parameters
    ----------
    adjacency :
        Adjacency or biadjacency matrix.

    Returns
    -------
    sparse.csr_matrix:
        Transition matrix.

    """
    matrix = sparse.csr_matrix(adjacency)
    n_col = matrix.shape[1]
    # Total weight of each row.
    out_weights: np.ndarray = matrix.dot(np.ones(n_col))
    inverse_weights = diag_pinv(out_weights)

    return inverse_weights.dot(matrix)
Beispiel #9
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'GSVD':
        """Fit the row and column embeddings from a weighted SVD of the graph.

        Parameters
        ----------
        adjacency :
            Adjacency or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`GSVD`
        """
        adjacency = check_format(adjacency).asfptype()
        n_row, n_col = adjacency.shape
        max_components = min(n_row, n_col) - 1
        n_components = check_n_components(self.n_components, max_components)

        # A solver given by name is replaced by a solver object.
        if isinstance(self.solver, str):
            self.solver = set_svd_solver(self.solver, adjacency)

        regularization = self.regularization
        adjacency_reg = adjacency
        if regularization:
            if self.relative_regularization:
                total_weight = np.sum(adjacency.data)
                regularization = regularization * total_weight / (n_row * n_col)
            adjacency_reg = RegularizedAdjacency(adjacency, regularization)

        # Row and column weights, raised to the chosen powers.
        weights_row = adjacency_reg.dot(np.ones(n_col))
        weights_col = adjacency_reg.T.dot(np.ones(n_row))
        diag_row = diag_pinv(np.power(weights_row, self.factor_row))
        diag_col = diag_pinv(np.power(weights_col, self.factor_col))

        weighted = safe_sparse_dot(diag_row,
                                   safe_sparse_dot(adjacency_reg, diag_col))
        self.solver.fit(weighted, n_components)

        # Sort the decomposition by decreasing singular value.
        raw_values = self.solver.singular_values_
        order = np.argsort(-raw_values)
        singular_values = raw_values[order]
        singular_vectors_left = self.solver.singular_vectors_left_[:, order]
        singular_vectors_right = self.solver.singular_vectors_right_[:, order]

        left_power = sparse.diags(
            np.power(singular_values, 1 - self.factor_singular))
        right_power = sparse.diags(
            np.power(singular_values, self.factor_singular))

        embedding_row = left_power.dot(diag_row.dot(singular_vectors_left).T).T
        embedding_col = right_power.dot(
            diag_col.dot(singular_vectors_right).T).T

        if self.normalized:
            embedding_row = normalize(embedding_row, p=2)
            embedding_col = normalize(embedding_col, p=2)

        self.embedding_row_ = embedding_row
        self.embedding_col_ = embedding_col
        self.embedding_ = embedding_row
        self.singular_values_ = singular_values
        self.singular_vectors_left_ = singular_vectors_left
        self.singular_vectors_right_ = singular_vectors_right
        self.regularization_ = regularization
        self.weights_col_ = weights_col

        return self
Beispiel #10
0
    def fit(self, adjacency: Union[sparse.csr_matrix,
                                   np.ndarray]) -> 'Spectral':
        """Fit the embedding from the spectrum of the normalized adjacency matrix.

        Parameters
        ----------
        adjacency :
              Adjacency matrix of the graph (symmetric matrix).

        Returns
        -------
        self: :class:`Spectral`
        """
        adjacency = check_format(adjacency).asfptype()
        check_square(adjacency)
        check_symmetry(adjacency)
        n_nodes = adjacency.shape[0]

        solver = set_solver(self.solver, adjacency)
        # One extra component: the leading one is discarded after the fit.
        n_components = check_n_components(self.n_components, n_nodes - 2) + 1

        has_regularization: bool = (self.regularization is not None
                                    and self.regularization != 0.)
        check_scaling(self.scaling, adjacency, has_regularization)

        degrees = adjacency.dot(np.ones(n_nodes))
        regularization = self.regularization
        if regularization:
            if self.relative_regularization:
                regularization = regularization * degrees.sum() / n_nodes**2
            degrees += regularization * n_nodes

        # Spectral decomposition of the normalized adjacency matrix.
        inv_sqrt_degrees = diag_pinv(np.sqrt(degrees))
        if not regularization:
            norm_adjacency = inv_sqrt_degrees.dot(
                adjacency.dot(inv_sqrt_degrees))
        else:
            norm_adjacency = NormalizedAdjacencyOperator(
                adjacency, regularization)

        solver.which = 'LA'
        solver.fit(matrix=norm_adjacency, n_components=n_components)

        # Decreasing order of eigenvalues, without the largest one.
        raw_eigenvalues = solver.eigenvalues_
        order = np.argsort(-raw_eigenvalues)[1:]
        eigenvalues = raw_eigenvalues[order]
        eigenvectors = inv_sqrt_degrees.dot(solver.eigenvectors_[:, order])

        embedding = eigenvectors.copy()
        if self.scaling:
            rescaling = diag_pinv((1 - eigenvalues)**self.scaling)
            embedding = rescaling.dot(embedding.T).T
        if self.normalized:
            embedding = normalize(embedding, p=2)

        self.embedding_ = embedding
        self.eigenvalues_ = eigenvalues
        self.eigenvectors_ = eigenvectors
        self.regularization_ = regularization

        return self
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'BiSpectral':
        """
        Computes the generalized SVD of the adjacency matrix.

        The adjacency matrix is optionally regularized, rescaled on both sides by diagonal
        matrices built from the square roots of the row and column weights, and decomposed
        by the SVD solver. The leading singular component is discarded. The results are
        stored in ``singular_values_``, ``row_embedding_``, ``col_embedding_`` and
        ``embedding_`` (rows stacked on top of columns).

        Parameters
        ----------
        adjacency: array-like, shape = (n1, n2)
            Adjacency matrix, where n1 = n2 is the number of nodes for a standard graph,
            n1, n2 are the number of nodes in each part for a bipartite graph.

        Returns
        -------
        self: :class:`BiSpectral`
        """
        adjacency = check_format(adjacency).asfptype()
        n1, n2 = adjacency.shape

        # 'auto' is replaced by a solver object chosen from the number of non-zero entries.
        # The object is stored on the estimator, so this branch is skipped on later calls.
        if self.solver == 'auto':
            solver = auto_solver(adjacency.nnz)
            if solver == 'lanczos':
                self.solver: SVDSolver = LanczosSVD()
            else:
                self.solver: SVDSolver = HalkoSVD()

        total_weight = adjacency.dot(np.ones(n2)).sum()
        regularization = self.regularization
        if regularization:
            # Relative regularization is expressed as a fraction of the average entry weight.
            if self.relative_regularization:
                regularization = regularization * total_weight / (n1 * n2)
            # Add a constant term (outer product of two vectors) to the adjacency.
            adjacency = SparseLR(adjacency, [(regularization * np.ones(n1), np.ones(n2))])

        # Row and column weights, computed on the (possibly regularized) adjacency.
        w_row = check_weights(self.weights, adjacency)
        w_col = check_weights(self.col_weights, adjacency.T)
        diag_row = diag_pinv(np.sqrt(w_row))
        diag_col = diag_pinv(np.sqrt(w_col))

        normalized_adj = safe_sparse_dot(diag_row, safe_sparse_dot(adjacency, diag_col))

        # svd
        # One more component than the embedding dimension is requested, because the
        # leading component is dropped below; the number is capped at min(n1, n2) - 1.
        if self.embedding_dimension >= min(n1, n2) - 1:
            n_components = min(n1, n2) - 1
            warnings.warn(Warning("The dimension of the embedding must be less than the number of rows "
                                  "and the number of columns. Changed accordingly."))
        else:
            n_components = self.embedding_dimension + 1
        self.solver.fit(normalized_adj, n_components)

        # Decreasing order of singular values; index[1:] skips the largest one.
        index = np.argsort(-self.solver.singular_values_)
        self.singular_values_ = self.solver.singular_values_[index[1:]]
        self.row_embedding_ = diag_row.dot(self.solver.left_singular_vectors_[:, index[1:]])
        self.col_embedding_ = diag_col.dot(self.solver.right_singular_vectors_[:, index[1:]])

        # Optional rescaling of the stored embeddings (in place).
        if self.scaling:
            if self.scaling == 'multiply':
                self.row_embedding_ *= np.sqrt(self.singular_values_)
                self.col_embedding_ *= np.sqrt(self.singular_values_)
            elif self.scaling == 'divide':
                # Singular values are clipped to [0, 1] before computing sqrt(1 - s^2);
                # null levels are left at 0 instead of being inverted.
                energy_levels: np.ndarray = np.sqrt(1 - np.clip(self.singular_values_, 0, 1) ** 2)
                energy_levels[energy_levels > 0] = 1 / energy_levels[energy_levels > 0]
                self.row_embedding_ *= energy_levels
                self.col_embedding_ *= energy_levels
            elif self.scaling == 'barycenter':
                # Only the row embedding is rescaled in this mode.
                self.row_embedding_ *= self.singular_values_
            else:
                warnings.warn(Warning("The scaling must be 'multiply' or 'divide' or 'barycenter'. No scaling done."))

        self.embedding_ = np.vstack((self.row_embedding_, self.col_embedding_))
        return self
Beispiel #12
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'GSVD':
        """Compute the GSVD of the adjacency or biadjacency matrix.

        The matrix is optionally regularized, rescaled on both sides by diagonal
        matrices built from powers of the row and column weights, and decomposed
        by the SVD solver. The embeddings, singular values, singular vectors,
        effective regularization and column weights are stored on the estimator.

        Parameters
        ----------
        adjacency :
            Adjacency or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`GSVD`
        """
        adjacency = check_format(adjacency).asfptype()
        n_row, n_col = adjacency.shape

        # The number of components is capped at min(n_row, n_col) - 1, with a warning.
        if self.n_components > min(n_row, n_col) - 1:
            n_components = min(n_row, n_col) - 1
            warnings.warn(
                Warning(
                    "The dimension of the embedding must be strictly less than the number of rows "
                    "and the number of columns. Changed accordingly."))
        else:
            n_components = self.n_components

        # 'auto' is replaced by a solver object chosen from the number of non-zero entries.
        # The object is stored on the estimator, so this branch is skipped on later calls.
        if self.solver == 'auto':
            solver = auto_solver(adjacency.nnz)
            if solver == 'lanczos':
                self.solver: SVDSolver = LanczosSVD()
            else:
                self.solver: SVDSolver = HalkoSVD()

        regularization = self.regularization
        if regularization:
            # Relative regularization is expressed as a fraction of the average entry weight.
            if self.relative_regularization:
                regularization = regularization * np.sum(
                    adjacency.data) / (n_row * n_col)
            # Add a constant term (outer product of two vectors) to the adjacency.
            adjacency_reg = SparseLR(
                adjacency, [(regularization * np.ones(n_row), np.ones(n_col))])
        else:
            adjacency_reg = adjacency

        # Row and column weights of the (possibly regularized) matrix, raised to
        # the powers factor_row / factor_col before inversion.
        weights_row = adjacency_reg.dot(np.ones(n_col))
        weights_col = adjacency_reg.T.dot(np.ones(n_row))
        diag_row = diag_pinv(np.power(weights_row, self.factor_row))
        diag_col = diag_pinv(np.power(weights_col, self.factor_col))
        self.solver.fit(
            safe_sparse_dot(diag_row, safe_sparse_dot(adjacency_reg,
                                                      diag_col)), n_components)

        # Sort the decomposition by decreasing singular value.
        singular_values = self.solver.singular_values_
        index = np.argsort(-singular_values)
        singular_values = singular_values[index]
        singular_vectors_left = self.solver.singular_vectors_left_[:, index]
        singular_vectors_right = self.solver.singular_vectors_right_[:, index]
        # The singular values are split between rows (power 1 - factor_singular)
        # and columns (power factor_singular).
        singular_left_diag = sparse.diags(
            np.power(singular_values, 1 - self.factor_singular))
        singular_right_diag = sparse.diags(
            np.power(singular_values, self.factor_singular))

        embedding_row = diag_row.dot(singular_vectors_left)
        embedding_col = diag_col.dot(singular_vectors_right)
        embedding_row = singular_left_diag.dot(embedding_row.T).T
        embedding_col = singular_right_diag.dot(embedding_col.T).T

        if self.normalized:
            embedding_row = normalize(embedding_row, p=2)
            embedding_col = normalize(embedding_col, p=2)

        self.embedding_row_ = embedding_row
        self.embedding_col_ = embedding_col
        # The default embedding is the embedding of the rows.
        self.embedding_ = embedding_row
        self.singular_values_ = singular_values
        self.singular_vectors_left_ = singular_vectors_left
        self.singular_vectors_right_ = singular_vectors_right
        self.regularization_ = regularization
        self.weights_col_ = weights_col

        return self
Beispiel #13
0
    def fit(self, adjacency: Union[sparse.csr_matrix,
                                   np.ndarray]) -> 'Spectral':
        """Fits the model from data in adjacency.

        The embedding is built from the eigenvectors of the Laplacian of the
        graph (normalized or not, optionally regularized). The first
        eigenvector is discarded. Results are stored in ``embedding_``,
        ``eigenvalues_`` and ``regularization_``.

        Parameters
        ----------
        adjacency :
              Adjacency matrix of the graph (symmetric matrix).

        Returns
        -------
        self: :class:`Spectral`

        Raises
        ------
        ValueError
            If the adjacency matrix is not square or not symmetric.
        NotImplementedError
            If the Halko solver is used with the regular Laplacian.
        """

        adjacency = check_format(adjacency).asfptype()

        if not is_square(adjacency):
            raise ValueError(
                'The adjacency matrix is not square. See BiSpectral.')

        if not is_symmetric(adjacency):
            raise ValueError(
                'The adjacency matrix is not symmetric.'
                'Either convert it to a symmetric matrix or use BiSpectral.')

        n = adjacency.shape[0]

        # 'auto' is replaced by a solver object chosen from the number of
        # non-zero entries; the object is stored on the estimator.
        if self.solver == 'auto':
            solver = auto_solver(adjacency.nnz)
            if solver == 'lanczos':
                self.solver: EigSolver = LanczosEig()
            else:
                self.solver: EigSolver = HalkoEig()

        # One extra component is requested because the first eigenvector is
        # dropped below. The largest valid dimension is n - 2, i.e. n - 1
        # components, so the cap must also be n - 1 (capping at n - 2 would
        # return fewer dimensions than asking for n - 2 directly).
        if self.embedding_dimension > n - 2:
            warnings.warn(
                Warning(
                    "The dimension of the embedding must be less than the number of nodes - 1."
                ))
            n_components = n - 1
        else:
            n_components = self.embedding_dimension + 1

        if (self.regularization is None
                or self.regularization == 0.) and not is_connected(adjacency):
            warnings.warn(
                Warning(
                    "The graph is not connected and low-rank regularization is not active."
                    "This can cause errors in the computation of the embedding."
                ))

        if isinstance(self.solver, HalkoEig) and not self.normalized_laplacian:
            raise NotImplementedError(
                "Halko solver is not yet compatible with regular Laplacian."
                "Call 'fit' with 'normalized_laplacian' = True or force lanczos solver."
            )

        weights = adjacency.dot(np.ones(n))
        regularization = self.regularization
        if regularization:
            # Relative regularization is a fraction of the average entry weight.
            if self.relative_regularization:
                regularization = regularization * weights.sum() / n**2
            weights += regularization * n

        if self.normalized_laplacian:
            # Finding the largest eigenvalues of the normalized adjacency is easier for the solver than finding the
            # smallest eigenvalues of the normalized laplacian.
            normalizing_matrix = diag_pinv(np.sqrt(weights))

            if regularization:
                norm_adjacency = NormalizedAdjacencyOperator(
                    adjacency, regularization)
            else:
                norm_adjacency = normalizing_matrix.dot(
                    adjacency.dot(normalizing_matrix))

            self.solver.which = 'LA'
            self.solver.fit(matrix=norm_adjacency, n_components=n_components)
            eigenvalues = 1 - self.solver.eigenvalues_
            # eigenvalues of the Laplacian in increasing order
            index = np.argsort(eigenvalues)
            # skip first eigenvalue
            eigenvalues = eigenvalues[index][1:]
            # keep only positive eigenvectors of the normalized adjacency matrix
            # (the other columns are set to zero by the boolean mask)
            eigenvectors = self.solver.eigenvectors_[:, index][:, 1:] * (
                eigenvalues < 1 - self.tol)
            embedding = np.array(normalizing_matrix.dot(eigenvectors))

        else:
            if regularization:
                laplacian = LaplacianOperator(adjacency, regularization)
            else:
                weight_matrix = sparse.diags(weights, format='csr')
                laplacian = weight_matrix - adjacency

            self.solver.which = 'SM'
            self.solver.fit(matrix=laplacian, n_components=n_components)
            # skip the first eigenpair returned by the solver
            eigenvalues = self.solver.eigenvalues_[1:]
            embedding = self.solver.eigenvectors_[:, 1:]

        if self.scaling:
            if self.scaling == 'multiply':
                # eigenvalues are capped at 1 so that the square root is defined
                eigenvalues = np.minimum(eigenvalues, 1)
                embedding *= np.sqrt(1 - eigenvalues)
            elif self.scaling == 'divide':
                # inverse of the positive eigenvalues only; the others are mapped to 0
                inv_eigenvalues = np.zeros_like(eigenvalues)
                index = np.where(eigenvalues > 0)[0]
                inv_eigenvalues[index] = 1 / eigenvalues[index]
                embedding *= np.sqrt(inv_eigenvalues)
            else:
                warnings.warn(
                    Warning(
                        "The scaling must be 'multiply' or 'divide'. No scaling done."
                    ))

        self.embedding_ = embedding
        self.eigenvalues_ = eigenvalues
        self.regularization_ = regularization

        return self
Beispiel #14
0
def comodularity(adjacency: Union[sparse.csr_matrix, np.ndarray], labels: np.ndarray, resolution: float = 1,
                 return_all: bool = False) -> Union[float, Tuple[float, float, float]]:
    """Modularity of a clustering in the normalized co-neighborhood graph.

    * Graphs
    * Digraphs
    * Bigraphs

    Quality metric of a clustering given by:

    :math:`Q = \\sum_{i,j}\\left(\\dfrac{(AD_2^{-1}A^T)_{ij}}{w} - \\gamma \\dfrac{d_id_j}{w^2}\\right)
    \\delta_{c_i,c_j}`

    where

    * :math:`c_i` is the cluster of node `i`,\n
    * :math:`\\delta` is the Kronecker symbol,\n
    * :math:`\\gamma \\ge 0` is the resolution parameter.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    labels :
       Labels of the nodes.
    resolution :
        Resolution parameter (default = 1).
    return_all :
        If ``True``, return modularity, fit, diversity.

    Returns
    -------
    modularity : float
    fit : float, optional
    diversity: float, optional

    Raises
    ------
    ValueError
        If the number of labels differs from the number of rows of the adjacency matrix.

    Example
    -------
    >>> from sknetwork.clustering import comodularity
    >>> from sknetwork.data import house
    >>> adjacency = house()
    >>> labels = np.array([0, 0, 1, 1, 0])
    >>> np.round(comodularity(adjacency, labels), 2)
    0.06

    Notes
    -----
    Does not require the computation of the adjacency matrix of the normalized co-neighborhood graph.
    """

    adjacency = check_format(adjacency).astype(float)
    n_row, n_col = adjacency.shape

    # Validate the labels before any computation.
    if len(labels) != n_row:
        raise ValueError('The number of labels must match the number of rows.')

    total_weight = adjacency.data.sum()
    probs = adjacency.dot(np.ones(n_col)) / total_weight

    # The transpose has shape (n_col, n_row): the column weights are obtained by
    # multiplying it by a vector of n_row ones (required for non-square matrices).
    weights_col = adjacency.T.dot(np.ones(n_row))
    diag_col = diag_pinv(np.sqrt(weights_col))
    normalized_adjacency = (adjacency.dot(diag_col)).T.tocsr()

    membership = membership_matrix(labels)
    fit: float = ((normalized_adjacency.dot(membership)).data ** 2).sum() / total_weight
    div: float = np.linalg.norm(membership.T.dot(probs)) ** 2
    mod: float = fit - resolution * div

    if return_all:
        return mod, fit, div
    else:
        return mod
Beispiel #15
0
    def fit(self, adjacency: Union[sparse.csr_matrix,
                                   np.ndarray]) -> 'Spectral':
        """Compute the graph embedding.

        The embedding is built from the eigenvectors of the Laplacian of the
        graph (normalized or not, optionally regularized), without the first
        one. The options ``equalize``, ``barycenter`` and ``normalized`` are
        then applied in this order. Results are stored in ``embedding_``,
        ``eigenvalues_``, ``eigenvectors_`` and ``regularization_``.

        Parameters
        ----------
        adjacency :
              Adjacency matrix of the graph (symmetric matrix).

        Returns
        -------
        self: :class:`Spectral`

        Raises
        ------
        ValueError
            If ``equalize`` is set while the graph is not connected and no
            regularization is applied.
        """
        adjacency = check_format(adjacency).asfptype()
        check_square(adjacency)
        check_symmetry(adjacency)
        n = adjacency.shape[0]

        # 'auto' is replaced by a solver object chosen from the number of
        # non-zero entries; the object is stored on the estimator.
        if self.solver == 'auto':
            solver = auto_solver(adjacency.nnz)
            if solver == 'lanczos':
                self.solver: EigSolver = LanczosEig()
            else:  # pragma: no cover
                self.solver: EigSolver = HalkoEig()

        # One extra component is requested: the first one is dropped below.
        n_components = check_n_components(self.n_components, n - 2)
        n_components += 1

        if self.equalize and (self.regularization is None
                              or self.regularization
                              == 0.) and not is_connected(adjacency):
            raise ValueError(
                "The option 'equalize' is valid only if the graph is connected or with regularization."
                "Call 'fit' either with 'equalize' = False or positive 'regularization'."
            )

        weights = adjacency.dot(np.ones(n))
        regularization = self.regularization
        if regularization:
            # Relative regularization is a fraction of the average entry weight.
            if self.relative_regularization:
                regularization = regularization * weights.sum() / n**2
            weights += regularization * n

        if self.normalized_laplacian:
            # Finding the largest eigenvalues of the normalized adjacency is easier for the solver than finding the
            # smallest eigenvalues of the normalized laplacian.
            weights_inv_sqrt_diag = diag_pinv(np.sqrt(weights))

            if regularization:
                norm_adjacency = NormalizedAdjacencyOperator(
                    adjacency, regularization)
            else:
                norm_adjacency = weights_inv_sqrt_diag.dot(
                    adjacency.dot(weights_inv_sqrt_diag))

            self.solver.which = 'LA'
            self.solver.fit(matrix=norm_adjacency, n_components=n_components)
            eigenvalues = 1 - self.solver.eigenvalues_
            # indices of the eigenvalues of the Laplacian in increasing order,
            # without the smallest one
            index = np.argsort(eigenvalues)[1:]
            eigenvalues = eigenvalues[index]
            # eigenvectors of the Laplacian, skip first eigenvector
            eigenvectors = np.array(
                weights_inv_sqrt_diag.dot(self.solver.eigenvectors_[:, index]))

        else:
            if regularization:
                laplacian = LaplacianOperator(adjacency, regularization)
            else:
                weight_diag = sparse.diags(weights, format='csr')
                laplacian = weight_diag - adjacency

            self.solver.which = 'SM'
            self.solver.fit(matrix=laplacian, n_components=n_components)
            # skip the first eigenpair returned by the solver
            eigenvalues = self.solver.eigenvalues_[1:]
            eigenvectors = self.solver.eigenvectors_[:, 1:]

        # Work on a copy so that the stored eigenvectors are left untouched.
        embedding = eigenvectors.copy()

        if self.equalize:
            # Divide each component by the square root of its eigenvalue.
            eigenvalues_sqrt_inv_diag = diag_pinv(np.sqrt(eigenvalues))
            embedding = eigenvalues_sqrt_inv_diag.dot(embedding.T).T

        if self.barycenter:
            # Subtract from each component its product with the eigenvalue; for
            # the regular Laplacian this term is also divided by the node weights.
            eigenvalues_diag = sparse.diags(eigenvalues)
            subtract = eigenvalues_diag.dot(embedding.T).T
            if not self.normalized_laplacian:
                weights_inv_diag = diag_pinv(weights)
                subtract = weights_inv_diag.dot(subtract)
            embedding -= subtract

        if self.normalized:
            embedding = normalize(embedding, p=2)

        self.embedding_ = embedding
        self.eigenvalues_ = eigenvalues
        self.eigenvectors_ = eigenvectors
        self.regularization_ = regularization

        return self