예제 #1
0
class LouvainHierarchy(BaseHierarchy):
    """Top-down hierarchical clustering built by applying Louvain recursively.

    The graph is first clustered with Louvain; each resulting cluster is then
    clustered again on its own sub-graph, and so on, until the depth budget is
    exhausted, a sub-graph has no edge left, or Louvain returns a single cluster.

    * Graphs
    * Digraphs

    Parameters
    ----------
    depth :
        Depth of the tree.
        A negative value is interpreted as no limit (return a tree of maximum depth).
    resolution :
        Resolution parameter.
    tol_optimization :
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation :
        Minimum increase in the objective function to enter a new aggregation pass.
    n_aggregations :
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes :
        Enables node shuffling before optimization.
    random_state :
        Random number generator or random seed. If ``None``, numpy.random is used.
    verbose :
        Verbose mode.

    Attributes
    ----------
    dendrogram_ : np.ndarray
        Dendrogram.

    Example
    -------
    >>> from sknetwork.hierarchy import LouvainHierarchy
    >>> from sknetwork.data import house
    >>> louvain = LouvainHierarchy()
    >>> adjacency = house()
    >>> louvain.fit_transform(adjacency)
    array([[3., 2., 0., 2.],
           [4., 1., 0., 2.],
           [6., 0., 0., 3.],
           [5., 7., 1., 5.]])

    Notes
    -----
    Each row of the dendrogram = merge nodes, distance, size of cluster.

    See Also
    --------
    scipy.cluster.hierarchy.dendrogram
    """
    def __init__(self, depth: int = 3, resolution: float = 1, tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super().__init__()

        self.depth = depth
        # Every parameter except ``depth`` is simply forwarded to the Louvain instance
        # that is reused at each level of the recursion.
        self._clustering_method = Louvain(
            resolution=resolution, tol_optimization=tol_optimization, tol_aggregation=tol_aggregation,
            n_aggregations=n_aggregations, shuffle_nodes=shuffle_nodes, random_state=random_state,
            verbose=verbose)

    def _recursive_louvain(self, adjacency: Union[sparse.csr_matrix, np.ndarray], depth: int,
                           nodes: Optional[np.ndarray] = None):
        """Build the cluster tree of a (sub-)graph recursively.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the current (sub-)graph.
        depth :
            Remaining depth budget; it is decremented at each recursive call and
            clustering stops when it reaches 0.
        nodes :
            Indices, in the original graph, of the nodes of the current (sub-)graph.
            Defaults to ``0, ..., n - 1``.

        Returns
        -------
        tree : list
            Nested list. A leaf is ``[node]``; an internal node is the list of the
            trees of its clusters.
        """
        n_nodes = adjacency.shape[0]
        if nodes is None:
            nodes = np.arange(n_nodes)

        # Cluster only while there are edges left and the depth budget is not exhausted;
        # otherwise put every node in the same cluster so that the recursion stops here.
        if adjacency.nnz and depth:
            labels = self._clustering_method.fit_transform(adjacency)
        else:
            labels = np.zeros(n_nodes)

        clusters = np.unique(labels)

        # A single cluster cannot be split further: emit the leaves.
        if len(clusters) == 1:
            if len(nodes) > 1:
                return [[node] for node in nodes]
            return [nodes[0]]

        subtrees = []
        for cluster in clusters:
            members = (labels == cluster)
            sub_nodes = nodes[members]
            # Sub-graph induced by the nodes of the cluster.
            sub_adjacency = adjacency[members, :][:, members]
            subtrees.append(self._recursive_louvain(sub_adjacency, depth - 1, sub_nodes))
        return subtrees

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'LouvainHierarchy':
        """Fit algorithm to data.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`LouvainHierarchy`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)

        tree = self._recursive_louvain(adjacency, self.depth)
        merges, _ = get_dendrogram(tree)
        dendrogram = np.array(merges)

        # Shift the third column (the distance, see Notes) so that its smallest value is 0.
        offset = min(dendrogram[:, 2])
        dendrogram[:, 2] -= offset

        self.dendrogram_ = reorder_dendrogram(dendrogram)
        return self
class LouvainNE(BaseEmbedding):
    """Embedding of graphs based on the hierarchical Louvain algorithm with random scattering per level.

    Louvain is applied recursively; at each level, all nodes of a cluster receive the same
    random vector, scaled by ``scale ** level``, which is added to their embedding.

    Parameters
    ----------
    n_components : int
        Dimension of the embedding.
    scale : float
        Dilution factor to be applied on the random vector to be added at each iteration of the clustering method.
    resolution :
        Resolution parameter.
    tol_optimization :
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation :
        Minimum increase in the objective function to enter a new aggregation pass.
    n_aggregations :
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes :
        Enables node shuffling before optimization.
    random_state :
        Random number generator or random seed. If None, numpy.random is used.
    verbose :
        Verbose mode.

    Attributes
    ----------
    embedding_ : array, shape = (n, n_components)
        Embedding of the nodes.
    embedding_row_ : array, shape = (n_row, n_components)
        Embedding of the rows, for bipartite graphs.
    embedding_col_ : array, shape = (n_col, n_components)
        Embedding of the columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.embedding import LouvainNE
    >>> from sknetwork.data import karate_club
    >>> louvain = LouvainNE(n_components=3)
    >>> adjacency = karate_club()
    >>> embedding = louvain.fit_transform(adjacency)
    >>> embedding.shape
    (34, 3)

    References
    ----------
    Bhowmick, A. K., Meneni, K., Danisch, M., Guillaume, J. L., & Mitra, B. (2020, January).
    `LouvainNE: Hierarchical Louvain Method for High Quality and Scalable Network Embedding.
    <https://hal.archives-ouvertes.fr/hal-02999888/document>`_
    In Proceedings of the 13th International Conference on Web Search and Data Mining (pp. 43-51).
    """
    def __init__(self, n_components: int = 2, scale: float = .1, resolution: float = 1, tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super().__init__()

        self.n_components = n_components
        self.scale = scale
        # The clustering parameters are forwarded unchanged to the Louvain instance
        # that is reused at every level of the recursion.
        self._clustering_method = Louvain(
            resolution=resolution, tol_optimization=tol_optimization, tol_aggregation=tol_aggregation,
            n_aggregations=n_aggregations, shuffle_nodes=shuffle_nodes, random_state=random_state,
            verbose=verbose)
        # Generator used to draw the random vector of each cluster.
        self.random_state = check_random_state(random_state)
        self.bipartite = None

    def _recursive_louvain(self, adjacency: Union[sparse.csr_matrix, np.ndarray], depth: int,
                           nodes: Optional[np.ndarray] = None):
        """Cluster a (sub-)graph recursively and update ``self.embedding_`` in place.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the current (sub-)graph.
        depth :
            Current level of the recursion (0 at the root); the random vectors drawn
            at this level are scaled by ``scale ** depth``.
        nodes :
            The indices of the current nodes in the original graph.
            Defaults to ``0, ..., n - 1``.
        """
        n_nodes = adjacency.shape[0]
        if nodes is None:
            nodes = np.arange(n_nodes)

        # Without any edge there is nothing to cluster: use a single cluster.
        if adjacency.nnz:
            labels = self._clustering_method.fit_transform(adjacency)
        else:
            labels = np.zeros(n_nodes)

        clusters = np.unique(labels)
        if len(clusters) == 1:
            # The (sub-)graph is not split any further: end of the recursion.
            return

        # One random vector per cluster, stored column-wise.
        level_scale = self.scale ** depth
        shifts = level_scale * self.random_state.rand(self.n_components, len(clusters))

        for position, cluster in enumerate(clusters):
            members = (labels == cluster)
            sub_nodes = nodes[members]
            self.embedding_[sub_nodes, :] += shifts[:, position]

            # Selection matrix with one row per current node and one column per node of
            # the cluster: row i holds a single 1 if node i belongs to the cluster, and
            # is empty otherwise. Right-multiplying by it keeps the cluster columns only.
            n_row = len(members)
            indptr = np.zeros(n_row + 1, dtype=int)
            indptr[1:] = np.cumsum(members)
            n_col = indptr[-1]
            selector = sparse.csr_matrix((np.ones(n_col), np.arange(n_col, dtype=int), indptr),
                                         shape=(n_row, n_col))

            sub_adjacency = adjacency[members, :].dot(selector)
            self._recursive_louvain(sub_adjacency, depth + 1, sub_nodes)

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False):
        """Embedding of graphs from a clustering obtained with Louvain.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite :
            If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.

        Returns
        -------
        self: :class:`LouvainNE`
        """
        # Input
        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)

        # Embedding: start from the origin; the recursion adds the random shifts in place.
        n_nodes = adjacency.shape[0]
        self.embedding_ = np.zeros((n_nodes, self.n_components))
        self._recursive_louvain(adjacency, 0)

        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self