class LouvainHierarchy(BaseHierarchy):
    """Top-down hierarchical clustering obtained by applying Louvain recursively.

    The graph is first clustered with Louvain; each resulting cluster is then clustered again
    on its induced sub-graph, and so on until the requested depth is reached or a cluster
    can no longer be split.

    * Graphs
    * Digraphs

    Parameters
    ----------
    depth :
        Depth of the tree.
        A negative value is interpreted as no limit (return a tree of maximum depth).
    resolution :
        Resolution parameter.
    tol_optimization :
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation :
        Minimum increase in the objective function to enter a new aggregation pass.
    n_aggregations :
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes :
        Enables node shuffling before optimization.
    random_state :
        Random number generator or random seed. If ``None``, numpy.random is used.
    verbose :
        Verbose mode.

    Attributes
    ----------
    dendrogram_ : np.ndarray
        Dendrogram.

    Example
    -------
    >>> from sknetwork.hierarchy import LouvainHierarchy
    >>> from sknetwork.data import house
    >>> louvain = LouvainHierarchy()
    >>> adjacency = house()
    >>> louvain.fit_transform(adjacency)
    array([[3., 2., 0., 2.],
           [4., 1., 0., 2.],
           [6., 0., 0., 3.],
           [5., 7., 1., 5.]])

    Notes
    -----
    Each row of the dendrogram = merge nodes, distance, size of cluster.

    See Also
    --------
    scipy.cluster.hierarchy.dendrogram
    """

    def __init__(self, depth: int = 3, resolution: float = 1, tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super().__init__()

        self.depth = depth
        # A single Louvain instance is reused at every level of the recursion.
        self._clustering_method = Louvain(
            resolution=resolution,
            tol_optimization=tol_optimization,
            tol_aggregation=tol_aggregation,
            n_aggregations=n_aggregations,
            shuffle_nodes=shuffle_nodes,
            random_state=random_state,
            verbose=verbose,
        )

    def _recursive_louvain(self, adjacency: Union[sparse.csr_matrix, np.ndarray], depth: int,
                           nodes: Optional[np.ndarray] = None):
        """Build the cluster tree of a (sub-)graph by recursive calls to Louvain.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the current (sub-)graph.
        depth :
            Remaining depth of the recursion; the recursion stops splitting when it reaches 0.
        nodes :
            Indices, in the original graph, of the nodes of the current (sub-)graph.
            Defaults to ``0, ..., n - 1``.

        Returns
        -------
        tree : list
            Nested list of node indices (one sub-list per cluster).
        """
        size = adjacency.shape[0]
        if nodes is None:
            nodes = np.arange(size)

        # Without edges, or once the depth budget is exhausted, everything is put in one cluster.
        if not adjacency.nnz or not depth:
            labels = np.zeros(size)
        else:
            labels = self._clustering_method.fit_transform(adjacency)

        clusters = np.unique(labels)

        if len(clusters) == 1:
            # The current set cannot be split further: its nodes become the leaves.
            if len(nodes) > 1:
                return [[node] for node in nodes]
            return [nodes[0]]

        # One sub-tree per cluster, computed on the sub-graph induced by the cluster.
        subtrees = []
        for cluster in clusters:
            members = (labels == cluster)
            subtrees.append(
                self._recursive_louvain(adjacency[members, :][:, members], depth - 1, nodes[members])
            )
        return subtrees

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'LouvainHierarchy':
        """Fit algorithm to data.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`LouvainHierarchy`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)

        tree = self._recursive_louvain(adjacency, self.depth)
        merges, _ = get_dendrogram(tree)
        dendrogram = np.array(merges)

        # Shift the third column so that its smallest value is 0.
        heights = dendrogram[:, 2]
        heights -= min(heights)

        self.dendrogram_ = reorder_dendrogram(dendrogram)
        return self
class LouvainNE(BaseEmbedding):
    """Graph embedding built from the hierarchical Louvain algorithm, with a random
    scattering of the clusters at each level of the hierarchy.

    Parameters
    ----------
    n_components : int
        Dimension of the embedding.
    scale : float
        Dilution factor to be applied on the random vector to be added at each iteration of the clustering method.
    resolution :
        Resolution parameter.
    tol_optimization :
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation :
        Minimum increase in the objective function to enter a new aggregation pass.
    n_aggregations :
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes :
        Enables node shuffling before optimization.
    random_state :
        Random number generator or random seed. If None, numpy.random is used.
    verbose :
        Verbose mode, forwarded to the underlying Louvain instance.

    Attributes
    ----------
    embedding_ : array, shape = (n, n_components)
        Embedding of the nodes.
    embedding_row_ : array, shape = (n_row, n_components)
        Embedding of the rows, for bipartite graphs.
    embedding_col_ : array, shape = (n_col, n_components)
        Embedding of the columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.embedding import LouvainNE
    >>> from sknetwork.data import karate_club
    >>> louvain = LouvainNE(n_components=3)
    >>> adjacency = karate_club()
    >>> embedding = louvain.fit_transform(adjacency)
    >>> embedding.shape
    (34, 3)

    References
    ----------
    Bhowmick, A. K., Meneni, K., Danisch, M., Guillaume, J. L., & Mitra, B. (2020, January).
    `LouvainNE: Hierarchical Louvain Method for High Quality and Scalable Network Embedding.
    <https://hal.archives-ouvertes.fr/hal-02999888/document>`_
    In Proceedings of the 13th International Conference on Web Search and Data Mining (pp. 43-51).
    """

    def __init__(self, n_components: int = 2, scale: float = .1, resolution: float = 1, tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super().__init__()

        self.n_components = n_components
        self.scale = scale
        # A single Louvain instance is reused at every level of the recursion.
        self._clustering_method = Louvain(
            resolution=resolution,
            tol_optimization=tol_optimization,
            tol_aggregation=tol_aggregation,
            n_aggregations=n_aggregations,
            shuffle_nodes=shuffle_nodes,
            random_state=random_state,
            verbose=verbose,
        )
        self.random_state = check_random_state(random_state)
        self.bipartite = None

    def _recursive_louvain(self, adjacency: Union[sparse.csr_matrix, np.ndarray], depth: int,
                           nodes: Optional[np.ndarray] = None):
        """Recursive step of fit; updates ``self.embedding_`` in place and returns nothing.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the current (sub-)graph.
        depth :
            Current depth of the recursion; random shifts are scaled by ``scale ** depth``.
        nodes :
            Indices, in the original graph, of the nodes of the current (sub-)graph.
            Defaults to ``0, ..., n - 1``.
        """
        size = adjacency.shape[0]
        if nodes is None:
            nodes = np.arange(size)

        # A (sub-)graph without edges is treated as a single cluster.
        labels = self._clustering_method.fit_transform(adjacency) if adjacency.nnz else np.zeros(size)
        clusters = np.unique(labels)

        if len(clusters) == 1:
            # No further split: the embedding of these nodes is final.
            return

        # One random shift per cluster (stored column-wise), damped by scale ** depth.
        shifts = (self.scale ** depth) * self.random_state.rand(self.n_components, len(clusters))

        for cluster, shift in zip(clusters, shifts.T):
            members = (labels == cluster)
            cluster_nodes = nodes[members]
            self.embedding_[cluster_nodes, :] += shift

            # Sparse 0/1 selector keeping only the columns of the cluster members:
            # row i holds a single 1 if node i belongs to the cluster, nothing otherwise.
            n_total = len(members)
            row_ptr = np.zeros(n_total + 1, dtype=int)
            row_ptr[1:] = np.cumsum(members)
            n_kept = row_ptr[-1]
            selector = sparse.csr_matrix(
                (np.ones(n_kept), np.arange(n_kept, dtype=int), row_ptr),
                shape=(n_total, n_kept),
            )

            # Sub-graph induced by the cluster: select rows, then columns via the selector.
            sub_adjacency = adjacency[members, :].dot(selector)
            self._recursive_louvain(sub_adjacency, depth + 1, cluster_nodes)

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False):
        """Embedding of graphs from a clustering obtained with Louvain.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite :
            If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.

        Returns
        -------
        self: :class:`LouvainNE`
        """
        # Input: get the adjacency matrix and whether the graph is handled as bipartite.
        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)

        # Embedding: start from the origin; the recursion adds the random shifts in place.
        self.embedding_ = np.zeros((adjacency.shape[0], self.n_components))
        self._recursive_louvain(adjacency, 0)

        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self