예제 #1
0
    def __init__(self,
                 depth: int = 3,
                 resolution: float = 1,
                 tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3,
                 n_aggregations: int = -1,
                 shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None,
                 verbose: bool = False):
        """Record the tree depth and build the Louvain instance used for clustering.

        Parameters
        ----------
        depth :
            Stored as ``self.depth``.
        resolution, tol_optimization, tol_aggregation, n_aggregations, shuffle_nodes, random_state, verbose :
            Passed unchanged, by keyword, to the :class:`Louvain` constructor.
        """
        super(LouvainHierarchy, self).__init__()

        self.depth = depth

        # Every remaining option configures the underlying clustering method.
        louvain_options = dict(resolution=resolution,
                               tol_optimization=tol_optimization,
                               tol_aggregation=tol_aggregation,
                               n_aggregations=n_aggregations,
                               shuffle_nodes=shuffle_nodes,
                               random_state=random_state,
                               verbose=verbose)
        self._clustering_method = Louvain(**louvain_options)
    def __init__(self,
                 n_components: int = 2,
                 scale: float = .1,
                 resolution: float = 1,
                 tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3,
                 n_aggregations: int = -1,
                 shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None,
                 verbose: bool = False):
        """Store the embedding settings and build the Louvain instance used for clustering.

        Parameters
        ----------
        n_components :
            Stored as ``self.n_components``.
        scale :
            Stored as ``self.scale``.
        resolution, tol_optimization, tol_aggregation, n_aggregations, shuffle_nodes, verbose :
            Passed unchanged, by keyword, to the :class:`Louvain` constructor.
        random_state :
            Passed to the :class:`Louvain` constructor and also converted with
            ``check_random_state`` into ``self.random_state``.
        """
        super(LouvainNE, self).__init__()

        self.n_components = n_components
        self.scale = scale

        louvain_options = dict(resolution=resolution,
                               tol_optimization=tol_optimization,
                               tol_aggregation=tol_aggregation,
                               n_aggregations=n_aggregations,
                               shuffle_nodes=shuffle_nodes,
                               random_state=random_state,
                               verbose=verbose)
        self._clustering_method = Louvain(**louvain_options)

        self.random_state = check_random_state(random_state)
        # Not known at construction time; initialised to None here.
        self.bipartite = None
    def fit(self,
            input_matrix: sparse.csr_matrix,
            force_bipartite: bool = False):
        """Compute an embedding from the clusters found by Louvain.

        The input matrix is passed through ``normalize`` and multiplied by the
        membership matrix of the reindexed cluster labels; the dense result is
        stored in ``embedding_``. When ``reindex_labels`` also returns a second
        label vector, a second embedding is computed in the same way from the
        transposed matrix and stored in ``embedding_col_``, while
        ``embedding_row_`` is set to ``embedding_``.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite : bool (default = ``False``)
            If ``True``, force the input matrix to be considered as a biadjacency matrix.
            The flag is forwarded to ``Louvain.fit``.

        Returns
        -------
        self
            The fitted instance.
        """
        clustering = Louvain(resolution=self.resolution,
                             modularity=self.modularity,
                             tol_optimization=self.tol_optimization,
                             tol_aggregation=self.tol_aggregation,
                             n_aggregations=self.n_aggregations,
                             shuffle_nodes=self.shuffle_nodes,
                             sort_clusters=False,
                             return_membership=True,
                             return_aggregate=True,
                             random_state=self.random_state)
        clustering.fit(input_matrix, force_bipartite=force_bipartite)

        # Pick the label vectors to reindex: a non-square input uses the
        # column labels as primary labels and the row labels as secondary ones.
        if not is_square(input_matrix):
            primary, secondary = clustering.labels_col_, clustering.labels_row_
        else:
            primary, secondary = clustering.labels_, None

        # Isolated nodes are handled by reindex_labels according to self.isolated_nodes.
        self.labels_, labels_row = reindex_labels(primary, secondary,
                                                  self.isolated_nodes)

        # embedding
        weights = normalize(input_matrix)
        self.embedding_ = weights.dot(membership_matrix(self.labels_)).toarray()

        if labels_row is None:
            return self

        # Second embedding, computed from the transposed matrix.
        weights_transposed = normalize(input_matrix.T)
        embedding_col = weights_transposed.dot(membership_matrix(labels_row))
        self.embedding_row_ = self.embedding_
        self.embedding_col_ = embedding_col.toarray()
        return self
예제 #4
0
    def fit(self, adjacency: sparse.csr_matrix):
        """Compute an embedding from the clusters found by Louvain.

        By default the embedding is the membership matrix returned by Louvain.
        When ``self.isolated_nodes`` is ``'remove'`` or ``'merge'``, clusters
        containing a single node are relabelled (to ``-1`` for ``'remove'``, to
        one shared extra label for ``'merge'``) and the embedding is recomputed
        as ``normalize(adjacency)`` times the membership matrix of the new labels.

        Parameters
        ----------
        adjacency:
            Adjacency matrix of the graph.

        Returns
        -------
        self
            The fitted instance.
        """
        clustering = Louvain(resolution=self.resolution,
                             modularity=self.modularity,
                             tol_optimization=self.tol_optimization,
                             tol_aggregation=self.tol_aggregation,
                             n_aggregations=self.n_aggregations,
                             shuffle_nodes=self.shuffle_nodes,
                             sort_clusters=True,
                             return_membership=True,
                             return_aggregate=True,
                             random_state=self.random_state)
        clustering.fit(adjacency)

        # NOTE: labels_ keeps the labels produced by Louvain; the relabelling
        # below only affects the embedding.
        self.labels_ = clustering.labels_
        embedding = clustering.membership_

        if self.isolated_nodes in ('remove', 'merge'):
            # Clusters of size one are treated as isolated nodes.
            found, sizes = np.unique(clustering.labels_, return_counts=True)
            n_labels = max(found) + 1
            kept = found[sizes > 1]

            # Lookup table old label -> new label, pre-filled with the value
            # assigned to isolated nodes.
            fill_value = -1 if self.isolated_nodes == 'remove' else len(kept)
            lookup = np.full(n_labels, fill_value, dtype='int')
            lookup[kept] = np.arange(len(kept))
            relabelled = lookup[clustering.labels_]

            # get embeddings
            weights = normalize(adjacency)
            embedding = weights.dot(membership_matrix(relabelled))

        self.embedding_ = embedding.toarray()
        return self
예제 #5
0
class LouvainHierarchy(BaseHierarchy):
    """Top-down hierarchical clustering built by applying Louvain recursively.

    * Graphs
    * Digraphs

    Parameters
    ----------
    depth :
        Depth of the tree.
        A negative value is interpreted as no limit (return a tree of maximum depth).
    resolution :
        Resolution parameter.
    tol_optimization :
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation :
        Minimum increase in the objective function to enter a new aggregation pass.
    n_aggregations :
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes :
        Enables node shuffling before optimization.
    random_state :
        Random number generator or random seed. If ``None``, numpy.random is used.
    verbose :
        Verbose mode.

    Attributes
    ----------
    dendrogram_ : np.ndarray
        Dendrogram.

    Example
    -------
    >>> from sknetwork.hierarchy import LouvainHierarchy
    >>> from sknetwork.data import house
    >>> louvain = LouvainHierarchy()
    >>> adjacency = house()
    >>> louvain.fit_transform(adjacency)
    array([[3., 2., 0., 2.],
           [4., 1., 0., 2.],
           [6., 0., 0., 3.],
           [5., 7., 1., 5.]])

    Notes
    -----
    Each row of the dendrogram = merge nodes, distance, size of cluster.

    See Also
    --------
    scipy.cluster.hierarchy.dendrogram
    """
    def __init__(self, depth: int = 3, resolution: float = 1, tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super().__init__()

        self.depth = depth

        # All remaining options configure the clustering applied at each level.
        louvain_options = dict(resolution=resolution,
                               tol_optimization=tol_optimization,
                               tol_aggregation=tol_aggregation,
                               n_aggregations=n_aggregations,
                               shuffle_nodes=shuffle_nodes,
                               random_state=random_state,
                               verbose=verbose)
        self._clustering_method = Louvain(**louvain_options)

    def _recursive_louvain(self, adjacency: Union[sparse.csr_matrix, np.ndarray], depth: int,
                           nodes: Optional[np.ndarray] = None):
        """Cluster the graph, then recurse into the subgraph of each cluster.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        depth :
            Remaining depth of the recursion; clustering stops when it reaches 0.
        nodes :
            Indices, in the original graph, of the nodes of ``adjacency``.
            Defaults to ``0, ..., n - 1``.

        Returns
        -------
        tree : list
            Nested lists of node indices.
        """
        n_nodes = adjacency.shape[0]
        if nodes is None:
            nodes = np.arange(n_nodes)

        # No edge left or depth exhausted: put every node in a single cluster.
        if not adjacency.nnz or not depth:
            labels = np.zeros(n_nodes)
        else:
            labels = self._clustering_method.fit_transform(adjacency)

        clusters = np.unique(labels)

        if len(clusters) == 1:
            # Leaf level: several nodes become one singleton list each.
            if len(nodes) > 1:
                return [[node] for node in nodes]
            return [nodes[0]]

        subtrees = []
        for cluster in clusters:
            selected = (labels == cluster)
            sub_adjacency = adjacency[selected, :][:, selected]
            subtrees.append(self._recursive_louvain(sub_adjacency, depth - 1, nodes[selected]))
        return subtrees

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'LouvainHierarchy':
        """Fit algorithm to data.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`LouvainHierarchy`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)

        tree = self._recursive_louvain(adjacency, self.depth)
        merges, _ = get_dendrogram(tree)
        dendrogram = np.array(merges)

        # Shift the third column so that its smallest value is 0.
        offset = min(dendrogram[:, 2])
        dendrogram[:, 2] -= offset

        self.dendrogram_ = reorder_dendrogram(dendrogram)
        return self
예제 #6
0
def svg_bigraph(biadjacency: sparse.csr_matrix,
                names_row: Optional[np.ndarray] = None,
                names_col: Optional[np.ndarray] = None,
                labels_row: Optional[Union[dict, np.ndarray]] = None,
                labels_col: Optional[Union[dict, np.ndarray]] = None,
                scores_row: Optional[Union[dict, np.ndarray]] = None,
                scores_col: Optional[Union[dict, np.ndarray]] = None,
                membership_row: Optional[sparse.csr_matrix] = None,
                membership_col: Optional[sparse.csr_matrix] = None,
                seeds_row: Optional[Union[list, dict]] = None,
                seeds_col: Optional[Union[list, dict]] = None,
                position_row: Optional[np.ndarray] = None,
                position_col: Optional[np.ndarray] = None,
                reorder: bool = True,
                width: Optional[float] = 400,
                height: Optional[float] = 300,
                margin: float = 20,
                margin_text: float = 3,
                scale: float = 1,
                node_size: float = 7,
                node_size_min: float = 1,
                node_size_max: float = 20,
                display_node_weight: bool = False,
                node_weights_row: Optional[np.ndarray] = None,
                node_weights_col: Optional[np.ndarray] = None,
                node_width: float = 1,
                node_width_max: float = 3,
                color_row: str = 'gray',
                color_col: str = 'gray',
                label_colors: Optional[Iterable] = None,
                display_edges: bool = True,
                edge_labels: Optional[list] = None,
                edge_width: float = 1,
                edge_width_min: float = 0.5,
                edge_width_max: float = 10,
                edge_color: str = 'black',
                display_edge_weight: bool = True,
                font_size: int = 12,
                filename: Optional[str] = None) -> str:
    """Return SVG image of a bigraph.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix of the graph.
    names_row :
        Names of the rows.
    names_col :
        Names of the columns.
    labels_row :
        Labels of the rows (negative values mean no label).
    labels_col :
        Labels of the columns (negative values mean no label).
    scores_row :
        Scores of the rows (measure of importance).
    scores_col :
        Scores of the columns (measure of importance).
    membership_row :
        Membership of the rows (label distribution).
    membership_col :
        Membership of the columns (label distribution).
    seeds_row :
        Rows to be highlighted (if dict, only keys are considered).
    seeds_col :
        Columns to be highlighted (if dict, only keys are considered).
    position_row :
        Positions of the rows. Used only if **position_col** is also given.
    position_col :
        Positions of the columns. Used only if **position_row** is also given.
    reorder :
        Use clustering to order nodes (only when positions are computed by this function).
    width :
        Width of the image.
    height :
        Height of the image.
    margin :
        Margin of the image.
    margin_text :
        Margin between node and text.
    scale :
        Multiplicative factor on the dimensions of the image.
    node_size :
        Size of nodes.
    node_size_min :
        Minimum size of nodes.
    node_size_max :
        Maximum size of nodes.
    display_node_weight :
        If ``True``, display node weights through node size.
    node_weights_row :
        Weights of rows (used only if **display_node_weight** is ``True``).
    node_weights_col :
        Weights of columns (used only if **display_node_weight** is ``True``).
    node_width :
        Width of node circle.
    node_width_max :
        Maximum width of node circle.
    color_row :
        Default color of rows (svg color).
    color_col :
        Default color of cols (svg color).
    label_colors :
        Colors of the labels (svg color).
    display_edges :
        If ``True``, display edges.
    edge_labels :
        Labels of the edges, as a list of tuples (source, destination, label)
    edge_width :
        Width of edges.
    edge_width_min :
        Minimum width of edges.
    edge_width_max :
        Maximum width of edges.
    edge_color :
        Default color of edges (svg color).
        If ``None``, ``'black'`` is used when no names are given and ``'gray'`` otherwise.
    display_edge_weight :
        If ``True``, display edge weights through edge widths.
    font_size :
        Font size.
    filename :
        Filename for saving image (optional). The extension ``.svg`` is appended
        and the file is written in UTF-8.

    Returns
    -------
    image : str
        SVG image.

    Raises
    ------
    ValueError
        If neither **width** nor **height** is specified.

    Example
    -------
    >>> from sknetwork.data import movie_actor
    >>> biadjacency = movie_actor()
    >>> from sknetwork.visualization import svg_bigraph
    >>> image = svg_bigraph(biadjacency)
    >>> image[1:4]
    'svg'
    """
    n_row, n_col = biadjacency.shape

    # node positions: the default two-column layout is used unless BOTH
    # position_row and position_col are supplied by the caller
    if position_row is None or position_col is None:
        position_row = np.zeros((n_row, 2))
        position_col = np.ones((n_col, 2))
        if reorder:
            louvain = Louvain()
            louvain.fit(biadjacency, force_bipartite=True)
            index_row = np.argsort(louvain.labels_row_)
            index_col = np.argsort(louvain.labels_col_)
        else:
            index_row = np.arange(n_row)
            index_col = np.arange(n_col)
        position_row[index_row, 1] = np.arange(n_row)
        # offset the columns so that both sides are vertically centered
        position_col[index_col, 1] = np.arange(n_col) + .5 * (n_row - n_col)
    position = np.vstack((position_row, position_col))

    # node colors: rows and columns share one score range when both are scored
    if scores_row is not None and scores_col is not None:
        if isinstance(scores_row, dict):
            scores_row = np.array(list(scores_row.values()))
        if isinstance(scores_col, dict):
            scores_col = np.array(list(scores_col.values()))
        scores = np.hstack((scores_row, scores_col))
        score_min = np.min(scores)
        score_max = np.max(scores)
    else:
        score_min = None
        score_max = None

    colors_row = get_node_colors(n_row, labels_row, scores_row, membership_row,
                                 color_row, label_colors, score_min, score_max)
    colors_col = get_node_colors(n_col, labels_col, scores_col, membership_col,
                                 color_col, label_colors, score_min, score_max)

    # node sizes: default weights are the row / column sums of the biadjacency
    if node_weights_row is None:
        node_weights_row = biadjacency.dot(np.ones(n_col))
    if node_weights_col is None:
        node_weights_col = biadjacency.T.dot(np.ones(n_row))
    node_sizes_row, node_sizes_col = get_node_sizes_bipartite(
        node_weights_row, node_weights_col, node_size, node_size_min,
        node_size_max, display_node_weight)

    # node widths
    node_widths_row = get_node_widths(n_row, seeds_row, node_width,
                                      node_width_max)
    node_widths_col = get_node_widths(n_col, seeds_col, node_width,
                                      node_width_max)

    # rescaling
    if not width and not height:
        raise ValueError(
            "You must specify either the width or the height of the image.")
    position, width, height = rescale(position, width, height, margin,
                                      node_size, node_size_max,
                                      display_node_weight)

    # node names: make room for the longest name on each side
    if names_row is not None:
        text_length = np.max(np.array([len(str(name)) for name in names_row]))
        position[:, 0] += text_length * font_size * .5
        width += text_length * font_size * .5
    if names_col is not None:
        text_length = np.max(np.array([len(str(name)) for name in names_col]))
        width += text_length * font_size * .5

    # scaling
    position *= scale
    height *= scale
    width *= scale
    position_row = position[:n_row]
    position_col = position[n_row:]

    # The image is assembled from fragments joined once at the end, instead of
    # growing a string with repeated concatenation.
    parts = ["""<svg width="{}" height="{}"  xmlns="http://www.w3.org/2000/svg">\n""".format(
        width, height)]

    # edges (drawn first so that nodes are painted on top of them)
    if display_edges:
        biadjacency_coo = sparse.coo_matrix(biadjacency)

        if edge_color is None:
            if names_row is None and names_col is None:
                edge_color = 'black'
            else:
                edge_color = 'gray'

        edge_colors, edge_order, edge_colors_residual = get_edge_colors(
            biadjacency, edge_labels, edge_color, label_colors)
        edge_widths = get_edge_widths(biadjacency_coo, edge_width,
                                      edge_width_min, edge_width_max,
                                      display_edge_weight)

        for ix in edge_order:
            i = biadjacency_coo.row[ix]
            j = biadjacency_coo.col[ix]
            parts.append(svg_edge(pos_1=position_row[i],
                                  pos_2=position_col[j],
                                  edge_width=edge_widths[ix],
                                  edge_color=edge_colors[ix]))

        # residual edges are drawn with the default edge width
        for i, j, color in edge_colors_residual:
            parts.append(svg_edge(pos_1=position_row[i],
                                  pos_2=position_col[j],
                                  edge_width=edge_width,
                                  edge_color=color))

    # nodes
    parts.extend(_svg_bigraph_nodes(n_row, position_row, node_sizes_row,
                                    colors_row, node_widths_row, membership_row))
    parts.extend(_svg_bigraph_nodes(n_col, position_col, node_sizes_col,
                                    colors_col, node_widths_col, membership_col))

    # text
    if names_row is not None:
        for i in range(n_row):
            parts.append(svg_text(position_row[i], names_row[i],
                                  margin_text + node_sizes_row[i], font_size, 'left'))
    if names_col is not None:
        for i in range(n_col):
            parts.append(svg_text(position_col[i], names_col[i],
                                  margin_text + node_sizes_col[i], font_size))
    parts.append("""</svg>\n""")

    svg = ''.join(parts)

    if filename is not None:
        # The document carries no XML declaration, so readers assume UTF-8:
        # write it as UTF-8 regardless of the platform's default encoding.
        with open(filename + '.svg', 'w', encoding='utf-8') as f:
            f.write(svg)

    return svg


def _svg_bigraph_nodes(n_nodes, position, sizes, colors, widths, membership):
    """Return the SVG fragments of the nodes of one side (rows or columns) of a bigraph."""
    fragments = []
    for i in range(n_nodes):
        if membership is None:
            fragments.append(svg_node(position[i], sizes[i], colors[i], widths[i]))
            continue
        node_membership = membership[i]
        if node_membership.nnz == 1:
            # single label: plain node; colors is indexed by label in this case
            index = node_membership.indices[0]
            fragments.append(svg_node(position[i], sizes[i], colors[index], widths[i]))
        else:
            fragments.append(svg_pie_chart_node(position[i], sizes[i],
                                                node_membership.todense(),
                                                colors, widths[i]))
    return fragments
class LouvainNE(BaseEmbedding):
    """Embedding of graphs based on the hierarchical Louvain algorithm with random scattering per level.

    Parameters
    ----------
    n_components : int
        Dimension of the embedding.
    scale : float
        Dilution factor to be applied on the random vector to be added at each iteration of the clustering method.
    resolution :
        Resolution parameter.
    tol_optimization :
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation :
        Minimum increase in the objective function to enter a new aggregation pass.
    n_aggregations :
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes :
        Enables node shuffling before optimization.
    random_state :
        Random number generator or random seed. If None, numpy.random is used.
    verbose :
        Forwarded to the underlying :class:`Louvain` instance.

    Attributes
    ----------
    embedding_ : array, shape = (n, n_components)
        Embedding of the nodes.
    embedding_row_ : array, shape = (n_row, n_components)
        Embedding of the rows, for bipartite graphs.
    embedding_col_ : array, shape = (n_col, n_components)
        Embedding of the columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.embedding import LouvainNE
    >>> from sknetwork.data import karate_club
    >>> louvain = LouvainNE(n_components=3)
    >>> adjacency = karate_club()
    >>> embedding = louvain.fit_transform(adjacency)
    >>> embedding.shape
    (34, 3)

    References
    ----------
    Bhowmick, A. K., Meneni, K., Danisch, M., Guillaume, J. L., & Mitra, B. (2020, January).
    `LouvainNE: Hierarchical Louvain Method for High Quality and Scalable Network Embedding.
    <https://hal.archives-ouvertes.fr/hal-02999888/document>`_
    In Proceedings of the 13th International Conference on Web Search and Data Mining (pp. 43-51).
    """
    def __init__(self,
                 n_components: int = 2,
                 scale: float = .1,
                 resolution: float = 1,
                 tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3,
                 n_aggregations: int = -1,
                 shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None,
                 verbose: bool = False):
        super().__init__()

        self.n_components = n_components
        self.scale = scale

        louvain_options = dict(resolution=resolution,
                               tol_optimization=tol_optimization,
                               tol_aggregation=tol_aggregation,
                               n_aggregations=n_aggregations,
                               shuffle_nodes=shuffle_nodes,
                               random_state=random_state,
                               verbose=verbose)
        self._clustering_method = Louvain(**louvain_options)

        self.random_state = check_random_state(random_state)
        # Set by fit from the result of get_adjacency.
        self.bipartite = None

    def _recursive_louvain(self,
                           adjacency: Union[sparse.csr_matrix, np.ndarray],
                           depth: int,
                           nodes: Optional[np.ndarray] = None):
        """Recursive function for fit, modifies the embedding in place.

        At each level, every cluster receives one random vector scaled by
        ``scale ** depth``; the vector is added to the embedding of the nodes
        of the cluster before recursing into the subgraph of that cluster.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        depth :
            Depth of the recursion.
        nodes :
            The indices of the current nodes in the original graph.
            Defaults to ``0, ..., n - 1``.
        """
        n_nodes = adjacency.shape[0]
        if nodes is None:
            nodes = np.arange(n_nodes)

        # A graph without edges is considered a single cluster.
        if not adjacency.nnz:
            labels = np.zeros(n_nodes)
        else:
            labels = self._clustering_method.fit_transform(adjacency)

        clusters = np.unique(labels)
        if len(clusters) == 1:
            # Nothing left to split: end of the recursion.
            return

        # One random column per cluster, drawn in a single call.
        shifts = (self.scale ** depth) * self.random_state.rand(self.n_components, len(clusters))
        for position, cluster in enumerate(clusters):
            selected = (labels == cluster)
            members = nodes[selected]
            self.embedding_[members, :] += shifts[:, position]

            # Sparse selection matrix with a single 1 per selected node, so
            # that the product below keeps only the columns of the cluster.
            n_total = len(selected)
            row_pointers = np.zeros(n_total + 1, dtype=int)
            row_pointers[1:] = np.cumsum(selected)
            n_kept = row_pointers[-1]
            selector = sparse.csr_matrix(
                (np.ones(n_kept), np.arange(n_kept, dtype=int), row_pointers),
                shape=(n_total, n_kept))

            sub_adjacency = adjacency[selected, :].dot(selector)
            self._recursive_louvain(sub_adjacency, depth + 1, members)

    def fit(self,
            input_matrix: Union[sparse.csr_matrix, np.ndarray],
            force_bipartite: bool = False):
        """Embedding of graphs from a clustering obtained with Louvain.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite :
            If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.

        Returns
        -------
        self: :class:`LouvainNE`
        """
        # input
        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)
        n_nodes = adjacency.shape[0]

        # embedding: start from zeros, the recursion accumulates in place
        self.embedding_ = np.zeros((n_nodes, self.n_components))
        self._recursive_louvain(adjacency, 0)

        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self
예제 #8
0
    def __init__(self, **kwargs):
        """Build the Louvain instance used for clustering.

        Parameters
        ----------
        **kwargs :
            Keyword arguments passed unchanged to the :class:`Louvain` constructor.
        """
        super(LouvainHierarchy, self).__init__()

        clustering = Louvain(**kwargs)
        self._clustering_method = clustering