Beispiel #1
0
 def setUp(self):
     # The pure-Python engines are created unconditionally.
     self.louvain = Louvain(engine='python')
     self.bilouvain = BiLouvain(engine='python')

     if not is_numba_available:
         # Without numba, asking for the numba engine must be rejected.
         with self.assertRaises(ValueError):
             Louvain(engine='numba')
         return

     self.louvain_numba = Louvain(engine='numba')
     self.bilouvain_numba = BiLouvain(engine='numba')
Beispiel #2
0
    def test_bilouvain(self):
        # Louvain on the undirected version of a bipartite graph must give the
        # same labels as BiLouvain on the biadjacency matrix (rows, then columns).
        biadjacency = star_wars()
        adjacency = bipartite2undirected(biadjacency)

        graph_algo = Louvain(modularity='newman')
        bipartite_algo = BiLouvain(modularity='newman')

        expected = graph_algo.fit_transform(adjacency)
        bipartite_algo.fit(biadjacency)
        stacked = np.concatenate((bipartite_algo.labels_row_, bipartite_algo.labels_col_))

        agreement = expected == stacked
        self.assertTrue(agreement.all())
    def fit_predict(self, base_clusters, modal_graphs):
        """Build a cluster-association matrix from base clusterings and cluster it.

        Each base clustering is one-hot encoded; the encodings are stacked
        column-wise into a binary object-by-cluster matrix. That matrix is then
        smoothed for ``self.iterations`` rounds over the row-normalised,
        transposed modal graphs, mixing the propagated values (weight
        ``self.alpha``) with the initial binary matrix (weight
        ``1 - self.alpha``) and averaging over the graphs. Finally ``BiLouvain``
        is fitted on the smoothed matrix.

        Parameters
        ----------
        base_clusters :
            Either a ``pandas.DataFrame`` with one clustering per column, or an
            iterable of 1-D label arrays (one per clustering). Labels are used
            directly as column indices of the one-hot encoding.
        modal_graphs :
            Iterable of matrices accepted by ``scipy.sparse.csr_matrix``.
            NOTE(review): each graph is presumably square with one row per
            object, since it is multiplied with the object-by-cluster matrix --
            confirm against callers.

        Returns
        -------
        The row labels (``row_labels_``) of the fitted ``BiLouvain`` model.
        The same value is stored in ``self._object_labels``; the column labels
        are stored in ``self._cluster_labels``, the smoothed matrix in
        ``self._ca_matrix`` and its bimodularity in ``self._quality``.
        """
        # Normalise the input into a list of integer label arrays. (The original
        # code read an undefined name here and raised UnboundLocalError.)
        if isinstance(base_clusters, pd.DataFrame):
            base_clusters = [base_clusters.loc[:, column].to_numpy(dtype=int)
                             for column in base_clusters.columns]
        else:
            base_clusters = [np.asarray(clustering).astype(np.int64)
                             for clustering in base_clusters]

        # One-hot encode every base clustering: one column per cluster label.
        ba_matrices = []
        for base_cluster in base_clusters:
            one_hot = np.zeros((base_cluster.size, base_cluster.max() + 1))
            one_hot[np.arange(base_cluster.size), base_cluster] = 1
            ba_matrices.append(one_hot)
        ba_matrix = np.concatenate(ba_matrices, axis=1)

        # Rebuild the transition matrices on every call so that graphs from a
        # previous fit do not leak into this one.
        self.W = [normalize(csr_matrix(G).T, norm='l1', axis=1, copy=True)
                  for G in modal_graphs]

        ca_matrix = ba_matrix.copy()
        # The restart term does not change between iterations.
        restart = (1 - self.alpha) * ba_matrix

        for _ in range(self.iterations):
            propagated = [self.alpha * G_m @ ca_matrix + restart for G_m in self.W]
            ca_matrix = np.mean(propagated, axis=0)

        self._ca_matrix = ca_matrix
        clstr = BiLouvain()
        clstr.fit(self._ca_matrix)
        self._object_labels = clstr.row_labels_
        self._cluster_labels = clstr.col_labels_
        self._quality = bimodularity(self._ca_matrix, self._object_labels,
                                     self._cluster_labels)
        return self._object_labels
Beispiel #4
0
def svg_bigraph(biadjacency: sparse.csr_matrix,
                names_row: Optional[np.ndarray] = None, names_col: Optional[np.ndarray] = None,
                labels_row: Optional[Union[dict, np.ndarray]] = None,
                labels_col: Optional[Union[dict, np.ndarray]] = None,
                scores_row: Optional[Union[dict, np.ndarray]] = None,
                scores_col: Optional[Union[dict, np.ndarray]] = None,
                membership_row: Optional[sparse.csr_matrix] = None,
                membership_col: Optional[sparse.csr_matrix] = None,
                seeds_row: Union[list, dict] = None, seeds_col: Union[list, dict] = None,
                position_row: Optional[np.ndarray] = None, position_col: Optional[np.ndarray] = None,
                reorder: bool = True, width: Optional[float] = 400,
                height: Optional[float] = 300, margin: float = 20, margin_text: float = 3, scale: float = 1,
                node_size: float = 7, node_size_min: float = 1, node_size_max: float = 20,
                display_node_weight: bool = False,
                node_weights_row: Optional[np.ndarray] = None, node_weights_col: Optional[np.ndarray] = None,
                node_width: float = 1, node_width_max: float = 3,
                color_row: str = 'gray', color_col: str = 'gray', label_colors: Optional[Iterable] = None,
                display_edges: bool = True, edge_labels: Optional[list] = None, edge_width: float = 1,
                edge_width_min: float = 0.5, edge_width_max: float = 10, edge_color: str = 'black',
                display_edge_weight: bool = True,
                font_size: int = 12, filename: Optional[str] = None) -> str:
    """Draw a bipartite graph as an SVG image and return the markup.

    Rows are laid out in one column of nodes and columns in another, unless
    both ``position_row`` and ``position_col`` are supplied.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix of the graph.
    names_row :
        Names of the rows.
    names_col :
        Names of the columns.
    labels_row :
        Labels of the rows (negative values mean no label).
    labels_col :
        Labels of the columns (negative values mean no label).
    scores_row :
        Scores of the rows (measure of importance).
    scores_col :
        Scores of the columns (measure of importance).
    membership_row :
        Membership of the rows (label distribution).
    membership_col :
        Membership of the columns (label distribution).
    seeds_row :
        Rows to be highlighted (if dict, only keys are considered).
    seeds_col :
        Columns to be highlighted (if dict, only keys are considered).
    position_row :
        Positions of the rows. Ignored (recomputed) unless **position_col** is also given.
    position_col :
        Positions of the columns. Ignored (recomputed) unless **position_row** is also given.
    reorder :
        If ``True``, order nodes by their BiLouvain cluster (only when positions are computed).
    width :
        Width of the image.
    height :
        Height of the image.
    margin :
        Margin of the image.
    margin_text :
        Margin between node and text.
    scale :
        Multiplicative factor on the dimensions of the image.
    node_size :
        Size of nodes.
    node_size_min :
        Minimum size of nodes.
    node_size_max :
        Maximum size of nodes.
    display_node_weight :
        If ``True``, display node weights through node size.
    node_weights_row :
        Weights of rows (used only if **display_node_weight** is ``True``).
        Defaults to the row sums of the biadjacency matrix.
    node_weights_col :
        Weights of columns (used only if **display_node_weight** is ``True``).
        Defaults to the column sums of the biadjacency matrix.
    node_width :
        Width of node circle.
    node_width_max :
        Maximum width of node circle.
    color_row :
        Default color of rows (svg color).
    color_col :
        Default color of cols (svg color).
    label_colors :
        Colors of the labels (svg color).
    display_edges :
        If ``True``, display edges.
    edge_labels :
        Labels of the edges, as a list of tuples (source, destination, label)
    edge_width :
        Width of edges.
    edge_width_min :
        Minimum width of edges.
    edge_width_max :
        Maximum width of edges.
    display_edge_weight :
        If ``True``, display edge weights through edge widths.
    edge_color :
        Default color of edges (svg color). If ``None``, ``'black'`` is used when no
        names are given and ``'gray'`` otherwise.
    font_size :
        Font size.
    filename :
        Filename for saving image (optional); the ``.svg`` extension is appended.

    Returns
    -------
    image : str
        SVG image.

    Raises
    ------
    ValueError
        If neither **width** nor **height** is specified.

    Example
    -------
    >>> from sknetwork.data import movie_actor
    >>> biadjacency = movie_actor()
    >>> from sknetwork.visualization import svg_bigraph
    >>> image = svg_bigraph(biadjacency)
    >>> image[1:4]
    'svg'
    """

    def _render_nodes(count, positions, sizes, colors, widths, membership):
        # SVG fragments for one side of the bigraph (all rows or all columns).
        shapes = []
        for k in range(count):
            if membership is None:
                shapes.append(svg_node(positions[k], sizes[k], colors[k], widths[k]))
                continue
            distribution = membership[k]
            if distribution.nnz == 1:
                # Single label: plain node in that label's color.
                label = distribution.indices[0]
                shapes.append(svg_node(positions[k], sizes[k], colors[label], widths[k]))
            else:
                shapes.append(svg_pie_chart_node(positions[k], sizes[k], distribution.todense(),
                                                 colors, widths[k]))
        return shapes

    n_row, n_col = biadjacency.shape

    # Layout: computed from scratch unless both position arrays are given.
    if position_row is None or position_col is None:
        position_row = np.zeros((n_row, 2))
        position_col = np.ones((n_col, 2))
        if reorder:
            clustering = BiLouvain()
            clustering.fit(biadjacency)
            index_row = np.argsort(clustering.labels_row_)
            index_col = np.argsort(clustering.labels_col_)
        else:
            index_row = np.arange(n_row)
            index_col = np.arange(n_col)
        position_row[index_row, 1] = np.arange(n_row)
        # Shift the columns vertically so both sides are centered on each other.
        position_col[index_col, 1] = np.arange(n_col) + .5 * (n_row - n_col)
    position = np.vstack((position_row, position_col))

    # Colors.
    colors_row = get_node_colors(n_row, labels_row, scores_row, membership_row, color_row, label_colors)
    colors_col = get_node_colors(n_col, labels_col, scores_col, membership_col, color_col, label_colors)

    # Sizes (weights default to the weighted degrees).
    if node_weights_row is None:
        node_weights_row = biadjacency.dot(np.ones(n_col))
    if node_weights_col is None:
        node_weights_col = biadjacency.T.dot(np.ones(n_row))
    node_sizes_row, node_sizes_col = get_node_sizes_bipartite(
        node_weights_row, node_weights_col, node_size, node_size_min, node_size_max, display_node_weight)

    # Stroke widths.
    node_widths_row = get_node_widths(n_row, seeds_row, node_width, node_width_max)
    node_widths_col = get_node_widths(n_col, seeds_col, node_width, node_width_max)

    # Fit the layout into the requested canvas.
    if not (width or height):
        raise ValueError("You must specify either the width or the height of the image.")
    position, width, height = rescale(position, width, height, margin, node_size, node_size_max, display_node_weight)

    # Reserve horizontal room for the names on each side.
    if names_row is not None:
        text_length = np.array([len(str(name)) for name in names_row]).max()
        position[:, 0] += text_length * font_size * .5
        width += text_length * font_size * .5
    if names_col is not None:
        text_length = np.array([len(str(name)) for name in names_col]).max()
        width += text_length * font_size * .5

    # Global scaling.
    position *= scale
    height *= scale
    width *= scale
    position_row, position_col = position[:n_row], position[n_row:]

    parts = ["""<svg width="{}" height="{}"  xmlns="http://www.w3.org/2000/svg">\n""".format(width, height)]

    # Edges are emitted first so that nodes are painted on top of them.
    if display_edges:
        biadjacency_coo = sparse.coo_matrix(biadjacency)

        if edge_color is None:
            edge_color = 'black' if (names_row is None and names_col is None) else 'gray'

        edge_colors, edge_order, edge_colors_residual = get_edge_colors(biadjacency, edge_labels, edge_color,
                                                                        label_colors)
        edge_widths = get_edge_widths(biadjacency_coo, edge_width, edge_width_min, edge_width_max, display_edge_weight)

        for ix in edge_order:
            source = biadjacency_coo.row[ix]
            target = biadjacency_coo.col[ix]
            stroke = edge_colors[ix]
            parts.append(svg_edge(pos_1=position_row[source], pos_2=position_col[target],
                                  edge_width=edge_widths[ix], edge_color=stroke))

        for source, target, stroke in edge_colors_residual:
            parts.append(svg_edge(pos_1=position_row[source], pos_2=position_col[target],
                                  edge_width=edge_width, edge_color=stroke))

    # Nodes: rows first, then columns.
    parts.extend(_render_nodes(n_row, position_row, node_sizes_row, colors_row, node_widths_row, membership_row))
    parts.extend(_render_nodes(n_col, position_col, node_sizes_col, colors_col, node_widths_col, membership_col))

    # Names: row names on one side of the node, column names on the other.
    if names_row is not None:
        for k in range(n_row):
            anchor = position_row[k] - (margin_text + node_sizes_row[k], 0)
            parts.append(svg_text(anchor, names_row[k], font_size, True))
    if names_col is not None:
        for k in range(n_col):
            anchor = position_col[k] + (margin_text + node_sizes_col[k], 0)
            parts.append(svg_text(anchor, names_col[k], font_size))
    parts.append("""</svg>\n""")

    svg = ''.join(parts)

    if filename is not None:
        with open(filename + '.svg', 'w') as f:
            f.write(svg)

    return svg
Beispiel #5
0
class TestLouvainClustering(unittest.TestCase):
    """Tests for the Louvain and BiLouvain clustering algorithms."""

    def setUp(self):
        # The pure-Python engines are created unconditionally.
        self.louvain = Louvain(engine='python')
        self.bilouvain = BiLouvain(engine='python')

        if not is_numba_available:
            # Without numba, asking for the numba engine must be rejected.
            with self.assertRaises(ValueError):
                Louvain(engine='numba')
            return

        self.louvain_numba = Louvain(engine='numba')
        self.bilouvain_numba = BiLouvain(engine='numba')

    def test_unknown_types(self):
        # sparse.identity(1) is not in CSR format and must be refused.
        unsupported = sparse.identity(1)
        with self.assertRaises(TypeError):
            self.louvain.fit(unsupported)

    def test_single_node_graph(self):
        single_node = sparse.identity(1, format='csr')
        labels = self.louvain.fit_transform(single_node)
        self.assertEqual(labels, [0])

    def test_simple_graph(self):
        self.simple_directed_graph = simple_directed_graph()
        undirected = directed2undirected(self.simple_directed_graph)
        self.louvain.fit(undirected)
        self.assertEqual(len(self.louvain.labels_), 10)

    def test_undirected(self):
        self.louvain_high_resolution = Louvain(engine='python', resolution=2)
        self.louvain_null_resolution = Louvain(engine='python', resolution=0)
        self.karate_club = karate_club()

        # (algorithm, expected modularity), checked in this order.
        cases = [(self.louvain, 0.42)]
        if is_numba_available:
            cases.append((self.louvain_numba, 0.42))
        cases.append((self.louvain_high_resolution, 0.34))

        for algorithm, expected_modularity in cases:
            algorithm.fit(self.karate_club)
            labels = algorithm.labels_
            self.assertEqual(labels.shape, (34, ))
            self.assertAlmostEqual(modularity(self.karate_club, labels), expected_modularity, 2)

        # With a null resolution, every node ends up in the same cluster.
        self.louvain_null_resolution.fit(self.karate_club)
        labels = self.louvain_null_resolution.labels_
        self.assertEqual(labels.shape, (34, ))
        self.assertEqual(len(set(self.louvain_null_resolution.labels_)), 1)

    def test_directed(self):
        self.painters = painters(return_labels=False)

        self.louvain.fit(self.painters)
        labels = self.louvain.labels_
        self.assertEqual(labels.shape, (14, ))
        self.assertAlmostEqual(modularity(self.painters, labels), 0.32, 2)

        # BiLouvain treats the same matrix as a biadjacency matrix.
        self.bilouvain.fit(self.painters)
        n_rows, n_cols = self.painters.shape
        row_labels = self.bilouvain.row_labels_
        col_labels = self.bilouvain.col_labels_
        self.assertEqual(row_labels.shape, (n_rows, ))
        self.assertEqual(col_labels.shape, (n_cols, ))

    def test_bipartite(self):
        star_wars_graph = star_wars_villains()

        algorithms = [self.bilouvain]
        if is_numba_available:
            algorithms.append(self.bilouvain_numba)

        for algorithm in algorithms:
            algorithm.fit(star_wars_graph)
            row_labels = algorithm.row_labels_
            col_labels = algorithm.col_labels_
            self.assertEqual(row_labels.shape, (4, ))
            self.assertEqual(col_labels.shape, (3, ))

    def test_shuffling(self):
        self.louvain_shuffle_first = Louvain(engine='python',
                                             shuffle_nodes=True,
                                             random_state=0)
        self.louvain_shuffle_second = Louvain(engine='python',
                                              shuffle_nodes=True,
                                              random_state=123)
        self.bow_tie = bow_tie()

        for shuffled in (self.louvain_shuffle_first, self.louvain_shuffle_second):
            shuffled.fit(self.bow_tie)
            self.assertEqual(shuffled.labels_[1], 1)