Esempio n. 1
0
def co_neighbor_graph(adjacency: Union[sparse.csr_matrix, np.ndarray],
                      normalized: bool = True,
                      method='knn',
                      n_neighbors: int = 5,
                      n_components: int = 8) -> sparse.csr_matrix:
    """Build the co-neighborhood adjacency of a graph.

    Applies to:

    * Graphs
    * Digraphs
    * Bigraphs

    The target quantity is :math:`\\tilde{A} = AF^{-1}A^T`, with F a weight matrix.

    Parameters
    ----------
    adjacency:
        Adjacency of the input graph.
    normalized:
        If ``True``, F is the diagonal in-degree matrix :math:`F = \\text{diag}(A^T1)`.
        If ``False``, F is the identity matrix.
    method:
        ``'exact'`` or ``'knn'``.
        With ``'exact'``, the result is obtained by a sparse matrix product; it can be much denser
        than the input graph and may therefore trigger Memory errors.
        With ``'knn'``, the co-neighborhood is approximated by a KNNDense search in a spectral
        embedding space (GSVD if ``normalized``, SVD otherwise).
    n_neighbors:
        Number of neighbors for the KNNDense search. Ignored unless ``method='knn'``.
    n_components:
        Dimension of the embedding space. Ignored unless ``method='knn'``.

    Returns
    -------
    adjacency : sparse.csr_matrix
        Adjacency of the co-neighborhood.

    Raises
    ------
    ValueError
        If ``method`` is neither ``'exact'`` nor ``'knn'``.
    """
    adjacency = check_format(adjacency).astype(float)

    if method == 'exact':
        # Right factor of the product: the transpose, normalized when requested.
        right_factor = normalize(adjacency.T).tocsr() if normalized else adjacency.T
        return adjacency.dot(right_factor)

    if method != 'knn':
        raise ValueError('method must be "exact" or "knn".')

    # Approximate route: embed the nodes, then link each one to its nearest neighbors.
    embedding_class = GSVD if normalized else SVD
    embedder = embedding_class(n_components, regularization=None)
    node_embedding = embedder.fit_transform(adjacency)
    neighbor_search = KNNDense(n_neighbors, undirected=True)
    neighbor_search.fit(node_embedding)
    return neighbor_search.adjacency_
Esempio n. 2
0
    def test_regular(self):
        """Check label and aggregate shapes on the test graphs, with original and boolean weights."""
        algorithms = [
            Louvain(return_aggregate=True),
            KMeans(embedding_method=GSVD(3), return_aggregate=True),
            PropagationClustering(return_aggregate=True),
        ]
        for algo in algorithms:
            graphs = [test_graph(), test_digraph(), test_graph_disconnect()]
            for graph in graphs:
                n_nodes = graph.shape[0]
                # Copy of the graph whose stored values are cast to bool.
                graph_bool = graph.copy()
                graph_bool.data = graph_bool.data.astype(bool)

                labels_weighted = algo.fit_transform(graph)
                labels_bool = algo.fit_transform(graph_bool)
                for labels in (labels_weighted, labels_bool):
                    self.assertEqual(labels.shape, (n_nodes, ))

                # adjacency_ is read after the boolean fit, hence compared with its labels.
                n_clusters = len(set(labels_bool))
                self.assertEqual(algo.adjacency_.shape, (n_clusters, n_clusters))
Esempio n. 3
0
 def __init__(self, n_clusters: int = 8, embedding_method: BaseEmbedding = None, co_cluster: bool = False,
              sort_clusters: bool = True, return_membership: bool = True, return_aggregate: bool = True):
     """Store the clustering configuration.

     Parameters
     ----------
     n_clusters:
         Stored as ``self.n_clusters``.
     embedding_method:
         Embedding algorithm, stored as ``self.embedding_method``.
         If ``None`` (default), a new ``GSVD(10)`` is created for this instance.
     co_cluster:
         Stored as ``self.co_cluster``.
     sort_clusters, return_membership, return_aggregate:
         Forwarded unchanged to the parent initializer.
     """
     super(KMeans, self).__init__(sort_clusters=sort_clusters, return_membership=return_membership,
                                  return_aggregate=return_aggregate)
     # A default instance written in the signature is built once, at definition time,
     # and shared by every object; build it per instance instead.
     if embedding_method is None:
         embedding_method = GSVD(10)
     self.n_clusters = n_clusters
     self.embedding_method = embedding_method
     self.co_cluster = co_cluster
     self.bipartite = None
Esempio n. 4
0
    def test_regular(self):
        """Each algorithm must return one label per node on the undirected and directed test graphs."""
        algorithms = [Louvain(), KMeans(embedding_method=GSVD(3))]
        for algo in algorithms:
            for graph in [test_graph(), test_digraph()]:
                expected_shape = (graph.shape[0], )
                predicted = algo.fit_transform(graph)
                self.assertEqual(predicted.shape, expected_shape)
Esempio n. 5
0
 def __init__(self,
              embedding_method: BaseEmbedding = None,
              n_neighbors: int = 5,
              factor_distance: float = 2,
              leaf_size: int = 16,
              p: float = 2,
              tol_nn: float = 0.01,
              n_jobs: int = 1):
     """Forward the configuration to the parent initializer.

     Parameters
     ----------
     embedding_method:
         Embedding algorithm. If ``None`` (default), a new ``GSVD(10)`` is created
         for this instance.
     n_neighbors, factor_distance, leaf_size, p, tol_nn, n_jobs:
         Passed positionally, in this order, to the parent initializer.
     """
     # A default instance written in the signature is built once, at definition time,
     # and shared by every object; build it per instance instead.
     if embedding_method is None:
         embedding_method = GSVD(10)
     super(BiKNN,
           self).__init__(embedding_method, n_neighbors, factor_distance,
                          leaf_size, p, tol_nn, n_jobs)
Esempio n. 6
0
    def test_bipartite(self):
        """Row and column labels must match the two dimensions of the test bigraph."""
        biadjacency = test_bigraph()
        n_row, n_col = biadjacency.shape
        expected_row_shape = (n_row, )
        expected_col_shape = (n_col, )

        for algo in [BiLouvain(), BiKMeans(embedding_method=GSVD(3), co_cluster=True)]:
            algo.fit_transform(biadjacency)
            self.assertEqual(algo.labels_row_.shape, expected_row_shape)
            self.assertEqual(algo.labels_col_.shape, expected_col_shape)
Esempio n. 7
0
    def test_regular(self):
        """Each algorithm must return one label per node, with original and boolean weights."""
        algorithms = [Louvain(), KMeans(embedding_method=GSVD(3))]
        for algo in algorithms:
            for graph in [test_graph(), test_digraph()]:
                n_nodes = graph.shape[0]
                # Copy of the graph whose stored values are cast to bool.
                graph_bool = graph.copy()
                graph_bool.data = graph_bool.data.astype(bool)

                labels_weighted = algo.fit_transform(graph)
                labels_bool = algo.fit_transform(graph_bool)
                for labels in (labels_weighted, labels_bool):
                    self.assertEqual(labels.shape, (n_nodes, ))
Esempio n. 8
0
 def test_bipartite(self):
     """Row and column labels must match the two dimensions of the test bigraph."""
     biadjacency = test_bigraph()
     n_row, n_col = biadjacency.shape
     algorithms = [
         Louvain(return_aggregate=True),
         KMeans(embedding_method=GSVD(3), co_cluster=True, return_aggregate=True),
         PropagationClustering(return_aggregate=True),
     ]
     for clustering in algorithms:
         clustering.fit_transform(biadjacency)
         row_shape = clustering.labels_row_.shape
         self.assertEqual(row_shape, (n_row, ))
         col_shape = clustering.labels_col_.shape
         self.assertEqual(col_shape, (n_col, ))
Esempio n. 9
0
    def test_bipartite(self):
        """Row and column labels must match the two dimensions of the test bigraph."""
        biadjacency = test_bigraph()
        n_row, n_col = biadjacency.shape

        algorithms = [
            BiLouvain(return_aggregate=True),
            BiKMeans(embedding_method=GSVD(3), co_cluster=True, return_aggregate=True),
            BiPropagationClustering(return_aggregate=True),
        ]
        for algo in algorithms:
            algo.fit_transform(biadjacency)
            # Read both label vectors before asserting, as the fitted attributes are the subject here.
            row_shape = algo.labels_row_.shape
            col_shape = algo.labels_col_.shape
            self.assertEqual(row_shape, (n_row,))
            self.assertEqual(col_shape, (n_col,))
Esempio n. 10
0
 def __init__(self,
              n_clusters: int = 2,
              embedding_method: BaseBiEmbedding = None,
              co_cluster: bool = False,
              sort_clusters: bool = True,
              return_membership: bool = True,
              return_aggregate: bool = True):
     """Forward the configuration to the parent initializer and record ``co_cluster``.

     Parameters
     ----------
     n_clusters:
         Forwarded to the parent initializer.
     embedding_method:
         Embedding algorithm, forwarded to the parent initializer.
         If ``None`` (default), a new ``GSVD(10)`` is created for this instance.
     co_cluster:
         Stored as ``self.co_cluster``.
     sort_clusters, return_membership, return_aggregate:
         Forwarded unchanged to the parent initializer.
     """
     # A default instance written in the signature is built once, at definition time,
     # and shared by every object; build it per instance instead.
     if embedding_method is None:
         embedding_method = GSVD(10)
     super(BiKMeans, self).__init__(sort_clusters=sort_clusters,
                                    return_membership=return_membership,
                                    return_aggregate=return_aggregate,
                                    n_clusters=n_clusters,
                                    embedding_method=embedding_method)
     self.co_cluster = co_cluster
Esempio n. 11
0
    def __init__(self, embedding_method: Optional[BaseEmbedding] = None, n_neighbors: int = 5,
                 factor_distance: float = 2, leaf_size: int = 16, p: float = 2, tol_nn: float = 0.01,
                 n_jobs: Optional[int] = None):
        """Store the nearest-neighbor configuration.

        Parameters
        ----------
        embedding_method:
            Embedding algorithm, stored as ``self.embedding_method``.
            If ``None`` (default), a new ``GSVD(10)`` is created for this instance.
        n_neighbors, factor_distance, leaf_size, p, tol_nn:
            Stored unchanged as attributes of the same name.
        n_jobs:
            Passed through ``check_n_jobs``; if the result is ``None``, ``-1`` is stored instead.
        """
        super(KNN, self).__init__()

        # A default instance written in the signature is built once, at definition time,
        # and shared by every object; build it per instance instead.
        if embedding_method is None:
            embedding_method = GSVD(10)

        self.embedding_method = embedding_method
        self.n_neighbors = n_neighbors
        self.factor_distance = factor_distance
        self.leaf_size = leaf_size
        self.p = p
        self.tol_nn = tol_nn
        self.n_jobs = check_n_jobs(n_jobs)
        if self.n_jobs is None:
            self.n_jobs = -1
        self.bipartite = None
Esempio n. 12
0
 def test_regular(self):
     """Check label and aggregate shapes on the test graphs, with original and boolean weights."""
     algorithms = [
         Louvain(return_aggregate=True),
         KMeans(embedding_method=GSVD(3), return_aggregate=True),
         PropagationClustering(return_aggregate=True),
     ]
     for clustering in algorithms:
         for graph in [test_graph(), test_digraph(), test_graph_disconnect()]:
             n_nodes = graph.shape[0]

             def check_fit(matrix):
                 # Fit on the given matrix, then verify the label vector and the aggregate shape.
                 predicted = clustering.fit_transform(matrix)
                 n_clusters = len(set(predicted))
                 self.assertEqual(predicted.shape, (n_nodes, ))
                 self.assertEqual(clustering.aggregate_.shape, (n_clusters, n_clusters))

             check_fit(graph)
             # The boolean cast happens only after the first fit has been checked.
             check_fit(graph.astype(bool))