def co_neighbor_graph(adjacency: Union[sparse.csr_matrix, np.ndarray], normalized: bool = True, method='knn',
                      n_neighbors: int = 5, n_components: int = 8) -> sparse.csr_matrix:
    """Build the adjacency of the co-neighborhood graph.

    * Graphs
    * Digraphs
    * Bigraphs

    The target is :math:`\\tilde{A} = AF^{-1}A^T`, where F is a weight matrix.

    Parameters
    ----------
    adjacency:
        Adjacency of the input graph.
    normalized:
        If ``True``, F is the diagonal in-degree matrix :math:`F = \\text{diag}(A^T1)`.
        Otherwise, F is the identity matrix.
    method:
        Either ``'exact'`` or ``'knn'``.
        With ``'exact'``, the output is obtained by matrix multiplication. The result can be much
        denser than the input graph, which can trigger memory errors.
        With ``'knn'``, the co-neighborhood is approximated by a nearest-neighbor search
        (``KNNDense``) in a spectral embedding space.
    n_neighbors:
        Number of neighbors for the nearest-neighbor search. Only used if ``method='knn'``.
    n_components:
        Dimension of the embedding space. Only used if ``method='knn'``.

    Returns
    -------
    adjacency : sparse.csr_matrix
        Adjacency of the co-neighborhood.

    Raises
    ------
    ValueError
        If ``method`` is neither ``'exact'`` nor ``'knn'``.
    """
    adjacency = check_format(adjacency).astype(float)

    if method == 'exact':
        transposed = adjacency.T
        # The normalized variant rescales the transposed matrix before the product.
        right_factor = normalize(transposed).tocsr() if normalized else transposed
        return adjacency.dot(right_factor)

    if method == 'knn':
        # GSVD is used for the normalized variant, plain SVD otherwise.
        embedder_class = GSVD if normalized else SVD
        embedder = embedder_class(n_components, regularization=None)
        embedding = embedder.fit_transform(adjacency)

        neighbor_search = KNNDense(n_neighbors, undirected=True)
        neighbor_search.fit(embedding)
        return neighbor_search.adjacency_

    raise ValueError('method must be "exact" or "knn".')
def test_regular(self):
    """Check label and aggregate-adjacency shapes on several graphs, with original and boolean entries."""
    algorithms = [
        Louvain(return_aggregate=True),
        KMeans(embedding_method=GSVD(3), return_aggregate=True),
        PropagationClustering(return_aggregate=True),
    ]
    for algorithm in algorithms:
        # The graphs are rebuilt for every algorithm.
        graphs = [test_graph(), test_digraph(), test_graph_disconnect()]
        for graph in graphs:
            n_nodes = graph.shape[0]

            # Same sparsity pattern, stored values cast to bool.
            graph_bool = graph.copy()
            graph_bool.data = graph_bool.data.astype(bool)

            labels_original = algorithm.fit_transform(graph)
            labels_bool = algorithm.fit_transform(graph_bool)

            expected_shape = (n_nodes, )
            self.assertEqual(labels_original.shape, expected_shape)
            self.assertEqual(labels_bool.shape, expected_shape)

            # adjacency_ is read after the second fit, so it is compared with that fit's labels.
            n_clusters = len(set(labels_bool))
            self.assertEqual(algorithm.adjacency_.shape, (n_clusters, n_clusters))
def __init__(self, n_clusters: int = 8, embedding_method: BaseEmbedding = GSVD(10), co_cluster: bool = False,
             sort_clusters: bool = True, return_membership: bool = True, return_aggregate: bool = True):
    """Store the clustering settings and forward the post-processing flags to the parent class.

    Parameters
    ----------
    n_clusters:
        Stored as ``self.n_clusters``.
    embedding_method:
        Stored as ``self.embedding_method``.
    co_cluster:
        Stored as ``self.co_cluster``.
    sort_clusters, return_membership, return_aggregate:
        Passed unchanged to the parent constructor.
    """
    # NOTE(review): the default ``GSVD(10)`` object is created once, when this function is
    # defined, so every instance built without an explicit ``embedding_method`` holds the
    # same object. Confirm that this sharing is intended.
    parent_options = dict(
        sort_clusters=sort_clusters,
        return_membership=return_membership,
        return_aggregate=return_aggregate,
    )
    super(KMeans, self).__init__(**parent_options)

    self.n_clusters = n_clusters
    self.embedding_method = embedding_method
    self.co_cluster = co_cluster
    # Not determined at construction time.
    self.bipartite = None
def test_regular(self):
    """Check that each algorithm returns one label per node on a graph and a digraph."""
    algorithms = [Louvain(), KMeans(embedding_method=GSVD(3))]
    for algorithm in algorithms:
        # The graphs are rebuilt for every algorithm.
        graphs = [test_graph(), test_digraph()]
        for graph in graphs:
            n_nodes = graph.shape[0]
            labels = algorithm.fit_transform(graph)
            self.assertEqual(labels.shape, (n_nodes, ))
def __init__(self, embedding_method: BaseEmbedding = GSVD(10), n_neighbors: int = 5, factor_distance: float = 2,
             leaf_size: int = 16, p: float = 2, tol_nn: float = 0.01, n_jobs: int = 1):
    """Forward every setting, positionally and in declaration order, to the parent constructor."""
    # NOTE(review): the default ``GSVD(10)`` object is created once, when this function is
    # defined, so every instance built without an explicit ``embedding_method`` receives the
    # same object. Confirm that this sharing is intended.
    parent_init = super(BiKNN, self).__init__
    parent_init(
        embedding_method,
        n_neighbors,
        factor_distance,
        leaf_size,
        p,
        tol_nn,
        n_jobs,
    )
def test_bipartite(self):
    """Check that bipartite clustering yields one label per row and one per column."""
    biadjacency = test_bigraph()
    n_row, n_col = biadjacency.shape

    algorithms = [
        BiLouvain(),
        BiKMeans(embedding_method=GSVD(3), co_cluster=True),
    ]
    for algorithm in algorithms:
        # Only the fitted attributes are inspected; the returned labels are ignored.
        algorithm.fit_transform(biadjacency)
        self.assertEqual(algorithm.labels_row_.shape, (n_row, ))
        self.assertEqual(algorithm.labels_col_.shape, (n_col, ))
def test_regular(self):
    """Check label shapes on a graph and a digraph, with original and boolean entries."""
    algorithms = [Louvain(), KMeans(embedding_method=GSVD(3))]
    for algorithm in algorithms:
        # The graphs are rebuilt for every algorithm.
        graphs = [test_graph(), test_digraph()]
        for graph in graphs:
            n_nodes = graph.shape[0]

            # Same sparsity pattern, stored values cast to bool.
            graph_bool = graph.copy()
            graph_bool.data = graph_bool.data.astype(bool)

            labels_original = algorithm.fit_transform(graph)
            labels_bool = algorithm.fit_transform(graph_bool)

            expected_shape = (n_nodes, )
            self.assertEqual(labels_original.shape, expected_shape)
            self.assertEqual(labels_bool.shape, expected_shape)
def test_bipartite(self):
    """Check row and column label shapes for each clustering algorithm on a bipartite graph."""
    biadjacency = test_bigraph()
    n_row, n_col = biadjacency.shape

    algorithms = [
        Louvain(return_aggregate=True),
        KMeans(embedding_method=GSVD(3), co_cluster=True, return_aggregate=True),
        PropagationClustering(return_aggregate=True),
    ]
    for algorithm in algorithms:
        # Only the fitted attributes are inspected; the returned labels are ignored.
        algorithm.fit_transform(biadjacency)
        self.assertEqual(algorithm.labels_row_.shape, (n_row, ))
        self.assertEqual(algorithm.labels_col_.shape, (n_col, ))
def test_bipartite(self):
    """Check row and column label shapes for each bipartite clustering algorithm."""
    biadjacency = test_bigraph()
    n_row, n_col = biadjacency.shape

    algorithms = [
        BiLouvain(return_aggregate=True),
        BiKMeans(embedding_method=GSVD(3), co_cluster=True, return_aggregate=True),
        BiPropagationClustering(return_aggregate=True),
    ]
    for algorithm in algorithms:
        # Only the fitted attributes are inspected; the returned labels are ignored.
        algorithm.fit_transform(biadjacency)
        row_labels = algorithm.labels_row_
        col_labels = algorithm.labels_col_
        self.assertEqual(row_labels.shape, (n_row,))
        self.assertEqual(col_labels.shape, (n_col,))
def __init__(self, n_clusters: int = 2, embedding_method: BaseBiEmbedding = GSVD(10), co_cluster: bool = False,
             sort_clusters: bool = True, return_membership: bool = True, return_aggregate: bool = True):
    """Forward the clustering settings to the parent constructor and record the co-clustering flag.

    Parameters
    ----------
    n_clusters, embedding_method, sort_clusters, return_membership, return_aggregate:
        Passed by keyword to the parent constructor.
    co_cluster:
        Stored as ``self.co_cluster``.
    """
    # NOTE(review): the default ``GSVD(10)`` object is created once, when this function is
    # defined, so every instance built without an explicit ``embedding_method`` receives the
    # same object. Confirm that this sharing is intended.
    parent_options = dict(
        sort_clusters=sort_clusters,
        return_membership=return_membership,
        return_aggregate=return_aggregate,
        n_clusters=n_clusters,
        embedding_method=embedding_method,
    )
    super(BiKMeans, self).__init__(**parent_options)

    self.co_cluster = co_cluster
def __init__(self, embedding_method: BaseEmbedding = GSVD(10), n_neighbors: int = 5, factor_distance: float = 2,
             leaf_size: int = 16, p: float = 2, tol_nn: float = 0.01, n_jobs: Optional[int] = None):
    """Store the nearest-neighbor settings on the instance.

    Every argument is stored under an attribute of the same name. ``n_jobs`` is first passed
    through ``check_n_jobs``; if that returns ``None``, ``-1`` is stored instead.
    """
    # NOTE(review): the default ``GSVD(10)`` object is created once, when this function is
    # defined, so every instance built without an explicit ``embedding_method`` holds the
    # same object. Confirm that this sharing is intended.
    super(KNN, self).__init__()

    self.embedding_method = embedding_method
    self.n_neighbors = n_neighbors
    self.factor_distance = factor_distance
    self.leaf_size = leaf_size
    self.p = p
    self.tol_nn = tol_nn

    # Fall back to -1 when the checked value is None.
    checked_n_jobs = check_n_jobs(n_jobs)
    self.n_jobs = -1 if checked_n_jobs is None else checked_n_jobs

    # Not determined at construction time.
    self.bipartite = None
def test_regular(self):
    """Check label and aggregate shapes on several graphs, with original and boolean entries."""

    def fit_and_check(algorithm, graph, n_nodes):
        # Fit once, then verify one label per node and a square aggregate with one
        # row per distinct label.
        labels = algorithm.fit_transform(graph)
        n_clusters = len(set(labels))
        self.assertEqual(labels.shape, (n_nodes, ))
        self.assertEqual(algorithm.aggregate_.shape, (n_clusters, n_clusters))

    algorithms = [
        Louvain(return_aggregate=True),
        KMeans(embedding_method=GSVD(3), return_aggregate=True),
        PropagationClustering(return_aggregate=True),
    ]
    for algorithm in algorithms:
        # The graphs are rebuilt for every algorithm.
        graphs = [test_graph(), test_digraph(), test_graph_disconnect()]
        for graph in graphs:
            n_nodes = graph.shape[0]
            fit_and_check(algorithm, graph, n_nodes)
            # The boolean copy is created only after the first fit has been checked.
            fit_and_check(algorithm, graph.astype(bool), n_nodes)