def test_normalized(self):
    """Check centering and prediction of the embedding.

    The embedding columns must be orthogonal to the weight vector built from
    the adjacency row sums plus ``n`` times the fitted regularization, and
    ``predict`` on the first four rows of the adjacency must reproduce the
    first four rows of the fitted embedding.
    """
    # normalized Laplacian
    spectral = Spectral(self.k, barycenter=False, normalized=False)
    embedding = spectral.fit_transform(self.adjacency)
    weights = self.adjacency.dot(np.ones(self.n)) + self.n * spectral.regularization_
    self.assertAlmostEqual(np.linalg.norm(embedding.T.dot(weights)), 0)
    error = np.abs(spectral.predict(self.adjacency[:4]) - embedding[:4]).sum()
    # The prediction error must actually be checked, not just computed.
    self.assertAlmostEqual(error, 0)
def test_predict(self):
    """Predicting from the adjacency applied to the first unit vector must
    give back the fitted embedding of node 0."""
    model = Spectral(4)
    model.fit(self.adjacency)

    n_nodes = self.adjacency.shape[0]
    indicator = np.zeros(n_nodes)
    indicator[0] = 1

    predicted = model.predict(self.adjacency.dot(indicator))
    deviation = abs(predicted - model.embedding_[0])
    self.assertAlmostEqual(max(deviation), 0)
def test_normalization(self):
    """Every row of the embedding must have unit Euclidean norm."""
    for adjacency in (test_graph(), test_graph_disconnect()):
        model = Spectral(3)
        embedding = model.fit_transform(adjacency)
        row_norms = np.linalg.norm(embedding, axis=1)
        expected = np.ones(adjacency.shape[0])
        gap = np.linalg.norm(row_norms - expected)
        self.assertAlmostEqual(gap, 0)
def test_noreg(self):
    """Exercise the embedding on the disconnected test graph without regularization."""
    graph = test_graph_disconnect()
    n_nodes = graph.shape[0]

    # regularization=None together with equalize=True must be rejected.
    rejected = Spectral(regularization=None, equalize=True)
    with self.assertRaises(ValueError):
        rejected.fit(graph)

    # A zero regularization must fit and predict on a random vector.
    zero_reg = Spectral(regularization=0.)
    zero_reg.fit(graph)
    zero_reg.predict(np.random.rand(n_nodes))
def test_regularization(self):
    """Check that embeddings stay centered for several regularization settings."""
    for adjacency in (test_graph(), test_graph_disconnect()):
        n_nodes = adjacency.shape[0]

        # random walk
        reg = 0.1
        model = Spectral(3, regularization=reg, normalized=False)
        embedding = model.fit_transform(adjacency)
        shifted_degrees = adjacency.dot(np.ones(n_nodes)) + reg
        self.assertAlmostEqual(
            np.linalg.norm(embedding.T.dot(shifted_degrees)), 0)

        # Laplacian (regularization=1), then without regularization
        # (regularization=-1): the column sums must vanish in both cases.
        for laplacian_reg in (1, -1):
            model = Spectral(3, decomposition='laplacian',
                             regularization=laplacian_reg, normalized=False)
            embedding = model.fit_transform(adjacency)
            column_sums = embedding.sum(axis=0)
            self.assertAlmostEqual(np.linalg.norm(column_sums), 0)
def test_bipartite(self):
    """Check centering of the stacked row/column embedding of biadjacency inputs."""
    inputs = (test_digraph(), test_bigraph(), test_bigraph_disconnect())
    for biadjacency in inputs:
        n_row, n_col = biadjacency.shape
        adjacency = bipartite2undirected(biadjacency)

        # normalized Laplacian
        model = Spectral(3)
        model.fit(biadjacency)
        stacked = np.vstack([model.embedding_row_, model.embedding_col_])
        degrees = adjacency.dot(np.ones(n_row + n_col))
        if not is_connected(adjacency):
            # Disconnected inputs are compared against weights shifted by 1.
            degrees += 1
        self.assertAlmostEqual(np.linalg.norm(stacked.T.dot(degrees)), 0)

        # regular Laplacian
        model = Spectral(3, normalized_laplacian=False)
        model.fit(biadjacency)
        stacked = np.vstack([model.embedding_row_, model.embedding_col_])
        column_sums = stacked.sum(axis=0)
        self.assertAlmostEqual(np.linalg.norm(column_sums), 0)
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'SpectralWard':
    """Embed the graph, then build a hierarchy on the embedding with Ward.

    Parameters
    ----------
    adjacency:
        Adjacency matrix of the graph.

    Returns
    -------
    self: :class:`SpectralWard`
    """
    embedding = Spectral(self.embedding_dimension).fit(adjacency).embedding_

    if self.l2normalization:
        row_norms = np.linalg.norm(embedding, axis=1)
        # Rows with zero norm are divided by 1, i.e. left unchanged.
        row_norms[row_norms == 0.] = 1
        embedding /= row_norms[:, np.newaxis]

    hierarchy = Ward()
    hierarchy.fit(embedding)
    self.dendrogram_ = hierarchy.dendrogram_

    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'SpectralClustering':
    """Embed the graph, then cluster the embedding with k-means.

    Parameters
    ----------
    adjacency:
        Adjacency matrix of the graph.

    Returns
    -------
    self: :class:`SpectralClustering`

    Raises
    ------
    ValueError
        If the adjacency matrix is not symmetric.
    """
    adjacency = check_format(adjacency)
    symmetric = is_symmetric(adjacency)
    if not symmetric:
        raise ValueError('The adjacency is not symmetric.')

    embedding = Spectral(self.embedding_dimension).fit(adjacency).embedding_

    if self.l2normalization:
        row_norms = np.linalg.norm(embedding, axis=1)
        # Rows with zero norm are divided by 1, i.e. left unchanged.
        row_norms[row_norms == 0.] = 1
        embedding /= row_norms[:, np.newaxis]

    clusterer = KMeans(self.n_clusters)
    clusterer.fit(embedding)
    self.labels_ = clusterer.labels_

    return self
def test_options(self):
    """Check dendrogram shapes for the default Ward and a co-clustering Ward."""
    default_ward = Ward()
    co_cluster_ward = Ward(embedding_method=Spectral(3), co_cluster=True)

    for algo in (default_ward, co_cluster_ward):
        for input_matrix in (test_graph(), test_digraph(), test_bigraph()):
            dendrogram = algo.fit_transform(input_matrix)
            n_row = input_matrix.shape[0]
            self.assertEqual(dendrogram.shape, (n_row - 1, 4))
            if algo.co_cluster:
                # The full dendrogram covers rows and columns together.
                n_total = sum(input_matrix.shape)
                self.assertEqual(algo.dendrogram_full_.shape,
                                 (n_total - 1, 4))
def test_solvers(self):
    """Both the 'lanczos' and 'halko' solvers must return an (n, k) embedding."""
    expected_shape = (self.n, self.k)
    for solver_name in ('lanczos', 'halko'):
        model = Spectral(self.k, solver=solver_name)
        embedding = model.fit_transform(self.adjacency)
        self.assertEqual(embedding.shape, expected_shape)
def test_directed(self):
    """Check prediction on rows 6-7 of the directed test graph, for both
    the original and boolean versions of the adjacency."""
    for adjacency in (test_digraph(), test_digraph().astype(bool)):
        # random walk
        model = Spectral(3, normalized=False)
        embedding = model.fit_transform(adjacency)
        gap = embedding[6:8] - model.predict(adjacency[6:8])
        self.assertAlmostEqual(np.linalg.norm(gap), 0)

        # Laplacian
        model = Spectral(3, decomposition='laplacian', normalized=False)
        embedding = model.fit_transform(adjacency)
        eigenvector_sums = model.eigenvectors_.sum(axis=0)
        self.assertAlmostEqual(np.linalg.norm(eigenvector_sums), 0)
        gap = embedding[6:8] - model.predict(adjacency[6:8])
        self.assertAlmostEqual(np.linalg.norm(gap), 0)
def test_directed(self):
    """Check prediction on rows 6-7 of the directed test graph.

    Runs on both the original and the boolean version of the adjacency. For
    the standard Laplacian, the columns of the eigenvectors must also sum
    to zero.
    """
    for adjacency in [test_digraph(), test_digraph().astype(bool)]:
        # normalized Laplacian
        spectral = Spectral(3)
        embedding = spectral.fit_transform(adjacency)
        self.assertAlmostEqual(
            np.linalg.norm(embedding[6:8] - spectral.predict(adjacency[6:8])), 0)

        # standard Laplacian
        spectral = Spectral(3, normalized_laplacian=False)
        embedding = spectral.fit_transform(adjacency)
        self.assertAlmostEqual(
            np.linalg.norm(spectral.eigenvectors_.sum(axis=0)), 0)
        self.assertAlmostEqual(
            np.linalg.norm(embedding[6:8] - spectral.predict(adjacency[6:8])), 0)
def test_regularization(self):
    """Check that embeddings stay centered for several regularization settings."""
    for adjacency in (test_graph(), test_graph_disconnect()):
        n_nodes = adjacency.shape[0]

        # normalized Laplacian
        reg = 0.1
        model = Spectral(3, regularization=reg)
        embedding = model.fit_transform(adjacency)
        shifted_degrees = adjacency.dot(np.ones(n_nodes)) + reg
        self.assertAlmostEqual(
            np.linalg.norm(embedding.T.dot(shifted_degrees)), 0)

        # standard Laplacian (regularization=1), then without
        # regularization (regularization=-1): column sums must vanish.
        for laplacian_reg in (1, -1):
            model = Spectral(3, normalized_laplacian=False,
                             regularization=laplacian_reg)
            embedding = model.fit_transform(adjacency)
            column_sums = embedding.sum(axis=0)
            self.assertAlmostEqual(np.linalg.norm(column_sums), 0)
def test_undirected(self):
    """Check centering and prediction on rows 1-3 for undirected test graphs."""
    for adjacency in (test_graph(), test_graph_disconnect()):
        n_nodes = adjacency.shape[0]

        # normalized Laplacian
        model = Spectral(3)
        embedding = model.fit_transform(adjacency)
        degrees = adjacency.dot(np.ones(n_nodes))
        if not is_connected(adjacency):
            # Disconnected graphs are compared against weights shifted by 1.
            degrees += 1
        self.assertAlmostEqual(np.linalg.norm(embedding.T.dot(degrees)), 0)
        gap = embedding[1:4] - model.predict(adjacency[1:4])
        self.assertAlmostEqual(np.linalg.norm(gap), 0)

        # regular Laplacian
        model = Spectral(3, normalized_laplacian=False)
        embedding = model.fit_transform(adjacency)
        column_sums = embedding.sum(axis=0)
        self.assertAlmostEqual(np.linalg.norm(column_sums), 0)
        gap = embedding[1:4] - model.predict(adjacency[1:4])
        self.assertAlmostEqual(np.linalg.norm(gap), 0)
def test_undirected(self):
    """Check label count and output shapes of KMeans on square adjacency inputs."""
    n_clusters = 3
    algo = KMeans(n_clusters, GSVD(2))
    algo_options = KMeans(n_clusters, Spectral(3), co_cluster=True,
                          sort_clusters=False)

    graphs = (test_graph(), test_graph_disconnect(), test_digraph())
    for adjacency in graphs:
        n_nodes = adjacency.shape[0]

        default_labels = algo.fit_transform(adjacency)
        self.assertEqual(len(set(default_labels)), n_clusters)
        self.assertEqual(algo.membership_.shape, (n_nodes, n_clusters))
        self.assertEqual(algo.aggregate_.shape, (n_clusters, n_clusters))

        option_labels = algo_options.fit_transform(adjacency)
        self.assertEqual(len(set(option_labels)), n_clusters)
def test_bipartite(self):
    """Check label counts and output shapes of KMeans on biadjacency inputs."""
    algo = KMeans(3, GSVD(2))
    algo_options = KMeans(4, Spectral(3), co_cluster=True,
                          sort_clusters=False)

    for biadjacency in (test_bigraph(), test_bigraph_disconnect()):
        n_row, n_col = biadjacency.shape

        # Default options: 3 clusters.
        algo.fit(biadjacency)
        self.assertEqual(len(algo.labels_), n_row)
        self.assertEqual(algo.membership_.shape, (n_row, 3))
        self.assertEqual(algo.membership_row_.shape, (n_row, 3))
        self.assertEqual(algo.membership_col_.shape, (n_col, 3))
        self.assertEqual(algo.aggregate_.shape, (3, 3))

        # Co-clustering: 4 clusters counted over row and column labels.
        algo_options.fit(biadjacency)
        all_labels = np.hstack(
            (algo_options.labels_row_, algo_options.labels_col_))
        self.assertEqual(len(set(all_labels)), 4)
        self.assertEqual(algo_options.membership_.shape, (n_row, 4))
        self.assertEqual(algo_options.membership_row_.shape, (n_row, 4))
        self.assertEqual(algo_options.membership_col_.shape, (n_col, 4))
        self.assertEqual(algo_options.aggregate_.shape, (4, 4))
def setUp(self):
    """Create the two lists of embedding methods used by the tests."""
    generic_methods = [
        Spectral(),
        GSVD(),
        SVD(),
    ]
    bipartite_methods = [
        BiSpectral(),
        GSVD(),
        SVD(),
    ]
    self.methods = generic_methods
    self.bimethods = bipartite_methods
def test_spectral_basic(self):
    """Check shape, eigenvalue bounds and centering with the regular
    Laplacian and the lanczos solver."""
    # Spectral with lanczos solver
    model = Spectral(2, normalized_laplacian=False, scaling=None,
                     solver='lanczos')
    model.fit(self.adjacency)
    self.assertTrue(has_proper_shape(self.adjacency, model))

    # min() over a boolean mask is True only if every entry is True;
    # max() over a boolean mask is True as soon as one entry is True.
    # NOTE(review): the upper bound is therefore only checked for at least
    # one eigenvalue -- confirm whether "all" was intended.
    lower_ok = min(model.eigenvalues_ >= -1)
    upper_ok = max(model.eigenvalues_ <= 1)
    self.assertTrue(lower_ok and upper_ok)

    # test if the embedding is centered
    # without regularization
    model = Spectral(2, normalized_laplacian=False, scaling=None,
                     solver='lanczos', regularization=0)
    model.fit(self.house)
    centering = barycenter_norm(self.house, model)
    self.assertAlmostEqual(centering, 0)

    # with regularization
    model.regularization = 0.1
    model.fit(self.house)
    centering = barycenter_norm(self.house, model)
    self.assertAlmostEqual(centering, 0)
def test_spectral_divide_scaling(self):
    """With scaling='divide' and no regularization, the barycenter norm of
    the house embedding must be zero to 7 decimal places."""
    model = Spectral(2, scaling='divide')
    model.regularization = None
    model.fit(self.house)
    centering = barycenter_norm(self.house, model)
    self.assertAlmostEqual(centering, 0, places=7)
def test_equalize(self):
    """Fit with equalize=True and predict on an all-ones vector; the test
    passes as long as neither call raises."""
    model = Spectral(self.k, equalize=True)
    model.fit(self.adjacency)
    all_ones = np.ones(self.n)
    model.predict(all_ones)
def test_spectral_normalized(self):
    """Check shape, eigenvalue range and centering with the normalized Laplacian.

    The same checks are run with the 'lanczos' and the 'halko' solver:
    the fitted model must have the proper shape, every eigenvalue must lie
    in [-1e-6, 2], and the embedding of the house graph must be centered
    both without regularization and with regularization 0.1 (the latter
    only to 2 decimal places).
    """
    for solver in ('lanczos', 'halko'):
        spectral = Spectral(2, normalized_laplacian=True, solver=solver)
        spectral.fit(self.adjacency)
        self.assertTrue(has_proper_shape(self.adjacency, spectral))

        # Use all() on both masks: max() over a boolean mask would pass as
        # soon as a single eigenvalue satisfied the upper bound.
        eigenvalues = spectral.eigenvalues_
        self.assertTrue(all(eigenvalues >= -1e-6) and all(eigenvalues <= 2))

        # test if the embedding is centered
        # without regularization
        spectral.regularization = None
        spectral.fit(self.house)
        self.assertAlmostEqual(barycenter_norm(self.house, spectral), 0)

        # with regularization
        spectral.regularization = 0.1
        spectral.fit(self.house)
        self.assertAlmostEqual(barycenter_norm(self.house, spectral), 0, places=2)
def test_regularization(self):
    """A regularization of -1 must resolve to 0 on the test graph."""
    graph = test_graph()
    embedder = Spectral()
    resolved = embedder._get_regularization(-1, graph)
    self.assertEqual(resolved, 0)
def test_regular(self):
    """Check the regular Laplacian embedding and its input validation."""
    # regular Laplacian
    model = Spectral(self.k, normalized_laplacian=False, barycenter=False,
                     normalized=False)
    embedding = model.fit_transform(self.adjacency)
    column_means = embedding.mean(axis=0)
    self.assertAlmostEqual(np.linalg.norm(column_means), 0)
    predicted = model.predict(self.adjacency[1])
    self.assertAlmostEqual(np.abs(predicted - embedding[1]).sum(), 0)

    strict = Spectral(self.k, normalized_laplacian=False, regularization=0,
                      equalize=True)
    # Each of these inputs must be rejected with a ValueError.
    for make_input in (test_bigraph, test_digraph, test_graph_disconnect):
        with self.assertRaises(ValueError):
            strict.fit(make_input())

    # An all-ones square input of side k - 1 must trigger a warning.
    with self.assertWarns(Warning):
        side = self.k - 1
        strict.fit_transform(np.ones((side, side)))
def test_no_scaling(self):
    """Fit with scaling=0 and predict on an all-ones vector; the test
    passes as long as neither call raises."""
    model = Spectral(self.k, scaling=0)
    model.fit(self.adjacency)
    all_ones = np.ones(self.n)
    model.predict(all_ones)
def test_options(self):
    """Ward with a Spectral(3) embedding must return an (n - 1, 4) dendrogram."""
    graph = test_graph()
    n_nodes = graph.shape[0]
    algo = Ward(embedding_method=Spectral(3))
    dendrogram = algo.fit_transform(graph)
    self.assertEqual(dendrogram.shape, (n_nodes - 1, 4))