def test_unconnected_warning(self):
    """Fitting ``LaplacianSpectralEmbed`` on this two-block sample must warn.

    The block probability matrix has zeros off the diagonal, and the fit is
    expected to emit a ``UserWarning``.
    """
    block_sizes = [50, 50]
    block_probs = [[1, 0], [0, 1]]
    graph = sbm(block_sizes, block_probs)

    # Both construction and fitting happen under the warning check.
    with self.assertWarns(UserWarning):
        embedder = LaplacianSpectralEmbed()
        embedder.fit(graph)
def test_bic():
    """
    A 3 block model should give 3 clusters: over repeated samples the BIC
    minimum must sit at position 2 and the matching ARI must be 1.
    """
    np.random.seed(3)
    n_trials = 10

    # Three equally sized communities and their labels
    block_size = 50
    community_sizes = [block_size, block_size, block_size]
    block_probs = np.array([[0.8, 0.3, 0.2], [0.3, 0.8, 0.3], [0.2, 0.3, 0.8]])
    true_labels = np.repeat([1, 2, 3], repeats=block_size)

    for _ in range(n_trials):
        graph = sbm(n=community_sizes, p=block_probs)

        # Latent positions from the adjacency spectral embedding
        embedder = AdjacencySpectralEmbed(n_components=5)
        latent = embedder.fit_transform(graph)

        # Fit the clusterings; the labels are handed to fit with the embedding
        clusterer = GaussianCluster(min_components=10)
        clusterer.fit(latent, true_labels)

        bic_table = clusterer.bic_
        ari_table = clusterer.ari_

        best_position = bic_table.iloc[:, 0].values.argmin()
        assert_equal(2, best_position)

        # The plus one is to adjust the index by min_components
        assert_allclose(1, ari_table.iloc[:, 0][best_position + 1])
def test_ase_three_blocks():
    """
    A 3 block model, embedded and clustered with ``GaussianCluster``, should
    select 3 components with a perfect ARI on every repeated sample.
    """
    np.random.seed(3)
    n_trials = 10

    # Three equally sized communities and their labels
    block_size = 50
    community_sizes = [block_size, block_size, block_size]
    block_probs = np.array([[0.8, 0.3, 0.2], [0.3, 0.8, 0.3], [0.2, 0.3, 0.8]])
    true_labels = np.repeat([1, 2, 3], repeats=block_size)

    for _ in range(n_trials):
        graph = sbm(n=community_sizes, p=block_probs)

        # Latent positions from the adjacency spectral embedding
        embedder = AdjacencySpectralEmbed(n_components=5)
        latent = embedder.fit_transform(graph)

        # Fit the clusterings; the labels are handed to fit with the embedding
        clusterer = GaussianCluster(min_components=10)
        clusterer.fit(latent, true_labels)

        selected = clusterer.n_components_

        # Assert that the three cluster model is the best
        assert_equal(selected, 3)

        # Assert that we get perfect clustering
        assert_allclose(clusterer.ari_.loc[selected], 1)
# NOTE(review): an earlier function in this source carries the same name; if
# both live in one module, this definition replaces it -- confirm.
def test_ase_three_blocks():
    """
    A 3 block model, embedded and clustered with ``AutoGMMCluster``, should
    select 3 components with a perfect ARI.
    """
    np.random.seed(1)

    # Three equally sized communities and their labels
    block_size = 50
    community_sizes = [block_size, block_size, block_size]
    block_probs = np.array([[0.8, 0.3, 0.2], [0.3, 0.8, 0.3], [0.2, 0.3, 0.8]])
    true_labels = np.repeat([1, 2, 3], repeats=block_size)

    graph = sbm(n=community_sizes, p=block_probs)

    # Latent positions from the adjacency spectral embedding
    embedder = AdjacencySpectralEmbed(n_components=5)
    latent = embedder.fit_transform(graph)

    # Fit the clustering; the labels are handed to fit with the embedding
    auto_gmm = AutoGMMCluster(max_components=10)
    auto_gmm.fit(latent, true_labels)

    selected = auto_gmm.n_components_

    # Assert that the three cluster model is the best
    assert_equal(selected, 3)

    # Assert that we get perfect clustering
    assert_allclose(auto_gmm.ari_, 1)
def test_output_two_block_sbm():
    """The first elbow ``select_dimension`` reports for this two block sample is 2."""
    np.random.seed(10)

    block_sizes = [100, 100]
    block_probs = np.array([[0.5, 0.1], [0.1, 0.5]])
    graph = sbm(block_sizes, block_probs)

    # Only the first returned item (the elbows) is inspected.
    elbows, _ = select_dimension(graph, n_elbows=2)
    first_elbow = elbows[0]
    assert_equal(first_elbow, 2)
def make_train_directed(n=None, m=10):
    """
    Make a 4 class training dataset of directed graphs.

    Parameters
    ----------
    n : optional
        Passed through unchanged as the first argument of ``sbm``. When
        omitted (or ``None``), ``[128, 128]`` is used.
    m : int
        Number of graphs sampled for each of the four block matrices.

    Returns
    -------
    list
        ``4 * m`` results of ``sbm(n, p, directed=True)``. Within each of the
        ``m`` repetitions the four block matrices are used in a fixed order.
    """
    # Build the default inside the call instead of in the signature, so a
    # single list object is never shared between calls.
    if n is None:
        n = [128, 128]

    # One block probability matrix per class.
    block_matrices = (
        [[0, 0.9], [0, 0]],
        [[0, 0], [0.9, 0]],
        [[0.9, 0.9], [0, 0]],
        [[0, 0], [0.9, 0.9]],
    )

    return [sbm(n, p, directed=True) for _ in range(m) for p in block_matrices]
def test_sort_inds():
    """
    Check the ordering ``_sort_inds`` produces for a two-level labeling.

    A directed five-block sample is labeled with an inner (``labels1``) and an
    outer (``labels2``) category. The returned indices must order the outer
    blocks, then the inner blocks, and leave degrees non-increasing inside
    each inner block.
    """
    B = np.array(
        [
            [0, 0.2, 0.1, 0.1, 0.1],
            [0.2, 0.8, 0.1, 0.3, 0.1],
            [0.15, 0.1, 0, 0.05, 0.1],
            [0.1, 0.1, 0.2, 1, 0.1],
            [0.1, 0.2, 0.1, 0.1, 0.8],
        ]
    )
    g = sbm([10, 30, 50, 25, 25], B, directed=True)
    # Column sums plus row sums of the sampled graph.
    degrees = g.sum(axis=0) + g.sum(axis=1)

    labels2 = np.array(40 * ["0"] + 100 * ["1"])
    labels1 = np.array(
        10 * ["d"] + 30 * ["c"] + 50 * ["d"] + 25 * ["e"] + 25 * ["c"]
    )

    sorted_inds = _sort_inds(g, labels1, labels2)

    # sort outer blocks first if given, sort by num verts in the block
    # for inner hier, sort by num verts for that category across the entire
    # graph, ie if there are multiple inner hier across different outer
    # blocks, sort by prevalence in the entire graph, not within block
    # this is to make the ordering within outer block consistent
    # within a block, sort by degree

    # outer block order should thus be: 1, 0
    # inner block order should thus be: d, c, e

    # Positions of each inner block after sorting, with its expected label.
    inner_blocks = (
        (slice(None, 50), "d"),
        (slice(50, 75), "c"),
        (slice(75, 100), "e"),
        (slice(100, 110), "d"),
        (slice(110, None), "c"),
    )

    # show that outer blocks are sorted correctly
    labels2 = labels2[sorted_inds]
    assert np.all(labels2[:100] == "1")
    assert np.all(labels2[100:] == "0")

    # show that inner blocks are sorted correctly
    labels1 = labels1[sorted_inds]
    for block, expected_label in inner_blocks:
        assert np.all(labels1[block] == expected_label)

    # show that within block, everything is in descending degree order
    degrees = degrees[sorted_inds]
    for block, _ in inner_blocks:
        assert np.all(np.diff(degrees[block]) <= 0)
def make_train_undirected(n=None, m=10, alpha=1):
    """
    Make 4 class training dataset

    Parameters
    ----------
    n : optional
        Number of vertices, passed through unchanged as the first argument of
        ``sbm``. When omitted (or ``None``), ``[128, 128]`` is used.
    m : int
        Number of graphs from each class.
    alpha : number
        Multiplier applied to each class offset before it is added to the
        constant 0.25 base matrix.

    Returns
    -------
    list
        ``4 * m`` results of ``sbm``. Within each of the ``m`` repetitions
        the four class offsets are used in a fixed order.
    """
    # Build the default inside the call instead of in the signature, so a
    # single list object is never shared between calls.
    if n is None:
        n = [128, 128]

    # Per-class offsets; the second is the negation of the first.
    c1 = np.array([[0.1, 0], [0, 0.1]])
    c2 = -1 * c1
    c3 = np.array([[0.1, 0], [0, 0]])
    c4 = np.array([[0, 0], [0, 0.1]])
    class_offsets = (c1, c2, c3, c4)

    return [
        sbm(n, np.ones((2, 2)) * 0.25 + alpha * c)
        for _ in range(m)
        for c in class_offsets
    ]
def test_casc_cca():
    """
    ``CovariateAssistedSpectralEmbed`` with ``cca=True`` should reproduce the
    expected two-group labeling (ARI of exactly 1) on a seeded 20-vertex
    sample with fixed covariates.
    """
    block_sizes = [10, 10]
    block_probs = [[0.8, 0.2], [0.2, 0.8]]

    # Seed right before sampling so the graph is reproducible.
    np.random.seed(105)
    graph = sbm(n=block_sizes, p=block_probs)

    # One row of two covariates per vertex.
    covariates = np.array(
        [
            [1.0, 0.0], [1.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0],
            [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 0.0], [0.0, 0.0],
            [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 0.0], [0.0, 0.0],
            [0.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0],
        ]
    )

    model = CovariateAssistedSpectralEmbed(
        n_components=2, assortative=True, cca=True, check_lcc=False
    )
    predicted = model.fit_predict(
        np.array(graph), covariates, y=None, return_full=False
    )

    # First ten vertices in one group, last ten in the other.
    expected = [1] * 10 + [0] * 10
    result_ari = ARI(predicted, expected)
    assert result_ari == 1
def _test_sbm_er_binary_undirected(self, method, P, *args, **kwargs):
    """
    Check that ``method`` scores better on SBM samples than on ER samples.

    Over 50 seeded trials, an SBM sample (block matrix ``P``) and an ER
    sample are each embedded in two components with ``method``. The
    ``latent_left_`` shapes are checked, and ``_kmeans_comparison`` supplies
    one score per embedding. The SBM must win strictly more trials than the ER.

    ``*args`` and ``**kwargs`` are accepted but not used here.
    """
    np.random.seed(8888)

    n_trials = 50
    n_verts = 200
    n_communities = 2
    community_sizes = [100, 100]
    expected_shape = (n_verts, n_communities)

    sbm_wins = 0
    er_wins = 0
    for _ in range(n_trials):
        # Sample both graphs first, then build the two embedders.
        sbm_graph = sbm(community_sizes, P)
        er_graph = er_np(n_verts, 0.5)
        sbm_embedder = method(n_components=2)
        er_embedder = method(n_components=2)

        # Both label vectors: first 100 entries are 0, the rest are 1.
        labels_sbm = np.zeros((n_verts), dtype=np.int8)
        labels_er = np.zeros((n_verts), dtype=np.int8)
        labels_sbm[100:] = 1
        labels_er[100:] = 1

        sbm_embedder.fit(sbm_graph)
        er_embedder.fit(er_graph)

        X_sbm = sbm_embedder.latent_left_
        X_er = er_embedder.latent_left_

        self.assertEqual(X_sbm.shape, expected_shape)
        self.assertEqual(X_er.shape, expected_shape)

        aris = _kmeans_comparison(
            (X_sbm, X_er), (labels_sbm, labels_er), n_communities
        )

        # Ties count for neither side.
        sbm_wins += aris[0] > aris[1]
        er_wins += aris[0] < aris[1]

    self.assertTrue(sbm_wins > er_wins)