def test_ase_three_blocks():
    """
    A graph sampled from a 3 block model should be clustered into 3 groups.
    """
    np.random.seed(1)

    # Sample the graph and build the matching ground-truth labels
    block_size = 50
    block_probs = np.array([[0.8, 0.3, 0.2], [0.3, 0.8, 0.3], [0.2, 0.3, 0.8]])
    truth = np.repeat([1, 2, 3], repeats=block_size)
    graph = sbm(n=[block_size] * 3, p=block_probs)

    # Estimate latent positions from the adjacency matrix
    embedder = AdjacencySpectralEmbed(n_components=5)
    latent = embedder.fit_transform(graph)

    # Fit the clustering model, handing it the ground-truth labels
    clusterer = AutoGMMCluster(max_components=10)
    clusterer.fit(latent, truth)

    # The selected model must have exactly three components ...
    assert_equal(clusterer.n_components_, 3)

    # ... and its reported ARI must be 1
    assert_allclose(clusterer.ari_, 1)
def setUp(self):
    """Seed the RNG, then build four SBM fixture graphs and the embedder."""
    np.random.seed(9001)

    block_sizes = [10, 10]
    block_probs = np.array([[0.9, 0.1], [0.1, 0.9]])

    # Per-block weight functions and their keyword arguments
    weight_fns = [[normal, poisson], [poisson, normal]]
    weight_kwargs = [
        [{"loc": 3, "scale": 1}, {"lam": 5}],
        [{"lam": 5}, {"loc": 3, "scale": 1}],
    ]
    weighted = {"wt": weight_fns, "wtargs": weight_kwargs}

    # Graphs are sampled in this order: unweighted, weighted,
    # unweighted directed, weighted directed.
    self.testgraphs = {
        "Guw": sbm(n=block_sizes, p=block_probs),
        "Gw": sbm(n=block_sizes, p=block_probs, **weighted),
        "Guwd": sbm(n=block_sizes, p=block_probs, directed=True),
        "Gwd": sbm(n=block_sizes, p=block_probs, directed=True, **weighted),
    }
    self.lse = LaplacianSpectralEmbed(n_components=2)
def test_unconnected_warning(self):
    """
    Fitting a graph sampled with p = [[1, 0], [0, 1]] must emit a UserWarning.
    """
    block_sizes = [50, 50]
    block_probs = [[1, 0], [0, 1]]
    graph = sbm(block_sizes, block_probs)

    # Construction and fitting both happen under the warning check
    with self.assertWarns(UserWarning):
        LaplacianSpectralEmbed().fit(graph)
def test_ase_three_blocks(self):
    """
    Over repeated samples of a 3 block model, expect 3 clusters every time.
    """
    np.random.seed(3)
    n_trials = 10

    # Model parameters and ground-truth labels are shared by all trials
    block_size = 50
    block_sizes = [block_size] * 3
    block_probs = np.array([[0.8, 0.3, 0.2], [0.3, 0.8, 0.3], [0.2, 0.3, 0.8]])
    truth = np.repeat([1, 2, 3], repeats=block_size)

    for _ in range(n_trials):
        graph = sbm(n=block_sizes, p=block_probs)

        # Estimate latent positions from the sampled graph
        latent = AdjacencySpectralEmbed(n_components=5).fit_transform(graph)

        # Fit the clustering model, handing it the ground-truth labels
        clusterer = GaussianCluster(min_components=10)
        clusterer.fit(latent, truth)
        best_k = clusterer.n_components_

        # The selected model must have exactly three components ...
        assert_equal(best_k, 3)

        # ... and the ARI recorded for that model must be 1
        assert_allclose(clusterer.ari_.loc[best_k], 1)
def test_unconnected_warning(self):
    """
    Fitting a sparse graph sampled with p = [[1, 0], [0, 1]] must emit a
    UserWarning.
    """
    block_sizes = [50, 50]
    block_probs = [[1, 0], [0, 1]]
    sparse_graph = csr_matrix(sbm(block_sizes, block_probs))

    # Construction and fitting both happen under the warning check
    with pytest.warns(UserWarning):
        LaplacianSpectralEmbed().fit(sparse_graph)
def test_output_two_block_sbm(self):
    """The first elbow selected for a two block SBM sample should be 2."""
    np.random.seed(10)

    block_sizes = [100, 100]
    block_probs = np.array([[0.5, 0.1], [0.1, 0.5]])
    graph = sbm(block_sizes, block_probs)

    # select_dimension returns a pair; only its first element is checked
    elbows, _unused = select_dimension(graph, n_elbows=2)
    first_elbow = elbows[0]
    assert_equal(first_elbow, 2)
def make_train_directed(n=None, m=10):
    """
    Build a list of directed SBM samples drawn from four block-probability
    matrices.

    Parameters
    ----------
    n : list of int, optional
        Block sizes forwarded to ``sbm``. Defaults to ``[128, 128]``. The
        default list is created inside the call so that separate calls never
        share one mutable default object.
    m : int, default 10
        Number of rounds; every round samples one graph per matrix.

    Returns
    -------
    list
        ``4 * m`` values returned by ``sbm(..., directed=True)``, ordered
        round by round and, within a round, in the order p1, p2, p3, p4.
    """
    if n is None:
        n = [128, 128]

    p1 = [[0, 0.9], [0, 0]]
    p2 = [[0, 0], [0.9, 0]]
    p3 = [[0.9, 0.9], [0, 0]]
    p4 = [[0, 0], [0.9, 0.9]]
    block_probs = (p1, p2, p3, p4)

    return [sbm(n, p, directed=True) for _ in range(m) for p in block_probs]
def test_sort_inds():
    """
    Check the ordering produced by ``_sort_inds`` for two levels of labels.

    Expected ordering, as asserted below:
      * outer groups come first, larger group first -> "1" then "0";
      * inside an outer group, inner groups follow their size across the
        whole graph (not within the outer group) -> "d", "c", "e";
      * inside each inner group, total degree is non-increasing.
    """
    block_probs = np.array(
        [
            [0, 0.2, 0.1, 0.1, 0.1],
            [0.2, 0.8, 0.1, 0.3, 0.1],
            [0.15, 0.1, 0, 0.05, 0.1],
            [0.1, 0.1, 0.2, 1, 0.1],
            [0.1, 0.2, 0.1, 0.1, 0.8],
        ]
    )
    graph = sbm([10, 30, 50, 25, 25], block_probs, directed=True)

    # Total degree = column sums + row sums of the directed adjacency matrix
    total_degree = graph.sum(axis=0) + graph.sum(axis=1)
    # Computed as in the original test; the assertions below do not read it
    degree_order = np.argsort(total_degree)

    outer = np.array(40 * ["0"] + 100 * ["1"])
    inner = np.array(
        10 * ["d"] + 30 * ["c"] + 50 * ["d"] + 25 * ["e"] + 25 * ["c"]
    )
    order = _sort_inds(graph, inner, outer, True)

    # Outer groups: the 100 "1" vertices first, then the 40 "0" vertices
    outer = outer[order]
    assert np.all(outer[:100] == "1")
    assert np.all(outer[100:] == "0")

    # Contiguous runs of inner labels expected after sorting
    runs = [
        (slice(None, 50), "d"),
        (slice(50, 75), "c"),
        (slice(75, 100), "e"),
        (slice(100, 110), "d"),
        (slice(110, None), "c"),
    ]

    inner = inner[order]
    for span, expected in runs:
        assert np.all(inner[span] == expected)

    # Within every run the degrees must never increase
    total_degree = total_degree[order]
    for span, _ in runs:
        assert np.all(np.diff(total_degree[span]) <= 0)
def _gen_mat_data(n: int = 20, m: int = 20, p: int = 0.50, mat_type: str = 'sb', binary: bool = False, asfile: bool = True, n_graphs: int = 1): if binary is True: wt = 1 else: wt = np.random.uniform mat_list = [] mat_file_list = [] for nm in range(n_graphs): if mat_type == 'er': mat = largest_connected_component( symmetrize( remove_loops( er_nm(n, m, wt=np.random.uniform, wtargs=dict(low=0, high=1))))) elif mat_type == 'sb': if p is None: raise ValueError( f"for mat_type {mat_type}, p cannot be None") mat = largest_connected_component( symmetrize( remove_loops( sbm(np.array([n]), np.array([[p]]), wt=wt, wtargs=dict(low=0, high=1))))) else: raise ValueError(f"mat_type {mat_type} not recognized!") mat_list.append(mat) if asfile is True: mat_path_tmp = tempfile.NamedTemporaryFile(mode='w+', suffix='.npy', delete=False) mat_path = str(mat_path_tmp.name) np.save(mat_path, mat) mat_file_list.append(mat_path) mat_path_tmp.close() return {'mat_list': mat_list, 'mat_file_list': mat_file_list}
def _test_sbm_er_binary(self, method, P, directed=False, sparse=False, *args, **kwargs):
    """
    Embed paired SBM and ER samples with ``method`` and check that k-means
    on the SBM embedding scores the higher ARI more often than on the ER one.

    ``method`` is called as ``method(n_components=2, concat=directed)``.
    ``P`` is the block-probability matrix handed to ``sbm``. ``sparse``
    converts both graphs to ``csr_matrix`` before embedding.
    """
    np.random.seed(8888)

    n_trials = 50
    n_verts = 200
    n_blocks = 2
    block_sizes = [100, 100]

    # Embedding width asserted below: twice the block count when directed
    expected_shape = (n_verts, 2 * n_blocks if directed else n_blocks)

    sbm_wins = 0
    er_wins = 0
    for _ in range(n_trials):
        sbm_graph = sbm(block_sizes, P, directed=directed)
        er_graph = er_np(n_verts, 0.5, directed=directed)
        if sparse:
            sbm_graph = csr_matrix(sbm_graph)
            er_graph = csr_matrix(er_graph)

        sbm_embedder = method(n_components=2, concat=directed)
        er_embedder = method(n_components=2, concat=directed)

        # Both graphs get the same labelling: first 100 -> 0, last 100 -> 1
        sbm_truth = np.zeros(n_verts, dtype=np.int8)
        sbm_truth[100:] = 1
        er_truth = sbm_truth.copy()

        sbm_latent = sbm_embedder.fit_transform(sbm_graph)
        er_latent = er_embedder.fit_transform(er_graph)

        self.assertEqual(sbm_latent.shape, expected_shape)
        self.assertEqual(er_latent.shape, expected_shape)

        scores = _kmeans_comparison(
            (sbm_latent, er_latent), (sbm_truth, er_truth), n_blocks
        )
        sbm_wins += scores[0] > scores[1]
        er_wins += scores[0] < scores[1]

    self.assertTrue(sbm_wins > er_wins)
def make_train_undirected(n=None, m=10, alpha=1):
    """
    Make 4 class training dataset.

    Each class uses the block-probability matrix ``0.25 + alpha * c``, where
    ``c`` is one of four fixed 2x2 perturbations.

    Parameters
    ----------
    n : list of int, optional
        Number of vertices per block, forwarded to ``sbm``. Defaults to
        ``[128, 128]``. The default list is created inside the call so that
        separate calls never share one mutable default object.
    m : int, default 10
        Number of graphs from each class.
    alpha : number, default 1
        Multiplier applied to each perturbation before it is added to 0.25.

    Returns
    -------
    list
        ``4 * m`` values returned by ``sbm``, ordered round by round and,
        within a round, in the order c1, c2, c3, c4.
    """
    if n is None:
        n = [128, 128]

    c1 = np.array([[0.1, 0], [0, 0.1]])
    c2 = -1 * c1
    c3 = np.array([[0.1, 0], [0, 0]])
    c4 = np.array([[0, 0], [0, 0.1]])
    perturbations = (c1, c2, c3, c4)

    return [
        sbm(n, np.ones((2, 2)) * 0.25 + alpha * c)
        for _ in range(m)
        for c in perturbations
    ]
def test_embedding(self):
    """
    Transforming a removed vertex should land within 0.1, per dimension, of
    the first in-sample latent position.
    """
    tol = 0.1
    block_size = 100
    block_probs = np.array([[0.8, 0.2], [0.2, 0.8]])
    graph, _labels = sbm(2 * [block_size], block_probs, return_labels=True)

    # Remove vertex 0 from the graph and keep what remove_vertices returns
    # for it
    held_out_idx = 0
    in_sample, held_out = remove_vertices(
        graph, indices=held_out_idx, return_removed=True
    )

    embedder = LaplacianSpectralEmbed(n_components=2)
    fitted = embedder.fit_transform(in_sample)
    projected = embedder.transform(held_out)

    # Compare against row 0 of the fitted embedding, one dimension at a time
    for dim in (0, 1):
        self.assertTrue(
            fitted[0][dim] - tol < projected[0][dim] < fitted[0][dim] + tol
        )
def test_directed_correct_latent_positions(self):
    """
    Check out-of-sample latent positions of a directed 3 block SBM.

    For each side (left, right), the diagonal distances between per-community
    means and that side's out-of-sample positions must be smaller than the
    distances to the opposite side's out-of-sample positions.
    """
    embedder = AdjacencySpectralEmbed(n_components=3)
    block_probs = np.array([[0.9, 0.1, 0.1], [0.3, 0.6, 0.1], [0.1, 0.5, 0.6]])
    graph, labels = sbm(
        [200, 200, 200], block_probs, directed=True, return_labels=True
    )

    # Index 0 plus every position where the label changes: one per community
    oos_idx = np.nonzero(np.r_[1, np.diff(labels)[:-1]])[0]

    # Drop the held-out labels one at a time, in the same order as the
    # indices, so the remaining labels line up with the in-sample vertices
    labels = list(labels)
    for idx in oos_idx:
        labels.pop(idx)

    # Split off the held-out vertices, fit in-sample, transform held-out
    in_sample, held_out = remove_vertices(
        graph, indices=oos_idx, return_removed=True
    )
    latent_left, latent_right = embedder.fit_transform(in_sample)
    oos_left, oos_right = embedder.transform(held_out)

    # (in-sample latent, same-side oos, opposite-side oos), left side first
    sides = (
        (latent_left, oos_left, oos_right),
        (latent_right, oos_right, oos_left),
    )
    for latent, oos_same, oos_other in sides:
        frame = pd.DataFrame(
            {
                "Type": labels,
                "Dimension 1": latent[:, 0],
                "Dimension 2": latent[:, 1],
                "Dimension 3": latent[:, 2],
            }
        )
        community_means = frame.groupby("Type").mean()

        dist_same = np.diag(pairwise_distances(community_means, oos_same))
        dist_other = np.diag(pairwise_distances(community_means, oos_other))
        self.assertTrue(all(dist_same < dist_other))
def _gen_mat_data(n: int=20, m: int=20, p: int=0.50, mat_type: str='sb', binary: bool=False, asfile: bool=True, n_graphs: int=1, lcc: bool=False, modality: str='func'): if binary is True: wt = 1 else: wt = np.random.uniform mat_list = [] mat_file_list = [] if n_graphs > 0: for nm in range(n_graphs): if mat_type == 'er': mat = symmetrize( remove_loops(er_nm(n, m, wt=np.random.uniform, wtargs=dict(low=0, high=1)))) elif mat_type == 'sb': if p is None: raise ValueError( f"for mat_type {mat_type}, p cannot be None") mat = symmetrize( remove_loops(sbm(np.array([n]), np.array([[p]]), wt=wt, wtargs=dict(low=0, high=1)))) else: raise ValueError(f"mat_type {mat_type} not recognized!") if lcc is True: mat = largest_connected_component(mat) mat_list.append(autofix(mat)) if asfile is True: path_tmp = tempfile.NamedTemporaryFile(mode='w+', suffix='.npy', delete=False) mat_path_tmp = str(path_tmp.name) out_folder = f"{str(Path.home())}/test_mats" os.makedirs(out_folder, exist_ok=True) if modality == 'func': mat_path = f"{out_folder}/graph_sub-999_modality-func_" \ f"model-corr_template-" \ f"MNI152_2mm_" \ f"parc_tol-6fwhm_hpass-" \ f"0Hz_" \ f"signal-mean_thrtype-prop_thr-" \ f"{round(random.uniform(0, 1),2)}.npy" elif modality == 'dwi': mat_path = f"{out_folder}/graph_sub-999_modality-func_" \ f"model-csa_template-" \ f"MNI152_2mm_tracktype-local_" \ f"traversal-det_minlength-30_" \ f"tol-5_thrtype-prop_thr-" \ f"{round(random.uniform(0, 1),2)}.npy" shutil.copyfile(mat_path_tmp, mat_path) np.save(mat_path, mat) mat_file_list.append(mat_path) path_tmp.close() return {'mat_list': mat_list, 'mat_file_list': mat_file_list}
import pytest
import numpy as np

from graspologic.embed.ase import AdjacencySpectralEmbed
from graspologic.simulations.simulations import sbm
from graspologic.nominate import SpectralVertexNomination

# global constants for tests
n_verts = 50
p = np.array([[0.7, 0.25, 0.2], [0.25, 0.8, 0.3], [0.2, 0.3, 0.85]])
labels = np.array([0] * n_verts + [1] * n_verts + [2] * n_verts)
# The builtin `int` is used as dtype: the `np.int` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
adj = np.array(sbm(3 * [n_verts], p), dtype=int)
embeder = AdjacencySpectralEmbed()
pre_embeded = embeder.fit_transform(adj)


def _nominate(X, seed, nominator=None, k=None):
    """
    Fit a nominator on ``X``, predict for ``seed`` and check output shapes.

    Parameters
    ----------
    X : array-like
        Data passed to ``nominator.fit``; ``X.shape[0]`` is the expected
        number of rows in both outputs.
    seed : array-like
        Passed to ``nominator.predict``; ``seed.shape[0]`` is the expected
        number of columns in both outputs.
    nominator : optional
        Object with ``fit`` and ``predict``. When None, a
        ``SpectralVertexNomination(n_neighbors=k)`` is created.
    k : optional
        Only used to build the default nominator.

    Returns
    -------
    The nomination list returned by ``nominator.predict``.
    """
    if nominator is None:
        nominator = SpectralVertexNomination(n_neighbors=k)
    nominator.fit(X)

    # Local name chosen so it does not shadow the module-level `n_verts`
    expected_shape = (X.shape[0], seed.shape[0])
    nom_list, dists = nominator.predict(seed)
    assert nom_list.shape == expected_shape
    assert dists.shape == expected_shape
    return nom_list


def _test_seed_input_dimensions():
    """A 3-dimensional seed array is expected to raise IndexError."""
    with pytest.raises(IndexError):
        _nominate(adj, np.zeros((5, 5, 5), dtype=int))