def get(n=50):
    """Sample two independent 2-block SBMs (n nodes per block, identical
    assortative block probabilities) and return their ASE embeddings."""
    sizes = [n, n]
    probs = np.array([[0.9, 0.1], [0.1, 0.9]])
    # Sample the two graphs in the same order as before so RNG state matches.
    graphs = [sbm(sizes, probs), sbm(sizes, probs)]
    embeddings = [AdjacencySpectralEmbed().fit_transform(g) for g in graphs]
    return embeddings[0], embeddings[1]
def to_minigraph(
    adj,
    labels,
    drop_neg=True,
    remove_diag=True,
    size_scaler=1,
    use_counts=False,
    use_weights=True,
    color_map=None,
):
    """Collapse a vertex-level adjacency and partition into a block-level
    "minigraph" (one node per label) decorated with plotting attributes.

    Parameters
    ----------
    adj : array-like
        Vertex-level adjacency matrix.
    labels : array-like
        Partition label per vertex; also used as node names in the minigraph.
    drop_neg : bool
        If True, drop the "-1" block (unlabeled vertices) from the block matrix.
    remove_diag : bool
        If True, zero the diagonal of the block matrix (self-loops).
    size_scaler : numeric
        Multiplier applied to block counts for the "Size" node attribute.
    use_counts, use_weights : bool
        Forwarded to get_blockmodel_df to control how block values are computed.
    color_map : dict or None
        Label -> color mapping; defaults to glasbey colors if None.

    Returns
    -------
    nx.DiGraph
        Minigraph with Size, Signal Flow, AdjEvec-i, Spring-x/y, Color attributes.
    """
    # convert the adjacency and a partition to a minigraph based on SBM probs
    prob_df = get_blockmodel_df(
        adj, labels, return_counts=use_counts, use_weights=use_weights
    )
    if drop_neg and ("-1" in prob_df.index):
        prob_df.drop("-1", axis=0, inplace=True)
        prob_df.drop("-1", axis=1, inplace=True)
    if remove_diag:
        # `.values` returns a view here, so the in-place subtraction below
        # also zeroes the diagonal of prob_df itself.
        adj = prob_df.values
        adj -= np.diag(np.diag(adj))
        # NOTE(review): `prob_df.data = prob_df` only sets a stray attribute on
        # the DataFrame; it looks like a leftover/no-op — confirm intent.
        prob_df.data = prob_df
    g = nx.from_pandas_adjacency(prob_df, create_using=nx.DiGraph())
    uni_labels, counts = np.unique(labels, return_counts=True)
    # add size attribute base on number of vertices
    size_map = dict(zip(uni_labels, size_scaler * counts))
    nx.set_node_attributes(g, size_map, name="Size")
    # add signal flow attribute (for the minigraph itself)
    mini_adj = nx.to_numpy_array(g, nodelist=uni_labels)
    node_signal_flow = signal_flow(mini_adj)
    sf_map = dict(zip(uni_labels, node_signal_flow))
    nx.set_node_attributes(g, sf_map, name="Signal Flow")
    # add spectral properties
    sym_adj = symmetrize(mini_adj)
    n_components = 10
    latent = AdjacencySpectralEmbed(n_components=n_components).fit_transform(sym_adj)
    for i in range(n_components):
        latent_dim = latent[:, i]
        lap_map = dict(zip(uni_labels, latent_dim))
        nx.set_node_attributes(g, lap_map, name=f"AdjEvec-{i}")
    # add spring layout properties
    pos = nx.spring_layout(g)
    spring_x = {}
    spring_y = {}
    for key, val in pos.items():
        spring_x[key] = val[0]
        spring_y[key] = val[1]
    nx.set_node_attributes(g, spring_x, name="Spring-x")
    nx.set_node_attributes(g, spring_y, name="Spring-y")
    # add colors
    if color_map is None:
        color_map = dict(zip(uni_labels, cc.glasbey_light))
    nx.set_node_attributes(g, color_map, name="Color")
    return g
def ase(adj, n_components, ptr=True):
    """Adjacency spectral embedding, optionally preceded by pass-to-ranks.

    Parameters
    ----------
    adj : ndarray
        Adjacency matrix.
    n_components : int
        Number of embedding dimensions.
    ptr : bool, default True
        If True, apply pass_to_ranks to adj before embedding.

    Returns
    -------
    ndarray
        Latent positions. For directed graphs the out- and in-embeddings are
        concatenated along the last axis.
    """
    if ptr:
        adj = pass_to_ranks(adj)
    embedder = AdjacencySpectralEmbed(n_components=n_components)
    latent = embedder.fit_transform(adj)
    # Fix: fit_transform returns a (out, in) tuple only for directed graphs;
    # for undirected input it returns a single 2D array, and calling
    # np.concatenate on it would flatten the rows into a 1D array. Guarding
    # with isinstance matches normalized_ase elsewhere in this file.
    if isinstance(latent, tuple):
        latent = np.concatenate(latent, axis=-1)
    return latent
def embed_ase(*, adj, n_components=None):
    """JHU AdjacencySpectralEmbed with default settings.

    `adj` is expected to be a sparse matrix (it is densified via .toarray()).
    Directed embeddings (returned as a tuple) are stacked column-wise.
    """
    embedder = AdjacencySpectralEmbed(n_components=n_components)
    dense_adj = adj.toarray()
    latent = embedder.fit_transform(dense_adj)
    if isinstance(latent, tuple):
        latent = np.column_stack(latent)
    return latent
def bilateral_ase(adj):
    """Embed the ipsilateral and contralateral subgraphs separately, aligning
    the left hemisphere onto the right via seeded Procrustes.

    Relies on module-level index arrays: left_inds, right_inds, lp_inds,
    rp_inds (known left/right pairs used as Procrustes seeds).

    Parameters
    ----------
    adj : ndarray
        Full adjacency matrix.

    Returns
    -------
    (ndarray, ndarray)
        Aligned ipsilateral and contralateral embeddings, each with the
        (out, in) latent positions concatenated column-wise.
    """
    ase = AdjacencySpectralEmbed(n_components=None, n_elbows=2, check_lcc=False)

    def _embed_and_align(masked_adj):
        # Embed, then Procrustes-align the left-hemisphere rows onto the
        # right using the paired seeds; concatenate the out/in embeddings.
        embeds = ase.fit_transform(masked_adj)
        aligned = []
        for e in embeds:
            procrust = Procrustes()
            procrust.fit(e, x_seeds=lp_inds, y_seeds=rp_inds)
            aligned.append(procrust.transform(e, map_inds=left_inds))
        return np.concatenate(aligned, axis=1)

    # Ipsilateral graph: zero out all left<->right connections.
    ipsi_adj = adj.copy()
    ipsi_adj[np.ix_(left_inds, right_inds)] = 0
    ipsi_adj[np.ix_(right_inds, left_inds)] = 0
    align_ipsi_embed = _embed_and_align(ipsi_adj)

    # Contralateral graph: zero out all within-hemisphere connections.
    contra_adj = adj.copy()
    contra_adj[np.ix_(left_inds, left_inds)] = 0
    contra_adj[np.ix_(right_inds, right_inds)] = 0
    align_contra_embed = _embed_and_align(contra_adj)

    return align_ipsi_embed, align_contra_embed
def evaluate_models(
    graph, labels=None, title=None, plot_graphs=False, min_comp=0, max_comp=1, n_comp=5
):
    """Fit ER/SBM/DCSBM/RDPG estimators to `graph`, record BIC and score,
    and save a scatterplot of the ASE of each model's estimated P matrix.

    NOTE(review): `title`, `min_comp`, `max_comp`, and `n_comp` are accepted
    but never used in this body. The first heatmap and the scatterplot use the
    module-level `cell_labels` rather than the `labels` argument — confirm
    that is intentional.
    """
    if plot_graphs:
        heatmap(graph, inner_hier_labels=cell_labels)
    ## Set up models to test
    non_rdpg_models = [
        EREstimator(fit_degrees=False),
        SBEstimator(fit_degrees=False),
        SBEstimator(fit_degrees=True),
    ]
    d = [6]  # RDPG ranks to try
    rdpg_models = [RDPGEstimator(n_components=i) for i in d]
    models = non_rdpg_models + rdpg_models
    names_nonRDPG = ["ER", "SBM", "DCSBM"]
    names_RDPG = ["RDPGrank{}".format(i) for i in d]
    names = names_nonRDPG + names_RDPG
    bics = []
    log_likelihoods = []
    ## Test models
    for model, name in zip(models, names):
        m = model.fit(graph, y=labels)
        if plot_graphs:
            heatmap(m.p_mat_, inner_hier_labels=labels, title=(name + "P matrix"))
            heatmap(m.sample(), inner_hier_labels=labels, title=(name + "sample"))
        bic = m.bic(graph)
        log_likelihoods.append(m.score(graph))
        bics.append(bic)
        plt.show()
        # Embed the model's estimated probability matrix for visualization.
        ase = AdjacencySpectralEmbed(n_components=2)
        latent = ase.fit_transform(m.p_mat_)
        # if type(latent) is tuple:
        #     pairplot(np.concatenate((latent[0], latent[1]), axis=1))
        #     plt.show()
        # else:
        print("here")
        # plt.figure(figsize=(20, 20))
        ax = scatterplot(latent, labels=cell_labels, height=4, alpha=0.6, font_scale=1.25)
        # plt.suptitle(name, y=0.94, x=0.1, fontsize=30, horizontalalignment="left")
        plt.savefig(name + "latent.png", format="png", dpi=1000)
        plt.close()
def test_passing_embeddings(self):
    """Validate LatentDistributionTest input checking when fed precomputed
    embeddings (input_graph=False): bad ndim, mismatched dimensions, wrong
    types, and non-finite values must raise; valid input must succeed."""
    np.random.seed(123)
    A1 = er_np(20, 0.8)
    A2 = er_np(20, 0.8)
    ase_1 = AdjacencySpectralEmbed(n_components=2)
    X1 = ase_1.fit_transform(A1)
    ase_2 = AdjacencySpectralEmbed(n_components=2)
    X2 = ase_2.fit_transform(A2)
    ase_3 = AdjacencySpectralEmbed(n_components=1)
    X3 = ase_3.fit_transform(A2)
    # check embeddings having weird ndim
    with self.assertRaises(ValueError):
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict(X1, X2.reshape(-1, 1, 1))
    with self.assertRaises(ValueError):
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict(X1.reshape(-1, 1, 1), X2)
    # check embeddings having mismatching number of components
    with self.assertRaises(ValueError):
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict(X1, X3)
    with self.assertRaises(ValueError):
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict(X3, X1)
    # check passing weird stuff as input (caught by us)
    with self.assertRaises(TypeError):
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict("hello there", X1)
    with self.assertRaises(TypeError):
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict(X1, "hello there")
    with self.assertRaises(TypeError):
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict({"hello": "there"}, X1)
    with self.assertRaises(TypeError):
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict(X1, {"hello": "there"})
    # check passing infinite in input (caught by check_array)
    with self.assertRaises(ValueError):
        X1_w_inf = X1.copy()
        X1_w_inf[1, 1] = np.inf
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict(X1_w_inf, X2)
    # check that the appropriate input works
    ldt = LatentDistributionTest(input_graph=False)
    ldt.fit_predict(X1, X2)
def lse(adj, n_components, regularizer=None):
    """Spectral embedding of the regularized Laplacian (R-DAD form).

    Parameters
    ----------
    adj : ndarray
        Adjacency matrix.
    n_components : int
        Number of embedding dimensions.
    regularizer : float or None
        Regularizer forwarded to to_laplace. Previously this argument was
        accepted but silently ignored; None preserves to_laplace's default.

    Notes
    -----
    Reads the module-level flag PTR to decide whether to pass-to-ranks first.
    Assumes a directed graph (fit_transform returns an (out, in) tuple).
    """
    if PTR:
        adj = pass_to_ranks(adj)
    # Fix: forward the regularizer instead of dropping it.
    lap = to_laplace(adj, form="R-DAD", regularizer=regularizer)
    ase = AdjacencySpectralEmbed(n_components=n_components)
    latent = ase.fit_transform(lap)
    latent = np.concatenate(latent, axis=-1)
    return latent
def mc_iter(n, m, p, q, tilde, i=1):
    """One Monte Carlo iteration: embed two independent ER graphs (edge
    probabilities p*p and q*q, sizes n and m) into one dimension and run the
    latent distribution test on the embeddings.

    NOTE(review): `i` is an unused iteration index (likely for parallel map).
    NOTE(review): in graspy, LatentDistributionTest.fit typically returns the
    estimator, not a p-value, and the keyword is usually input_graph rather
    than pass_graph — verify against the installed version's API.
    """
    X_graph = er_np(n, p * p)
    ase = AdjacencySpectralEmbed(n_components=1)
    X = ase.fit_transform(X_graph)
    Y_graph = er_np(m, q * q)
    ase = AdjacencySpectralEmbed(n_components=1)
    Y = ase.fit_transform(Y_graph)
    # Optionally perturb both point clouds with noise before testing.
    if tilde:
        X_new, Y_new = sample_noisy_points(X, Y)
    else:
        X_new, Y_new = X, Y
    ldt = LatentDistributionTest()
    pval = ldt.fit(X_new, Y_new, pass_graph=False)
    return pval
def level(adj, meta, pred, reembed=False, X=None, R=None, plot_all=True):
    """Run one level of hierarchical clustering: for each predicted cluster,
    optionally re-embed its induced subgraph (with left/right Procrustes
    alignment) and run cross-validated GMM clustering on it.

    Parameters
    ----------
    adj : ndarray
        Full adjacency matrix.
    meta : DataFrame
        Node metadata with boolean "left"/"right" columns.
    pred : array-like
        Cluster label per node from the previous level.
    reembed : bool
        If True, ASE-embed each subgraph; otherwise reuse rows of X and the
        precomputed rotation R.
    X : ndarray or None
        Parent-level embedding (required when reembed is False).
    R : ndarray or None
        Parent-level Procrustes rotation (used when reembed is False).
    plot_all : bool
        Forwarded to plot_metrics.

    Returns
    -------
    (list, list)
        Per-cluster crossval results and per-cluster data dictionaries.
    """
    uni_labels, inv = np.unique(pred, return_inverse=True)
    all_sub_results = []
    sub_data = []
    for label in uni_labels:
        print(label)
        print()
        label_mask = pred == label
        sub_meta = meta[label_mask].copy()
        # Local (subgraph) indices for left/right nodes and known pairs.
        sub_meta["inds"] = range(len(sub_meta))
        sub_left_inds = sub_meta[sub_meta["left"]]["inds"].values
        sub_right_inds = sub_meta[sub_meta["right"]]["inds"].values
        sub_lp_inds, sub_rp_inds = get_paired_inds(sub_meta)
        sub_adj = adj[np.ix_(label_mask, label_mask)]
        if reembed:
            ase = AdjacencySpectralEmbed()
            # TODO look into PTR at this level as well
            sub_embed = ase.fit_transform(sub_adj)
            sub_X = np.concatenate(sub_embed, axis=1)
            # Align the left hemisphere onto the right using the pairs.
            sub_R, _ = orthogonal_procrustes(sub_X[sub_lp_inds], sub_X[sub_rp_inds])
            sub_X[sub_left_inds] = sub_X[sub_left_inds] @ sub_R
        else:
            sub_X = X[label_mask].copy()
            sub_R = R
        var_dict = {
            "meta": sub_meta,
            "left_inds": sub_left_inds,
            "right_inds": sub_right_inds,
            "left_pair_inds": sub_lp_inds,
            "right_pair_inds": sub_rp_inds,
            "X": sub_X,
            "adj": sub_adj,
            "reembed": reembed,
        }
        sub_data.append(var_dict)
        sub_results = crossval_cluster(
            sub_X,
            sub_left_inds,
            sub_right_inds,
            left_pair_inds=sub_lp_inds,
            right_pair_inds=sub_rp_inds,
            max_clusters=8,
            min_clusters=1,
            n_init=50,
        )
        fig, axs = plot_metrics(sub_results, plot_all=plot_all)
        fig.suptitle(f"Clustering for cluster {label}, reembed={reembed}")
        stashfig(f"cluster-profile-label={label}-reembed={reembed}")
        plt.close()
        all_sub_results.append(sub_results)
    return all_sub_results, sub_data
def normalized_ase(graph, n_components=None, embed_kws=None):
    """ASE embedding with each latent position scaled to unit L2 norm.

    Parameters
    ----------
    graph : ndarray or nx.Graph
        Graph to embed.
    n_components : int or None
        Number of embedding dimensions (None lets the embedder choose).
    embed_kws : dict or None
        Extra keyword arguments forwarded to AdjacencySpectralEmbed.

    Returns
    -------
    ndarray
        Row-normalized latent positions; all-zero rows are left unchanged.
    """
    # Fix: avoid a shared mutable default argument ({}).
    if embed_kws is None:
        embed_kws = {}
    ase = AdjacencySpectralEmbed(n_components=n_components, **embed_kws)
    latent = ase.fit_transform(graph)
    # Directed graphs return an (out, in) tuple; stack them column-wise.
    if isinstance(latent, tuple):
        latent = np.concatenate(latent, axis=-1)
    norm_vec = np.linalg.norm(latent, axis=1)
    norm_vec[norm_vec == 0] = 1  # avoid division by zero for zero rows
    norm_latent = latent / norm_vec[:, np.newaxis]
    return norm_latent
def estimate_assignments(graph, n_communities, n_components=None, method="gc", metric=None):
    """Given a graph and n_comunities, sweeps over covariance structures
    Not deterministic
    Not using graph bic or mse to calculate best

    1. Does an embedding on the raw graph
    2. GaussianCluster on the embedding. This will sweep covariance structure
       for the given n_communities
    3. Returns n_parameters based on the number used in GaussianCluster

    method can be "gc" or "bc"
    method "gc" : use graspy GaussianCluster
        this defaults to full covariance
    "bc" : tommyclust with defaults
        so sweep covariance, agglom, linkage
    "bc-metric" : tommyclust with custom metric
        still sweep everything
    "bc-none" : mostly for testing, should behave just like GaussianCluster

    Raises
    ------
    ValueError
        If `method` is not one of the four recognized values.
    """
    embed_graph = graph.copy()
    latent = AdjacencySpectralEmbed(
        n_components=n_components).fit_transform(embed_graph)
    # Directed graphs return an (out, in) tuple; stack them column-wise.
    if isinstance(latent, tuple):
        latent = np.concatenate(latent, axis=1)
    if method == "gc":
        # min == max: fit exactly n_communities, sweeping covariance types.
        gc = GaussianCluster(
            min_components=n_communities,
            max_components=n_communities,
            covariance_type="all",
        )
        vertex_assignments = gc.fit_predict(latent)
        n_params = gc.model_._n_parameters()
    elif method == "bc":
        vertex_assignments, n_params = brute_cluster(latent, [n_communities])
    elif method == "bc-metric":
        vertex_assignments, n_params = brute_cluster(latent, [n_communities],
                                                     metric=metric)
    elif method == "bc-none":
        # Degenerate brute_cluster configuration, equivalent to GaussianCluster.
        vertex_assignments, n_params = brute_cluster(
            latent,
            [n_communities],
            affinities=["none"],
            linkages=["none"],
            covariance_types=["full"],
        )
    else:
        raise ValueError("Unspecified clustering method")
    return (vertex_assignments, n_params)
def ase_concatenate(adjs, n_components, ptr=True):
    """Embed each graph separately with ASE (splitting the dimension budget
    evenly across graphs) and concatenate the latents column-wise."""
    if ptr:
        adjs = [pass_to_ranks(a) for a in adjs]
    dims_per_graph = n_components // len(adjs)
    embedder = AdjacencySpectralEmbed(n_components=dims_per_graph)
    graph_latents = [
        np.concatenate(embedder.fit_transform(a), axis=-1) for a in adjs
    ]
    return np.concatenate(graph_latents, axis=-1)
def lse(adj, n_components, regularizer=None, ptr=True):
    """Spectral embedding of the regularized Laplacian (R-DAD form), with
    optional pass-to-ranks preprocessing and diagonal augmentation disabled.

    Assumes a directed graph: the (out, in) embeddings are concatenated
    along the last axis.
    """
    if ptr:
        adj = pass_to_ranks(adj)
    lap = to_laplace(adj, form="R-DAD", regularizer=regularizer)
    embedder = AdjacencySpectralEmbed(n_components=n_components, diag_aug=False)
    out_in = embedder.fit_transform(lap)
    return np.concatenate(out_in, axis=-1)
def ase_procrust_svd(embed_adjs):
    """ASE each graph, Procrustes-align the left hemisphere onto the right
    using the paired seeds (module-level lp_inds/rp_inds/left_inds), stack
    all embeddings column-wise, then reduce with a final elbow-selected SVD.

    Returns the left singular vectors of the concatenated embedding.
    """
    embedder = AdjacencySpectralEmbed(n_components=None)
    all_embeds = []
    for graph in embed_adjs:
        out_in = embedder.fit_transform(graph)
        embed = np.concatenate(out_in, axis=1)
        rotation = orthogonal_procrustes(embed[lp_inds], embed[rp_inds])[0]
        embed[left_inds] = embed[left_inds] @ rotation
        print(embed.shape)
        all_embeds.append(embed)
    cat_embed = np.concatenate(all_embeds, axis=1)
    print(cat_embed.shape)
    U, S, Vt = selectSVD(cat_embed, n_elbows=3)
    return U
def _embed(self, A1, A2):
    """ASE-embed both graphs into a shared dimension.

    When self.n_components is None, the larger of the two graphs' elbow
    dimensions is chosen (and stored on self). Directed embeddings are
    concatenated; mixing one directed and one undirected graph raises.
    """
    if self.n_components is None:
        num_dims1 = select_dimension(A1)[0][-1]
        num_dims2 = select_dimension(A2)[0][-1]
        self.n_components = max(num_dims1, num_dims2)
    ase = AdjacencySpectralEmbed(n_components=self.n_components)
    X1_hat = ase.fit_transform(A1)
    X2_hat = ase.fit_transform(A2)
    directed1 = isinstance(X1_hat, tuple)
    directed2 = isinstance(X2_hat, tuple)
    if directed1 and directed2:
        X1_hat = np.concatenate(X1_hat, axis=-1)
        X2_hat = np.concatenate(X2_hat, axis=-1)
    elif directed1 != directed2:
        msg = ("input graphs do not have same directedness. "
               "consider symmetrizing the directed graph.")
        raise ValueError(msg)
    return X1_hat, X2_hat
def add_attributes( g, drop_neg=True, remove_diag=True, size_scaler=1, use_counts=False, use_weights=True, color_map=None, ): nodelist = list(g.nodes()) # add spectral properties sym_adj = symmetrize(nx.to_numpy_array(g, nodelist=nodelist)) n_components = 10 latent = AdjacencySpectralEmbed( n_components=n_components).fit_transform(sym_adj) for i in range(n_components): latent_dim = latent[:, i] lap_map = dict(zip(nodelist, latent_dim)) nx.set_node_attributes(g, lap_map, name=f"AdjEvec-{i}") # add spring layout properties pos = nx.spring_layout(g) spring_x = {} spring_y = {} for key, val in pos.items(): spring_x[key] = val[0] spring_y[key] = val[1] nx.set_node_attributes(g, spring_x, name="Spring-x") nx.set_node_attributes(g, spring_y, name="Spring-y") # add colors # nx.set_node_attributes(g, color_map, name="Color") for node, data in g.nodes(data=True): c = data["cell_class"] color = CLASS_COLOR_DICT[c] data["color"] = color # add size attribute base on number of edges size_map = dict(path_graph.degree(weight="weight")) nx.set_node_attributes(g, size_map, name="Size") return g
def _embed(self, adj=None):
    """Embed the adjacency (ase / lse / unscaled_ase per self.embed) after
    pass-to-ranks, then Procrustes-align the left hemisphere onto the right
    using the known pairs. Returns the aligned embedding matrix X.
    """
    if adj is None:
        adj = self.adj
    # TODO look into PTR at this level as well
    # lp_inds, rp_inds = get_paired_inds(self.meta)
    lp_inds = self.left_pair_inds
    rp_inds = self.right_pair_inds
    embed_adj = pass_to_ranks(adj)
    if self.embed == "ase":
        embedder = AdjacencySpectralEmbed(
            n_components=self.n_components, n_elbows=self.n_elbows
        )
        embed = embedder.fit_transform(embed_adj)
    elif self.embed == "lse":
        embedder = LaplacianSpectralEmbed(
            n_components=self.n_components,
            n_elbows=self.n_elbows,
            regularizer=self.regularizer,
        )
        embed = embedder.fit_transform(embed_adj)
    elif self.embed == "unscaled_ase":
        # Raw SVD of the (PTR'd, diagonal-augmented) adjacency: keep the
        # unscaled singular vectors (U, V) as the out/in embeddings.
        embed_adj = pass_to_ranks(adj)
        embed_adj = augment_diagonal(embed_adj)
        embed = selectSVD(
            embed_adj, n_components=self.n_components, n_elbows=self.n_elbows
        )
        embed = (embed[0], embed[2].T)
    X = np.concatenate(embed, axis=1)
    fraction_paired = (len(lp_inds) + len(rp_inds)) / len(self.root_inds)
    print(f"Learning transformation with {fraction_paired} neurons paired")
    # Align left onto right using the paired seeds.
    R, _ = orthogonal_procrustes(X[lp_inds], X[rp_inds])
    X[self.left_inds] = X[self.left_inds] @ R
    if self.normalize:
        # NOTE(review): this divides by row SUMS; the sibling _embed variant
        # divides by row L2 norms — confirm which normalization is intended.
        row_sums = np.sum(X, axis=1)
        X /= row_sums[:, None]
    return X
def _embed(self, adj=None):
    """Embed the adjacency (ase / lse / unscaled_ase per self.embed) after
    pass-to-ranks (optionally adding a small constant), Procrustes-align via
    self._procrustes, and optionally row-normalize to unit L2 norm.
    """
    if adj is None:
        adj = self.adj
    lp_inds = self.left_pair_inds
    rp_inds = self.right_pair_inds
    embed_adj = pass_to_ranks(adj)  # TODO PTR here?
    if self.plus_c:
        # Add a small uniform constant (1 / number of entries) to every edge.
        embed_adj += 1 / adj.size
    if self.embed == "ase":
        embedder = AdjacencySpectralEmbed(n_components=self.n_components,
                                          n_elbows=self.n_elbows)
        embed = embedder.fit_transform(embed_adj)
    elif self.embed == "lse":
        embedder = LaplacianSpectralEmbed(
            n_components=self.n_components,
            n_elbows=self.n_elbows,
            regularizer=self.regularizer,
        )
        embed = embedder.fit_transform(embed_adj)
    elif self.embed == "unscaled_ase":
        # Raw SVD of the diagonal-augmented matrix: keep unscaled (U, V).
        embed_adj = augment_diagonal(embed_adj)
        embed = selectSVD(embed_adj, n_components=self.n_components,
                          n_elbows=self.n_elbows)
        embed = (embed[0], embed[2].T)
    X = np.concatenate(embed, axis=1)
    fraction_paired = (len(lp_inds) + len(rp_inds)) / len(self.root_inds)
    print(f"Learning transformation with {fraction_paired} neurons paired")
    X = self._procrustes(X)
    if self.normalize:
        # Scale each latent position to unit L2 norm.
        row_norms = np.linalg.norm(X, axis=1)
        X /= row_norms[:, None]
    return X
def _ase_embed(mat, atlas, graph_path, ID, subgraph_name="whole_brain"):
    r"""
    Class for computing the adjacency spectral embedding of a graph.

    The adjacency spectral embedding (ASE) is a k-dimensional Euclidean
    representation of the graph based on its adjacency matrix. It relies on an
    SVD to reduce the dimensionality to the specified k, or if k is
    unspecified, can find a number of dimensions automatically

    Parameters
    ----------
    graphs : list of nx.Graph or ndarray, or ndarray
        If list of nx.Graph, each Graph must contain same number of nodes.
        If list of ndarray, each array must have shape (n_vertices, n_vertices).
        If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices).
    atlas : str
    graph_path : str
    ID : str
    subgraph_name : str

    Returns
    -------
    out_path : str
        File path to .npy file containing ASE embedding tensor.

    Notes
    -----
    The singular value decomposition:

    .. math:: A = U \Sigma V^T

    is used to find an orthonormal basis for a matrix, which in our case is the
    adjacency matrix of the graph. These basis vectors (in the matrices U or V)
    are ordered according to the amount of variance they explain in the
    original matrix. By selecting a subset of these basis vectors (through our
    choice of dimensionality reduction) we can find a lower dimensional space
    in which to represent the graph.

    References
    ----------
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E. "A
       Consistent Adjacency Spectral Embedding for Stochastic Blockmodel
       Graphs," Journal of the American Statistical Association,
       Vol. 107(499), 2012
    """
    import numpy as np
    from pynets.core.utils import flatten
    from graspy.embed import AdjacencySpectralEmbed
    from joblib import dump
    from graspy.utils import get_lcc

    # Adjacency Spectral embedding
    print(
        f"{'Embedding unimod asetome for atlas: '}{atlas}{' and '}{subgraph_name}{'...'}"
    )
    # Embed the largest connected component of the input matrix.
    ase = AdjacencySpectralEmbed()
    ase_fit = ase.fit_transform(get_lcc(mat))
    # NOTE(review): `Path` and `os` are not imported inside this function
    # (unlike the sibling _ase_embed) — verify they are in scope at module
    # level where this is defined.
    dir_path = str(Path(os.path.dirname(graph_path)).parent)
    namer_dir = f"{dir_path}/embeddings"
    if not os.path.isdir(namer_dir):
        os.makedirs(namer_dir, exist_ok=True)
    out_path = f"{namer_dir}/{list(flatten(ID))[0]}_{atlas}_{subgraph_name}_asetome.npy"
    out_path_est = f"{namer_dir}/{list(flatten(ID))[0]}_{atlas}_{subgraph_name}_asetome_estimator.joblib"
    # Persist both the fitted estimator and the embedding array.
    dump(ase, out_path_est)
    print("Saving...")
    np.save(out_path, ase_fit)
    del ase, ase_fit
    return out_path
# Build a block-level "minigraph" from the blockmodel DataFrame and decorate
# it with size, signal-flow, spectral, layout, and color node attributes.
adj = prob_df.values
adj -= np.diag(np.diag(adj))  # zero the diagonal (in place, via the view)
# NOTE(review): this only sets a stray `.data` attribute on the DataFrame;
# looks like a leftover/no-op — confirm intent.
prob_df.data = prob_df
print(prob_df.head())
g = nx.from_pandas_adjacency(prob_df, create_using=nx.DiGraph())
uni_labels, counts = np.unique(adjusted_partition, return_counts=True)
# Node size proportional to block membership count.
size_scaler = 8
size_map = dict(zip(uni_labels, size_scaler * counts))
nx.set_node_attributes(g, size_map, name="Size")
adj = nx.to_numpy_array(g, nodelist=uni_labels)
node_signal_flow = signal_flow(adj)
sf_map = dict(zip(uni_labels, node_signal_flow))
nx.set_node_attributes(g, sf_map, name="Signal Flow")
# Spectral attribute: the 6th ASE dimension (i = 5) of the symmetrized graph.
sym_adj = symmetrize(adj)
node_lap = AdjacencySpectralEmbed(n_components=10).fit_transform(sym_adj)
# node_lap = np.squeeze(node_lap)
i = 5
node_lap = node_lap[:, i]
lap_map = dict(zip(uni_labels, node_lap))
nx.set_node_attributes(g, lap_map, name="Laplacian-2")
# Spring layout: only the y-coordinate (val[1]) is stored as "Spring".
pos = nx.spring_layout(g)
new_pos = {}
for key, val in pos.items():
    new_pos[key] = val[1]
nx.set_node_attributes(g, new_pos, name="Spring")
color_map = dict(zip(uni_labels, cc.glasbey_light))
nx.set_node_attributes(g, color_map, name="Color")
# close to what we set originally if we undo the rescaling step. # %% double checking on model params sbme = SBMEstimator(directed=False, loops=False) sbme.fit(adj, y=labels) block_p_hat = sbme.block_p_ block_heatmap(block_p_hat, title=r"Observed $\hat{B}$") block_p_hat_unscaled = block_p_hat * 1 / scaling_factor block_heatmap(block_p_hat_unscaled, title=r"Observed $\hat{B}$ (unscaled)") # %% [markdown] # ## Spectral embedding # Here I use graspy to do ASE, LSE, and regularized LSE. Note that we're just using the # SVDs here. There is an option on whether to throw out the first eigenvector. #%% embeddings embed_kws = dict(n_components=k + 1, algorithm="full", check_lcc=False) ase = AdjacencySpectralEmbed(**embed_kws) lse = LaplacianSpectralEmbed(form="DAD", **embed_kws) rlse = LaplacianSpectralEmbed(form="R-DAD", **embed_kws) ase_embed = ase.fit_transform(adj) lse_embed = lse.fit_transform(adj) rlse_embed = rlse.fit_transform(adj) embeddings_list = [ase_embed, lse_embed, rlse_embed] remove_first = False for i, embedding in enumerate(embeddings_list): if remove_first: embeddings_list[i] = embedding[:, 1:] else: embeddings_list[i] = embedding[:, :k]
def _ase_embed(mat, atlas, graph_path, ID, subgraph_name="all_nodes",
               n_components=None, prune=0, norm=1):
    r"""
    Class for computing the adjacency spectral embedding of a graph.

    The adjacency spectral embedding (ASE) is a k-dimensional Euclidean
    representation of the graph based on its adjacency matrix. It relies on an
    SVD to reduce the dimensionality to the specified k, or if k is
    unspecified, can find a number of dimensions automatically

    Parameters
    ----------
    mat : ndarray or nx.Graph
        An nxn adjacency matrix or graph object.
    atlas : str
        The name of an atlas (indicating the node definition).
    graph_path : str
    ID : str
    subgraph_name : str

    Returns
    -------
    out_path : str
        File path to .npy file containing ASE embedding tensor.

    Notes
    -----
    The singular value decomposition:

    .. math:: A = U \Sigma V^T

    is used to find an orthonormal basis for a matrix, which in our case is the
    adjacency matrix of the graph. These basis vectors (in the matrices U or V)
    are ordered according to the amount of variance they explain in the
    original matrix. By selecting a subset of these basis vectors (through our
    choice of dimensionality reduction) we can find a lower dimensional space
    in which to represent the graph.

    References
    ----------
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E. "A
       Consistent Adjacency Spectral Embedding for Stochastic Blockmodel
       Graphs," Journal of the American Statistical Association,
       Vol. 107(499), 2012
    """
    import os
    import networkx as nx
    import numpy as np
    from pynets.core.utils import flatten
    from graspy.embed import AdjacencySpectralEmbed
    from joblib import dump
    from pynets.stats.netstats import CleanGraphs
    #from graspy.utils import get_lcc

    # Adjacency Spectral embedding
    print(f"{'Embedding unimodal asetome for atlas: '}{atlas} and "
          f"{subgraph_name}{'...'}")
    ase = AdjacencySpectralEmbed(n_components=n_components)
    cg = CleanGraphs(None, None, graph_path, prune, norm)
    if float(norm) >= 1:
        G = cg.normalize_graph()
        mat_clean = nx.to_numpy_array(G)
    else:
        mat_clean = mat
    # NOTE(review): when prune < 1 this `else` resets mat_clean to the raw
    # `mat`, discarding any normalization from the branch above — confirm
    # whether the two cleaning steps were meant to compose.
    if float(prune) >= 1:
        graph_path_tmp = cg.prune_graph()[1]
        mat_clean = np.load(graph_path_tmp)
    else:
        mat_clean = mat
    ase_fit = ase.fit_transform(mat_clean)
    dir_path = str(Path(os.path.dirname(graph_path)).parent)
    namer_dir = f"{dir_path}/embeddings"
    if not os.path.isdir(namer_dir):
        os.makedirs(namer_dir, exist_ok=True)
    out_path = f"{namer_dir}/gradient-ASE" \
               f"_{atlas}_{subgraph_name}_{os.path.basename(graph_path)}"
    # NOTE(review): `dump` is imported but its use is commented out below.
    # out_path_est = f"{namer_dir}/gradient-ASE_{atlas}" \
    #                f"_{subgraph_name}" \
    #                f"_{os.path.basename(graph_path).split('.npy')[0]}.joblib"
    #dump(ase, out_path_est)
    print("Saving...")
    np.save(out_path, ase_fit)
    del ase, ase_fit
    return out_path
stashfig("random-sf")
# Collect signal-flow values computed from the raw adjacency.
adj_df = pd.DataFrame()
adj_df["Signal flow"] = true_z
adj_df["Input"] = "Adjacency"
adj_df["Block"] = labels
# fg = sns.FacetGrid(dist_df, col="Label", col_wrap=2, aspect=2, hue="Label")
# fg.map(sns.distplot, "Signal flow")
# stashfig("sf-dists")
# %% [markdown]
# # try with p_hat
from graspy.embed import AdjacencySpectralEmbed

# Reconstruct P_hat from the directed ASE (out @ in^T), then recompute
# signal flow on the estimated probability matrix instead of A.
latent = AdjacencySpectralEmbed(n_components=n_blocks).fit_transform(A)
P_hat = latent[0] @ latent[1].T
# P_hat -= P_hat.min()
heatmap(P_hat, title=r"$\hat{P}$")
stashfig("p-hat")
true_z = signal_flow(P_hat)
sort_inds = np.argsort(true_z)[::-1]  # sort nodes by descending signal flow
heatmap(
    A[np.ix_(sort_inds, sort_inds)],
    cbar=False,
    title=r"Feedforward SBM, sorted by $\hat{P}$ signal flow",
)
stashfig("ffSBM-phat-sf")
# Null comparison: shuffle the entries of A.
A_fake = A.copy().ravel()
np.random.shuffle(A_fake)
#%% %matplotlib inline from graspy.plot import * from graspy.simulations import sbm from graspy.embed import AdjacencySpectralEmbed import numpy as np import matplotlib.pyplot as plt import seaborn as sns g = sbm([100, 100], [[0.8, 0.2], [0.2, 0.8]]) ase = AdjacencySpectralEmbed() X = ase.fit_transform(g) labels = 25 * [0] + 25 * [1] + 25 * [2] + 24 * [-1] + [-2] # pairplot(X, size=50, alpha=0.6) plt.show()
# Prepare metadata and PTR'd adjacency, then embed and align hemispheres.
meta = mg.meta
adj = mg.adj
adj = pass_to_ranks(adj)
meta["inds"] = range(len(meta))
left_inds = meta[meta["left"]]["inds"]
right_inds = meta[meta["right"]]["inds"]
lp_inds, rp_inds = get_paired_inds(meta)
# %% [markdown]
# ## Embed
# Here the embedding is ASE, with PTR and DiagAug, the number of embedding dimensions
# is for now set to ZG2 (4 + 4). Using the known pairs as "seeds", the left embedding
# is matched to the right using procrustes.
ase = AdjacencySpectralEmbed(n_components=None, n_elbows=2)
embed = ase.fit_transform(adj)
n_components = embed[0].shape[1]  # use all of ZG2
X = np.concatenate((embed[0][:, :n_components], embed[1][:, :n_components]), axis=-1)
# Seeded Procrustes: rotate left-hemisphere latents onto the right.
R, _ = orthogonal_procrustes(X[lp_inds], X[rp_inds])
if CLUSTER_SPLIT == "best":
    X[left_inds] = X[left_inds] @ R
# %% [markdown]
# ## Clustering
# Clustering is performed using Gaussian mixture modeling. At each candidate value of k,
# 50 models are trained on the left embedding, 50 models are trained on the right
# embedding (choosing the best covariance structure based on BIC on the train set).
results = crossval_cluster(
# ) #%% from graspy.embed import AdjacencySpectralEmbed, OmnibusEmbed from graspy.utils import pass_to_ranks from graspy.plot import pairplot sum_adj = np.sum(np.array(mb_color_graphs), axis=0) n_components = 4 # ptr_adj = pass_to_ranks(sum_adj) ase = AdjacencySpectralEmbed(n_components=n_components) sum_latent = ase.fit_transform(ptr_adj) sum_latent = np.concatenate(sum_latent, axis=-1) pairplot(sum_latent, labels=mb_class_labels) ptr_color_adjs = [pass_to_ranks(a) for a in mb_color_graphs] # graph_sum = [np.sum(a) for a in mb_color_graphs] # ptr_color_adjs = [ptr_color_adjs[i] + (1 / graph_sum[i]) for i in range(4)] omni = OmnibusEmbed(n_components=n_components // 4) color_latent = omni.fit_transform(ptr_color_adjs) color_latent = np.concatenate(color_latent, axis=-1) color_latent = np.concatenate(color_latent, axis=-1) pairplot(color_latent, labels=mb_class_labels) from graspy.embed import MultipleASE
ax.set_yticklabels([name0, name1, name2, name3]) # %% [markdown] # ## matrixplot( path_indicator_mat[:50, :50], plot_type="scattermap", sizes=(0.2, 0.2), hue="weight", palette=sns.color_palette("husl", n_colors=10), ax=ax, ) # %% [markdown] # ## embedder = AdjacencySpectralEmbed(n_components=None, n_elbows=2) embed = embedder.fit_transform(adj) embed = np.concatenate(embed, axis=-1) pairplot(embed, labels=labels, palette="tab20") # %% [markdown] # ## Run paths print(f"Running {n_init} random walks from each source node...") transition_probs = to_markov_matrix(adj) out_inds = np.where(labels == n_blocks - 1)[0] source_inds = np.where(labels == 0)[0] def rw_from_node(s):
# close to what we set originally if we undo the rescaling step. # %% double checking on model params sbme = SBMEstimator(directed=False, loops=False) sbme.fit(adj, y=labels) block_p_hat = sbme.block_p_ block_heatmap(block_p_hat, title=r"Observed $\hat{B}$") block_p_hat_unscaled = block_p_hat * 1 / scaling_factor block_heatmap(block_p_hat_unscaled, title=r"Observed $\hat{B}$ (unscaled)") # %% [markdown] # ## Spectral embedding # Here I use graspy to do ASE, LSE, and regularized LSE. Note that we're just using the # SVDs here. There is an option on whether to throw out the first eigenvector. #%% embeddings embed_kws = dict(n_components=k + 1, algorithm="full", check_lcc=False) ase = AdjacencySpectralEmbed(**embed_kws) lse = LaplacianSpectralEmbed(form="DAD", **embed_kws) rlse = LaplacianSpectralEmbed(form="R-DAD", **embed_kws) ase_embed = ase.fit_transform(adj) lse_embed = lse.fit_transform(adj) rlse_embed = rlse.fit_transform(adj) embeddings_list = [ase_embed, lse_embed, rlse_embed] remove_first = True for i, embedding in enumerate(embeddings_list): if remove_first: embeddings_list[i] = embedding[:, 1:] else: embeddings_list[i] = embedding[:, :k]
from graspy.simulations import er_np # Experiment parameters n_verts = 200 p = 0.5 n_components = 1 n_sims = 1000 # Run experiment estimated_latents = np.zeros((n_sims, 2)) for i in range(n_sims): graph = er_np(n_verts, p, directed=False, loops=False) ase_diag = AdjacencySpectralEmbed(n_components=n_components, diag_aug=True) ase = AdjacencySpectralEmbed(n_components=n_components, diag_aug=False) diag_latent = ase_diag.fit_transform(graph) ase_latent = ase.fit_transform(graph) mean_diag_latent = np.mean(diag_latent) mean_latent = np.mean(ase_latent) estimated_latents[i, 0] = mean_diag_latent estimated_latents[i, 1] = mean_latent diffs = estimated_latents - np.sqrt(p) # the true latent position is sqrt(p) # Plot results