def bilateral_ase(adj):
    ase = AdjacencySpectralEmbed(n_components=None, n_elbows=2, check_lcc=False)

    ipsi_adj = adj.copy()
    ipsi_adj[np.ix_(left_inds, right_inds)] = 0
    ipsi_adj[np.ix_(right_inds, left_inds)] = 0
    ipsi_embed = ase.fit_transform(ipsi_adj)
    procrust = Procrustes()
    align_ipsi_embed = []
    for e in ipsi_embed:
        procrust.fit(e, x_seeds=lp_inds, y_seeds=rp_inds)
        align_e = procrust.transform(e, map_inds=left_inds)
        align_ipsi_embed.append(align_e)
    align_ipsi_embed = np.concatenate(align_ipsi_embed, axis=1)

    contra_adj = adj.copy()
    contra_adj[np.ix_(left_inds, left_inds)] = 0
    contra_adj[np.ix_(right_inds, right_inds)] = 0
    contra_embed = ase.fit_transform(contra_adj)
    procrust = Procrustes()
    align_contra_embed = []
    for e in contra_embed:
        procrust.fit(e, x_seeds=lp_inds, y_seeds=rp_inds)
        align_e = procrust.transform(e, map_inds=left_inds)
        align_contra_embed.append(align_e)
    align_contra_embed = np.concatenate(align_contra_embed, axis=1)

    return align_ipsi_embed, align_contra_embed
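# bilateral_ase relies on module-level index arrays. A minimal sketch of how
# they could be built, following the meta-DataFrame convention used in the
# other snippets here; the column names and the get_paired_inds helper are
# assumptions carried over from those snippets.
meta["inds"] = range(len(meta))
left_inds = meta[meta["left"]]["inds"].values
right_inds = meta[meta["right"]]["inds"].values
lp_inds, rp_inds = get_paired_inds(meta)

ipsi_X, contra_X = bilateral_ase(adj)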
class ASEEmbedding(Embedding):
    """Implements an interface for adjacency spectral embedding; inherits from
    the Embedding class.
    """

    def __init__(self):
        self.model = AdjacencySpectralEmbed()

    def fit(self, X, S=None):
        Xh = np.hstack(self.model.fit_transform(X))
        if S is not None:
            Xh = np.hstack((Xh, S))
        clusterer = GaussianMixture(n_components=Xh.shape[1] // 2)
        clusterer.fit(Xh)
        self.y = clusterer.predict(Xh)
        self.H = Xh

    def learn_embedding(self, G, S=None, **kwargs):
        # Same as `fit`, but starting from a networkx graph.
        X = np.asarray(nx.adjacency_matrix(G).todense())
        self.fit(X, S=S)

    def get_reconstructed_adj(self, *a, **b):
        # Assumes a directed input, so that `latent_right_` is populated.
        return self.model.latent_left_.dot(
            np.diag(self.model.singular_values_)
        ).dot(self.model.latent_right_.T)
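# A minimal usage sketch for ASEEmbedding, assuming the surrounding module
# provides the Embedding base class; the SBM parameters are arbitrary.
from graspy.simulations import sbm

A = sbm([50, 50], [[0.5, 0.1], [0.1, 0.5]], directed=True)
emb = ASEEmbedding()
emb.fit(A)
P_hat = emb.get_reconstructed_adj()  # low-rank estimate of the edge-probability matrix
print(P_hat.shape, emb.y.shape)  # (100, 100) (100,)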
def ase(adj, n_components, ptr=True):
    if ptr:
        adj = pass_to_ranks(adj)
    ase = AdjacencySpectralEmbed(n_components=n_components)
    # Assumes a directed graph: fit_transform returns an (out, in) tuple,
    # which is concatenated column-wise.
    latent = ase.fit_transform(adj)
    latent = np.concatenate(latent, axis=-1)
    return latent
def evaluate_models(
    graph, labels=None, title=None, plot_graphs=False, min_comp=0, max_comp=1, n_comp=5
):
    if plot_graphs:
        heatmap(graph, inner_hier_labels=cell_labels)

    # Set up models to test
    non_rdpg_models = [
        EREstimator(fit_degrees=False),
        SBEstimator(fit_degrees=False),
        SBEstimator(fit_degrees=True),
    ]
    d = [6]
    rdpg_models = [RDPGEstimator(n_components=i) for i in d]
    models = non_rdpg_models + rdpg_models

    names_nonRDPG = ["ER", "SBM", "DCSBM"]
    names_RDPG = ["RDPGrank{}".format(i) for i in d]
    names = names_nonRDPG + names_RDPG

    bics = []
    log_likelihoods = []

    # Test models
    for model, name in zip(models, names):
        m = model.fit(graph, y=labels)
        if plot_graphs:
            heatmap(m.p_mat_, inner_hier_labels=labels, title=(name + " P matrix"))
            heatmap(m.sample(), inner_hier_labels=labels, title=(name + " sample"))
        bic = m.bic(graph)
        log_likelihoods.append(m.score(graph))
        bics.append(bic)
        plt.show()

        ase = AdjacencySpectralEmbed(n_components=2)
        latent = ase.fit_transform(m.p_mat_)
        ax = scatterplot(latent, labels=cell_labels, height=4, alpha=0.6, font_scale=1.25)
        plt.savefig(name + "latent.png", format="png", dpi=1000)
        plt.close()
def test_passing_embeddings(self):
    np.random.seed(123)
    A1 = er_np(20, 0.8)
    A2 = er_np(20, 0.8)
    ase_1 = AdjacencySpectralEmbed(n_components=2)
    X1 = ase_1.fit_transform(A1)
    ase_2 = AdjacencySpectralEmbed(n_components=2)
    X2 = ase_2.fit_transform(A2)
    ase_3 = AdjacencySpectralEmbed(n_components=1)
    X3 = ase_3.fit_transform(A2)

    # check embeddings having weird ndim
    with self.assertRaises(ValueError):
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict(X1, X2.reshape(-1, 1, 1))
    with self.assertRaises(ValueError):
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict(X1.reshape(-1, 1, 1), X2)

    # check embeddings having mismatching number of components
    with self.assertRaises(ValueError):
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict(X1, X3)
    with self.assertRaises(ValueError):
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict(X3, X1)

    # check passing weird stuff as input (caught by us)
    with self.assertRaises(TypeError):
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict("hello there", X1)
    with self.assertRaises(TypeError):
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict(X1, "hello there")
    with self.assertRaises(TypeError):
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict({"hello": "there"}, X1)
    with self.assertRaises(TypeError):
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict(X1, {"hello": "there"})

    # check passing infinite in input (caught by check_array)
    with self.assertRaises(ValueError):
        X1_w_inf = X1.copy()
        X1_w_inf[1, 1] = np.inf
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict(X1_w_inf, X2)

    # check that the appropriate input works
    ldt = LatentDistributionTest(input_graph=False)
    ldt.fit_predict(X1, X2)
def lse(adj, n_components, regularizer=None, ptr=True):
    if ptr:
        adj = pass_to_ranks(adj)
    lap = to_laplace(adj, form="R-DAD", regularizer=regularizer)
    ase = AdjacencySpectralEmbed(n_components=n_components)
    latent = ase.fit_transform(lap)
    latent = np.concatenate(latent, axis=-1)
    return latent
def mc_iter(n, m, p, q, tilde, i=1):
    # One Monte Carlo iteration: embed two ER graphs and test whether their
    # latent distributions differ.
    X_graph = er_np(n, p * p)
    ase = AdjacencySpectralEmbed(n_components=1)
    X = ase.fit_transform(X_graph)

    Y_graph = er_np(m, q * q)
    ase = AdjacencySpectralEmbed(n_components=1)
    Y = ase.fit_transform(Y_graph)

    if tilde:
        X_new, Y_new = sample_noisy_points(X, Y)
    else:
        X_new, Y_new = X, Y

    ldt = LatentDistributionTest(input_graph=False)
    pval = ldt.fit_predict(X_new, Y_new)
    return pval
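# A minimal sketch of driving mc_iter to estimate the test's rejection rate;
# the iteration count, graph sizes, and alpha level here are assumptions.
import numpy as np

n_iters = 100
pvals = [mc_iter(n=100, m=100, p=0.5, q=0.5, tilde=False, i=i) for i in range(n_iters)]
# Under the null (p == q), the rejection rate at level 0.05 should be ~0.05.
print("rejection rate:", np.mean(np.array(pvals) < 0.05))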
def level(adj, meta, pred, reembed=False, X=None, R=None, plot_all=True):
    uni_labels, inv = np.unique(pred, return_inverse=True)
    all_sub_results = []
    sub_data = []
    for label in uni_labels:
        print(label)
        print()
        label_mask = pred == label
        sub_meta = meta[label_mask].copy()
        sub_meta["inds"] = range(len(sub_meta))
        sub_left_inds = sub_meta[sub_meta["left"]]["inds"].values
        sub_right_inds = sub_meta[sub_meta["right"]]["inds"].values
        sub_lp_inds, sub_rp_inds = get_paired_inds(sub_meta)
        sub_adj = adj[np.ix_(label_mask, label_mask)]

        if reembed:
            ase = AdjacencySpectralEmbed()
            # TODO look into PTR at this level as well
            sub_embed = ase.fit_transform(sub_adj)
            sub_X = np.concatenate(sub_embed, axis=1)
            sub_R, _ = orthogonal_procrustes(sub_X[sub_lp_inds], sub_X[sub_rp_inds])
            sub_X[sub_left_inds] = sub_X[sub_left_inds] @ sub_R
        else:
            sub_X = X[label_mask].copy()
            sub_R = R

        var_dict = {
            "meta": sub_meta,
            "left_inds": sub_left_inds,
            "right_inds": sub_right_inds,
            "left_pair_inds": sub_lp_inds,
            "right_pair_inds": sub_rp_inds,
            "X": sub_X,
            "adj": sub_adj,
            "reembed": reembed,
        }
        sub_data.append(var_dict)

        sub_results = crossval_cluster(
            sub_X,
            sub_left_inds,
            sub_right_inds,
            left_pair_inds=sub_lp_inds,
            right_pair_inds=sub_rp_inds,
            max_clusters=8,
            min_clusters=1,
            n_init=50,
        )
        fig, axs = plot_metrics(sub_results, plot_all=plot_all)
        fig.suptitle(f"Clustering for cluster {label}, reembed={reembed}")
        stashfig(f"cluster-profile-label={label}-reembed={reembed}")
        plt.close()
        all_sub_results.append(sub_results)
    return all_sub_results, sub_data
def normalized_ase(graph, n_components=None, embed_kws=None):
    if embed_kws is None:
        embed_kws = {}
    ase = AdjacencySpectralEmbed(n_components=n_components, **embed_kws)
    latent = ase.fit_transform(graph)
    if isinstance(latent, tuple):
        latent = np.concatenate(latent, axis=-1)
    norm_vec = np.linalg.norm(latent, axis=1)
    norm_vec[norm_vec == 0] = 1  # avoid division by zero for isolated vertices
    norm_latent = latent / norm_vec[:, np.newaxis]
    return norm_latent
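# A minimal usage sketch for normalized_ase, assuming graspy's sbm simulator;
# row-normalization puts each latent position on the unit sphere, a common
# preprocessing step before cosine-based comparisons or clustering.
import numpy as np
from graspy.simulations import sbm

adj = sbm([50, 50], [[0.5, 0.1], [0.1, 0.5]])
norm_latent = normalized_ase(adj, n_components=2)
# Every row now has unit Euclidean norm (isolated vertices are left at zero).
assert np.allclose(np.linalg.norm(norm_latent, axis=1), 1)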
def lse(adj, n_components, regularizer=None, ptr=True):
    if ptr:
        adj = pass_to_ranks(adj)
    lap = to_laplace(adj, form="R-DAD", regularizer=regularizer)
    ase = AdjacencySpectralEmbed(n_components=n_components, diag_aug=False)
    latent = ase.fit_transform(lap)
    latent = np.concatenate(latent, axis=-1)
    return latent
def ase_concatenate(adjs, n_components, ptr=True):
    if ptr:
        adjs = [pass_to_ranks(a) for a in adjs]
    # Split the total dimension budget evenly across the input graphs.
    ase = AdjacencySpectralEmbed(n_components=n_components // len(adjs))
    graph_latents = []
    for a in adjs:
        latent = ase.fit_transform(a)
        latent = np.concatenate(latent, axis=-1)
        graph_latents.append(latent)
    latent = np.concatenate(graph_latents, axis=-1)
    return latent
def _embed(self, adj=None):
    if adj is None:
        adj = self.adj
    # TODO look into PTR at this level as well
    lp_inds = self.left_pair_inds
    rp_inds = self.right_pair_inds

    embed_adj = pass_to_ranks(adj)
    if self.embed == "ase":
        embedder = AdjacencySpectralEmbed(
            n_components=self.n_components, n_elbows=self.n_elbows
        )
        embed = embedder.fit_transform(embed_adj)
    elif self.embed == "lse":
        embedder = LaplacianSpectralEmbed(
            n_components=self.n_components,
            n_elbows=self.n_elbows,
            regularizer=self.regularizer,
        )
        embed = embedder.fit_transform(embed_adj)
    elif self.embed == "unscaled_ase":
        embed_adj = augment_diagonal(embed_adj)
        embed = selectSVD(
            embed_adj, n_components=self.n_components, n_elbows=self.n_elbows
        )
        embed = (embed[0], embed[2].T)

    X = np.concatenate(embed, axis=1)

    fraction_paired = (len(lp_inds) + len(rp_inds)) / len(self.root_inds)
    print(f"Learning transformation with {fraction_paired} neurons paired")

    R, _ = orthogonal_procrustes(X[lp_inds], X[rp_inds])
    X[self.left_inds] = X[self.left_inds] @ R

    if self.normalize:
        row_sums = np.sum(X, axis=1)
        X /= row_sums[:, None]
    return X
def _embed(self, adj=None):
    if adj is None:
        adj = self.adj
    lp_inds = self.left_pair_inds
    rp_inds = self.right_pair_inds

    embed_adj = pass_to_ranks(adj)  # TODO PTR here?
    if self.plus_c:
        embed_adj += 1 / adj.size
    if self.embed == "ase":
        embedder = AdjacencySpectralEmbed(
            n_components=self.n_components, n_elbows=self.n_elbows
        )
        embed = embedder.fit_transform(embed_adj)
    elif self.embed == "lse":
        embedder = LaplacianSpectralEmbed(
            n_components=self.n_components,
            n_elbows=self.n_elbows,
            regularizer=self.regularizer,
        )
        embed = embedder.fit_transform(embed_adj)
    elif self.embed == "unscaled_ase":
        embed_adj = augment_diagonal(embed_adj)
        embed = selectSVD(
            embed_adj, n_components=self.n_components, n_elbows=self.n_elbows
        )
        embed = (embed[0], embed[2].T)

    X = np.concatenate(embed, axis=1)

    fraction_paired = (len(lp_inds) + len(rp_inds)) / len(self.root_inds)
    print(f"Learning transformation with {fraction_paired} neurons paired")

    X = self._procrustes(X)

    if self.normalize:
        row_norms = np.linalg.norm(X, axis=1)
        X /= row_norms[:, None]
    return X
def ase_procrust_svd(embed_adjs):
    ase = AdjacencySpectralEmbed(n_components=None)
    all_embeds = []
    for a in embed_adjs:
        embed = ase.fit_transform(a)
        embed = np.concatenate(embed, axis=1)
        embed[left_inds] = embed[left_inds] @ orthogonal_procrustes(
            embed[lp_inds], embed[rp_inds]
        )[0]
        print(embed.shape)
        all_embeds.append(embed)
    cat_embed = np.concatenate(all_embeds, axis=1)
    print(cat_embed.shape)
    U, S, Vt = selectSVD(cat_embed, n_elbows=3)
    return U
def _embed(self, A1, A2):
    if self.n_components is None:
        # Use the larger of the two elbow-selected dimensions.
        num_dims1 = select_dimension(A1)[0][-1]
        num_dims2 = select_dimension(A2)[0][-1]
        self.n_components = max(num_dims1, num_dims2)

    ase = AdjacencySpectralEmbed(n_components=self.n_components)
    X1_hat = ase.fit_transform(A1)
    X2_hat = ase.fit_transform(A2)

    if isinstance(X1_hat, tuple) and isinstance(X2_hat, tuple):
        X1_hat = np.concatenate(X1_hat, axis=-1)
        X2_hat = np.concatenate(X2_hat, axis=-1)
    elif isinstance(X1_hat, tuple) ^ isinstance(X2_hat, tuple):
        msg = (
            "input graphs do not have same directedness. "
            "consider symmetrizing the directed graph."
        )
        raise ValueError(msg)

    return X1_hat, X2_hat
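# A quick illustration of the directedness check above, assuming graspy's
# er_np simulator: a directed graph embeds to an (out, in) tuple of latent
# positions, while an undirected one embeds to a single array.
from graspy.embed import AdjacencySpectralEmbed
from graspy.simulations import er_np

A_directed = er_np(50, 0.3, directed=True)
A_undirected = er_np(50, 0.3, directed=False)
ase = AdjacencySpectralEmbed(n_components=2)
print(isinstance(ase.fit_transform(A_directed), tuple))    # True
print(isinstance(ase.fit_transform(A_undirected), tuple))  # False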
pairs(joint_embed)
stashfig(f"joint-embed-{name}")

# %% [markdown]
# ##
U = omni_procrust_svd(embed_adjs)

# %% [markdown]
# ##
from sklearn.metrics import pairwise_distances

ase = AdjacencySpectralEmbed(n_components=None)
all_embeds = []
all_pdists = []
for a in embed_adjs:
    both_embed = ase.fit_transform(a)
    for embed in both_embed:
        embed[left_inds] = embed[left_inds] @ orthogonal_procrustes(
            embed[lp_inds], embed[rp_inds]
        )[0]
        print(embed.shape)
        all_embeds.append(embed)
        pdist = pairwise_distances(embed, metric="cosine")
        all_pdists.append(pdist)

from mvlearn.embed import MVMDS

mvmds = MVMDS(n_components=6)
mv_embed = mvmds.fit_transform(all_pdists)
pairs(mv_embed)
#%%
%matplotlib inline
from graspy.plot import *
from graspy.simulations import sbm
from graspy.embed import AdjacencySpectralEmbed
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

g = sbm([100, 100], [[0.8, 0.2], [0.2, 0.8]])
ase = AdjacencySpectralEmbed()
X = ase.fit_transform(g)
# NB: to re-enable the pairplot below, `labels` needs one entry per node
# (200 here); as written it has only 100.
labels = 25 * [0] + 25 * [1] + 25 * [2] + 24 * [-1] + [-2]
# pairplot(X, size=50, alpha=0.6)
plt.show()
adj = mg.adj
adj = pass_to_ranks(adj)
meta["inds"] = range(len(meta))
left_inds = meta[meta["left"]]["inds"]
right_inds = meta[meta["right"]]["inds"]
lp_inds, rp_inds = get_paired_inds(meta)

# %% [markdown]
# ## Embed
# Here the embedding is ASE, with PTR and DiagAug; the number of embedding
# dimensions is for now set to ZG2 (4 + 4). Using the known pairs as "seeds",
# the left embedding is matched to the right using Procrustes.
ase = AdjacencySpectralEmbed(n_components=None, n_elbows=2)
embed = ase.fit_transform(adj)
n_components = embed[0].shape[1]  # use all of ZG2
X = np.concatenate((embed[0][:, :n_components], embed[1][:, :n_components]), axis=-1)
R, _ = orthogonal_procrustes(X[lp_inds], X[rp_inds])
if CLUSTER_SPLIT == "best":
    X[left_inds] = X[left_inds] @ R

# %% [markdown]
# ## Clustering
# Clustering is performed using Gaussian mixture modeling. At each candidate
# value of k, 50 models are trained on the left embedding and 50 on the right
# (choosing the best covariance structure based on BIC on the train set).
results = crossval_cluster(
    X,
#%%
from graspy.embed import AdjacencySpectralEmbed, OmnibusEmbed
from graspy.utils import pass_to_ranks
from graspy.plot import pairplot

sum_adj = np.sum(np.array(mb_color_graphs), axis=0)
n_components = 4
ptr_adj = pass_to_ranks(sum_adj)
ase = AdjacencySpectralEmbed(n_components=n_components)
sum_latent = ase.fit_transform(ptr_adj)
sum_latent = np.concatenate(sum_latent, axis=-1)
pairplot(sum_latent, labels=mb_class_labels)

ptr_color_adjs = [pass_to_ranks(a) for a in mb_color_graphs]
omni = OmnibusEmbed(n_components=n_components // 4)
color_latent = omni.fit_transform(ptr_color_adjs)
# concatenate out/in latents, then concatenate across the four graphs
color_latent = np.concatenate(color_latent, axis=-1)
color_latent = np.concatenate(color_latent, axis=-1)
pairplot(color_latent, labels=mb_class_labels)

from graspy.embed import MultipleASE

mase = MultipleASE(n_components=n_components)
conf_mat = confusion_matrix(right_int_labels, pred_labels)
sns.heatmap(conf_mat, annot=True)

#%%
from graspy.embed import OmnibusEmbed, AdjacencySpectralEmbed
from scipy.linalg import orthogonal_procrustes

sns.set_palette("deep")

n_components = 3
ase = AdjacencySpectralEmbed(n_components=n_components)
latent = ase.fit_transform(right_graph)
latent = np.concatenate(latent, axis=-1)
p_latent = ase.fit_transform(gs.model_.p_mat_)
p_latent = np.concatenate(p_latent, axis=-1)
R, scale = orthogonal_procrustes(p_latent, latent)
p_latent = p_latent @ R
n_components *= 2  # out and in latents were concatenated

scatter_kws = dict(legend=False, linewidth=0, s=30)
cmap1 = sns.color_palette("Set1", n_colors=4)
cmap2 = np.array(sns.color_palette("Set1", n_colors=4, desat=0.4))
cmap2 = cmap2[[3, 0, 1, 2]]
cmap2 = list(cmap2)
    simultaneous=simultaneous,
    stop_nodes=source_inds,
    max_hops=max_hops,
    allow_loops=False,
)
back_hop_hist = td.multistart(out_inds)
back_hop_hist = back_hop_hist.T
full_hop_hist = np.concatenate((fwd_hop_hist, back_hop_hist), axis=0)

# %% [markdown]
# ##
embedder = AdjacencySpectralEmbed(n_components=None, n_elbows=2)
embed = embedder.fit_transform(pass_to_ranks(adj))
embed = np.concatenate(embed, axis=-1)
lp_inds, rp_inds = get_paired_inds(meta)
R, _ = orthogonal_procrustes(embed[lp_inds], embed[rp_inds])
left_inds = meta[meta["left"]]["inds"]
right_inds = meta[meta["right"]]["inds"]
embed[left_inds] = embed[left_inds] @ R

# %% [markdown]
# ##
joint = np.concatenate((embed, full_hop_hist.T), axis=1)
right_inds = meta[meta["right"]]["inds"]


def remove_axis(ax):
    remove_spines(ax)
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_xticks([])
    ax.set_yticks([])


method = "ortho"
print("Embedding graph...")
embedder = AdjacencySpectralEmbed(n_components=None, n_elbows=2)
in_embed, out_embed = embedder.fit_transform(pass_to_ranks(adj))
procrust = Procrustes(method=method)
# procrust.fit(in_embed, x_seeds=lp_inds, y_seeds=rp_inds)
embed = np.concatenate((in_embed, out_embed), axis=-1)

dim1 = 0
dim2 = 4

fig, axs = plt.subplots(2, 2, figsize=(20, 20))
plot_df = pd.DataFrame(data=embed[:, [0, 1]])
plot_df["merge_class"] = meta["merge_class"].values
ax = axs[0, 0]
sns.scatterplot(
    data=plot_df,
    x=0,
    y=1,
from graspy.simulations import sbm
from sklearn.mixture import GaussianMixture

sns.set_context("talk")

n_per_comm = [1000, 1000, 1000]
n_verts = np.sum(n_per_comm)
block_probs = np.array([[0.5, 0.1, 0.1], [0.1, 0.5, 0.1], [0.1, 0.1, 0.5]])
adj, labels = sbm(n_per_comm, block_probs, return_labels=True)

# %%
ase = AdjacencySpectralEmbed(n_components=3)
Xhat = ase.fit_transform(adj)

# %%
# REF: Anton
def _fit_plug_in_variance_estimator(X):
    """
    Takes in ASE of a graph and returns a function that estimates
    the variance-covariance matrix at a given point using the plug-in
    estimator from the RDPG Central Limit Theorem.

    Parameters
    ----------
    X : np.ndarray, shape (n, d)
        adjacency spectral embedding of a graph
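# The function above is cut off before its body. A minimal sketch of how the
# plug-in estimator from the RDPG central limit theorem could be completed;
# this completion is an assumption, not the original code.
import numpy as np


def _fit_plug_in_variance_estimator_sketch(X):
    # Plug-in covariance from the RDPG CLT:
    #   Sigma(x) = Delta^{-1} E[(x'Xi - (x'Xi)^2) Xi Xi'] Delta^{-1},
    # with Delta = E[Xi Xi'] estimated by X'X / n.
    n = len(X)
    delta_inverse = np.linalg.inv(X.T @ X / n)

    def plug_in_variance_estimator(x):
        x = np.atleast_2d(x)  # query latent position(s), shape (m, d)
        # Bernoulli variance of each potential edge, per query point
        edge_var = (x @ X.T) - (x @ X.T) ** 2  # (m, n)
        # Variance-weighted second moment of the embedded points
        middle = np.einsum("mn,nd,ne->mde", edge_var, X, X) / n
        return delta_inverse @ middle @ delta_inverse

    return plug_in_variance_estimator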
def simulation(
    n, pi, normal_params, beta_params, cond_ind=True, errors=None, smooth=False, acorn=None
):
    # - Type checks
    if isinstance(normal_params, list):
        sbm_check = False
        # there are other checks to do..
    elif isinstance(normal_params, np.ndarray):
        if normal_params.ndim == 2:
            if np.sum(normal_params == normal_params.T) == np.prod(normal_params.shape):
                sbm_check = True
            else:
                msg = "if normal_params is a 2 dimensional array it must be symmetric"
                raise ValueError(msg)
        else:
            msg = "if normal_params is an array, it must be a 2 dimensional array"
            raise TypeError(msg)
    else:
        msg = "normal_params must be either a list or a 2 dimensional array"
        raise TypeError(msg)

    if acorn is None:
        acorn = np.random.randint(10**6)
    np.random.seed(acorn)

    # - Multinomial trial
    counts = np.random.multinomial(n, [pi, 1 - pi])

    # - Hard code the number of blocks
    K = 2

    # - Set labels
    labels = np.concatenate((np.zeros(counts[0]), np.ones(counts[1])))

    # - number of seeds = n_{i}/10
    n_seeds = np.round(0.1 * counts).astype(int)

    # - Set training and test data
    class_train_idx = [
        range(np.sum(counts[:k]), np.sum(counts[:k]) + n_seeds[k]) for k in range(K)
    ]
    train_idx = np.concatenate(class_train_idx).astype(int)
    test_idx = [k for k in range(n) if k not in train_idx]

    # - Total number of seeds
    m = np.sum(n_seeds)

    # - estimate class probabilities
    pi_hats = n_seeds / m

    # - Sample from beta distributions
    beta_samples = beta_sampler(counts, beta_params)
    Z = beta_samples

    # - Sample from multivariate normal or SBM, either independently of the Zs or not
    if cond_ind:
        if sbm_check:
            A = sbm(counts, normal_params)
            ase_obj = ASE(n_elbows=1)
            X = ase_obj.fit_transform(A)
        else:
            X = MVN_sampler(counts, normal_params)
            if len(normal_params[0][0]) == 1:
                X = X[:, np.newaxis]
    else:
        if sbm_check:
            # A big version of B, so connectivity probabilities can vary per node
            P = blowup(normal_params, counts)
            # Geometric-mean scaling; if the Z's are too small, a plain product
            # risks disconnecting the graph
            scales = np.prod(Z, axis=1) ** (1 / Z.shape[1])
            new_P = P * np.outer(scales, scales)  # new probability matrix
            A = sbm(np.ones(n).astype(int), new_P)
            ase_obj = ASE(n_elbows=1)
            X = ase_obj.fit_transform(A)
        else:
            X = conditional_MVN_sampler(
                Z=Z, rho=1, counts=counts, params=normal_params, seed=None
            )
            if len(normal_params[0][0]) == 1:
                X = X[:, np.newaxis]

    XZ = np.concatenate((X, Z), axis=1)

    # - Estimate normal parameters using seeds
    params = []
    for i in range(K):
        temp_mu, temp_cov = estimate_normal_parameters(X[class_train_idx[i]])
        params.append([temp_mu, temp_cov])

    # - Using conditional independence assumption (RF, KNN used for posterior estimates)
    if errors is None:
        errors = [[] for i in range(6)]

    rf1 = RF(n_estimators=100, max_depth=int(np.round(np.log(Z[train_idx].shape[0]))))
    rf1.fit(Z[train_idx], labels[train_idx])

    knn1 = KNN(n_neighbors=int(np.round(np.log(Z[train_idx].shape[0]))))
    knn1.fit(Z[train_idx], labels[train_idx])

    if smooth:
        temp_pred = classify(X[test_idx], Z[test_idx], params, rf1, m=m)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
        errors[0].append(temp_error)

        temp_pred = classify(X[test_idx], Z[test_idx], params, knn1, m=m)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
        errors[1].append(temp_error)
    else:
        temp_pred = classify(X[test_idx], Z[test_idx], params, rf1)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
        errors[0].append(temp_error)

        knn1 = KNN(n_neighbors=int(np.round(np.log(m))))
        knn1.fit(Z[train_idx], labels[train_idx])
        temp_pred = classify(X[test_idx], Z[test_idx], params, knn1)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
        errors[1].append(temp_error)

    temp_pred = QDA(X[test_idx], pi_hats, params)
    temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
    errors[2].append(temp_error)

    # - Not using conditional independence assumption (RF, KNN used for classification)
    rf2 = RF(n_estimators=100, max_depth=int(np.round(np.log(m))))
    rf2.fit(XZ[train_idx], labels[train_idx])
    temp_pred = rf2.predict(XZ[test_idx])
    temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
    errors[3].append(temp_error)

    knn2 = KNN(n_neighbors=int(np.round(np.log(m))))
    knn2.fit(XZ[train_idx], labels[train_idx])
    temp_pred = knn2.predict(XZ[test_idx])
    temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
    errors[4].append(temp_error)

    # adj and features are assumed to be module-level globals here
    temp_accuracy = GCN(adj, features, train_idx, labels)
    temp_error = 1 - temp_accuracy
    errors[5].append(temp_error)

    return errors
def _ase_embed(mat, atlas, graph_path, ID, subgraph_name="all_nodes",
               n_components=None, prune=0, norm=1):
    r"""
    Compute the adjacency spectral embedding of a graph.

    The adjacency spectral embedding (ASE) is a k-dimensional Euclidean
    representation of the graph based on its adjacency matrix. It relies on
    an SVD to reduce the dimensionality to the specified k, or, if k is
    unspecified, can find a number of dimensions automatically.

    Parameters
    ----------
    mat : ndarray or nx.Graph
        An nxn adjacency matrix or graph object.
    atlas : str
        The name of an atlas (indicating the node definition).
    graph_path : str
    ID : str
    subgraph_name : str

    Returns
    -------
    out_path : str
        File path to .npy file containing ASE embedding tensor.

    Notes
    -----
    The singular value decomposition:

    .. math:: A = U \Sigma V^T

    is used to find an orthonormal basis for a matrix, which in our case is
    the adjacency matrix of the graph. These basis vectors (in the matrices U
    or V) are ordered according to the amount of variance they explain in the
    original matrix. By selecting a subset of these basis vectors (through
    our choice of dimensionality reduction) we can find a lower dimensional
    space in which to represent the graph.

    References
    ----------
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E. "A Consistent
      Adjacency Spectral Embedding for Stochastic Blockmodel Graphs," Journal
      of the American Statistical Association, Vol. 107(499), 2012.
    """
    import os
    from pathlib import Path
    import networkx as nx
    import numpy as np
    from graspy.embed import AdjacencySpectralEmbed
    from pynets.stats.netstats import CleanGraphs

    # Adjacency spectral embedding
    print(f"Embedding unimodal asetome for atlas: {atlas} and {subgraph_name}...")
    ase = AdjacencySpectralEmbed(n_components=n_components)
    cg = CleanGraphs(None, None, graph_path, prune, norm)

    if float(norm) >= 1:
        G = cg.normalize_graph()
        mat_clean = nx.to_numpy_array(G)
    else:
        mat_clean = mat
    if float(prune) >= 1:
        graph_path_tmp = cg.prune_graph()[1]
        mat_clean = np.load(graph_path_tmp)

    ase_fit = ase.fit_transform(mat_clean)

    dir_path = str(Path(os.path.dirname(graph_path)).parent)
    namer_dir = f"{dir_path}/embeddings"
    if not os.path.isdir(namer_dir):
        os.makedirs(namer_dir, exist_ok=True)

    out_path = (
        f"{namer_dir}/gradient-ASE"
        f"_{atlas}_{subgraph_name}_{os.path.basename(graph_path)}"
    )

    print("Saving...")
    np.save(out_path, ase_fit)
    del ase, ase_fit
    return out_path
# %% [markdown]
# ##
matrixplot(
    path_indicator_mat[:50, :50],
    plot_type="scattermap",
    sizes=(0.2, 0.2),
    hue="weight",
    palette=sns.color_palette("husl", n_colors=10),
    ax=ax,
)

# %% [markdown]
# ##
embedder = AdjacencySpectralEmbed(n_components=None, n_elbows=2)
embed = embedder.fit_transform(adj)
embed = np.concatenate(embed, axis=-1)
pairplot(embed, labels=labels, palette="tab20")

# %% [markdown]
# ## Run paths
print(f"Running {n_init} random walks from each source node...")
transition_probs = to_markov_matrix(adj)
out_inds = np.where(labels == n_blocks - 1)[0]
source_inds = np.where(labels == 0)[0]


def rw_from_node(s):
    paths = []
def _ase_embed(mat, atlas, graph_path, ID, subgraph_name="whole_brain"):
    r"""
    Compute the adjacency spectral embedding of a graph.

    The adjacency spectral embedding (ASE) is a k-dimensional Euclidean
    representation of the graph based on its adjacency matrix. It relies on
    an SVD to reduce the dimensionality to the specified k, or, if k is
    unspecified, can find a number of dimensions automatically.

    Parameters
    ----------
    mat : ndarray or nx.Graph
        An nxn adjacency matrix or graph object.
    atlas : str
    graph_path : str
    ID : str
    subgraph_name : str

    Returns
    -------
    out_path : str
        File path to .npy file containing ASE embedding tensor.

    Notes
    -----
    The singular value decomposition:

    .. math:: A = U \Sigma V^T

    is used to find an orthonormal basis for a matrix, which in our case is
    the adjacency matrix of the graph. These basis vectors (in the matrices U
    or V) are ordered according to the amount of variance they explain in the
    original matrix. By selecting a subset of these basis vectors (through
    our choice of dimensionality reduction) we can find a lower dimensional
    space in which to represent the graph.

    References
    ----------
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E. "A Consistent
      Adjacency Spectral Embedding for Stochastic Blockmodel Graphs," Journal
      of the American Statistical Association, Vol. 107(499), 2012.
    """
    import os
    from pathlib import Path
    import numpy as np
    from pynets.core.utils import flatten
    from graspy.embed import AdjacencySpectralEmbed
    from graspy.utils import get_lcc
    from joblib import dump

    # Adjacency spectral embedding
    print(f"Embedding unimodal asetome for atlas: {atlas} and {subgraph_name}...")
    ase = AdjacencySpectralEmbed()
    ase_fit = ase.fit_transform(get_lcc(mat))

    dir_path = str(Path(os.path.dirname(graph_path)).parent)
    namer_dir = f"{dir_path}/embeddings"
    if not os.path.isdir(namer_dir):
        os.makedirs(namer_dir, exist_ok=True)

    out_path = f"{namer_dir}/{list(flatten(ID))[0]}_{atlas}_{subgraph_name}_asetome.npy"
    out_path_est = (
        f"{namer_dir}/{list(flatten(ID))[0]}_{atlas}_{subgraph_name}"
        f"_asetome_estimator.joblib"
    )
    dump(ase, out_path_est)

    print("Saving...")
    np.save(out_path, ase_fit)
    del ase, ase_fit
    return out_path
"APL": "MBIN", "sens": "ORN", } rows = [] for side in ["left", "right"]: print(side) side_mb_mg = side_mgs[side] labels = side_mb_mg.meta["class1"].values labels = np.vectorize(label_map.get)(labels) plot_labels = side_mb_mg.meta["merge_class"].values # embed ase = AdjacencySpectralEmbed(n_components=None, algorithm="randomized") embed = ase.fit_transform(pass_to_ranks(side_mb_mg.adj)) embed = np.concatenate(embed, axis=1) # cluster using AutoGMM method = "AutoGMM" agmm = AutoGMMCluster( min_components=2, max_components=10, affinity=["euclidean", "manhattan", "cosine"], covariance_type=["full"], n_jobs=-1, ) agmm.fit(embed, labels) agmm_results = agmm.results_.copy() agmm_results.sort_values("bic/aic", inplace=True) agmm_model = agmm.model_
block_p_hat = sbme.block_p_
block_heatmap(block_p_hat, title=r"Observed $\hat{B}$")

block_p_hat_unscaled = block_p_hat / scaling_factor
block_heatmap(block_p_hat_unscaled, title=r"Observed $\hat{B}$ (unscaled)")

# %% [markdown]
# ## Spectral embedding
# Here I use graspy to do ASE, LSE, and regularized LSE. Note that we're just
# using the SVDs here. There is an option for whether to throw out the first
# eigenvector.

#%% embeddings
embed_kws = dict(n_components=k + 1, algorithm="full", check_lcc=False)
ase = AdjacencySpectralEmbed(**embed_kws)
lse = LaplacianSpectralEmbed(form="DAD", **embed_kws)
rlse = LaplacianSpectralEmbed(form="R-DAD", **embed_kws)
ase_embed = ase.fit_transform(adj)
lse_embed = lse.fit_transform(adj)
rlse_embed = rlse.fit_transform(adj)
embeddings_list = [ase_embed, lse_embed, rlse_embed]

remove_first = True
for i, embedding in enumerate(embeddings_list):
    if remove_first:
        embeddings_list[i] = embedding[:, 1:]
    else:
        embeddings_list[i] = embedding[:, :k]

#%% setting up for plotting
degrees = adj.sum(axis=1)
methods = ["ase", "lse", "rlse"]
embeddings = dict(zip(methods, embeddings_list))
n_verts = 200
p = 0.5
n_components = 1
n_sims = 1000

# Run experiment
estimated_latents = np.zeros((n_sims, 2))
for i in range(n_sims):
    graph = er_np(n_verts, p, directed=False, loops=False)
    ase_diag = AdjacencySpectralEmbed(n_components=n_components, diag_aug=True)
    ase = AdjacencySpectralEmbed(n_components=n_components, diag_aug=False)
    diag_latent = ase_diag.fit_transform(graph)
    ase_latent = ase.fit_transform(graph)
    estimated_latents[i, 0] = np.mean(diag_latent)
    estimated_latents[i, 1] = np.mean(ase_latent)

diffs = estimated_latents - np.sqrt(p)  # the true latent position is sqrt(p)

# Plot results
plt.style.use("seaborn-white")
sns.set_palette("deep")
sns.set_context("talk", font_scale=1)
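# Why sqrt(p): viewing ER(n, p) as a rank-one RDPG, every node shares a scalar
# latent position x with edge probability x * x = p, so x = sqrt(p). A quick
# sanity-check sketch; the simulation parameters here are assumptions.
import numpy as np
from graspy.simulations import er_np
from graspy.embed import AdjacencySpectralEmbed

p = 0.5
graph = er_np(500, p, directed=False, loops=False)
latent = AdjacencySpectralEmbed(n_components=1, diag_aug=True).fit_transform(graph)
# Up to a global sign flip, the embedded positions concentrate near sqrt(p).
print(np.mean(np.abs(latent)), np.sqrt(p))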