def fit(self, Xs):
    """
    Fit the model with Xs and apply the embedding on Xs.
    The embeddings are saved as a class attribute.

    Parameters
    ==========
    Xs : list of array-likes or numpy.ndarray
        - Xs length: n_views
        - Xs[i] shape: (n_samples, n_features_i)
        The data to embed. Each X in Xs will receive its own embedding.
    """
    Xs = check_Xs(Xs)
    dissimilarities = []
    for X in Xs:
        if self.normalize is not None:
            X = normalize(X, norm=self.normalize)
        dissimilarity = pairwise_distances(X, metric=self.distance_metric)
        dissimilarities.append(dissimilarity)

    embedder = OmnibusEmbed(n_components=self.n_components,
                            algorithm=self.algorithm,
                            n_iter=self.n_iter)
    self.embeddings_ = embedder.fit_transform(dissimilarities)
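
# --- Usage sketch (hypothetical data, illustrative parameters) ---
# A self-contained version of what fit() above computes: each "view" of the
# same 50 samples is turned into a pairwise-distance matrix, and the distance
# matrices are then jointly embedded with OmnibusEmbed. The random views and
# parameter choices here are assumptions made only for illustration.
import numpy as np
from sklearn.metrics import pairwise_distances
from graspy.embed import OmnibusEmbed

rng = np.random.default_rng(0)
Xs = [rng.normal(size=(50, 10)), rng.normal(size=(50, 20))]  # n_views = 2

dissimilarities = [pairwise_distances(X, metric="euclidean") for X in Xs]
embedder = OmnibusEmbed(n_components=2)
embeddings = embedder.fit_transform(dissimilarities)
print(embeddings.shape)  # (2, 50, 2): one embedding per view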
def omni_embed(pop_array, atlas, graph_path, ID):
    import os
    import numpy as np
    from sklearn.feature_selection import VarianceThreshold
    from graspy.embed import OmnibusEmbed, ClassicalMDS
    from pynets.core.utils import flatten

    # Drop graphs whose upper-triangle entries show (near-)zero variance
    variance_threshold = VarianceThreshold(threshold=0.05)
    diags = np.array([np.triu(pop_array[i]) for i in range(len(pop_array))])
    diags_red = diags.reshape(diags.shape[0], diags.shape[1] * diags.shape[2])
    var_thr = variance_threshold.fit(diags_red.T)
    graphs_ix_keep = var_thr.get_support(indices=True)
    pop_array_red = [pop_array[i] for i in graphs_ix_keep]

    # Omnibus embedding -- random dot product graph (rdpg)
    print("%s%s%s" % ('Embedding ensemble for atlas: ', atlas, '...'))
    omni = OmnibusEmbed(check_lcc=False)
    try:
        omni_fit = omni.fit_transform(pop_array_red)
        mds = ClassicalMDS()
        mds_fit = mds.fit_transform(omni_fit)
    except Exception:
        omni_fit = omni.fit_transform(pop_array)
        mds = ClassicalMDS()
        mds_fit = mds.fit_transform(omni_fit)

    # Transform omnibus tensor into dissimilarity feature
    dir_path = os.path.dirname(graph_path)
    out_path = "%s%s%s%s%s%s" % (dir_path, '/', list(flatten(ID))[0],
                                 '_omnetome_', atlas, '.npy')
    print('Saving...')
    np.save(out_path, mds_fit)
    del mds, mds_fit, omni, omni_fit
    return
def _omni_embed(pop_array, atlas, graph_path, ID, subgraph_name='whole_brain'):
    import os
    import numpy as np
    from pathlib import Path
    from sklearn.feature_selection import VarianceThreshold
    from pynets.core.utils import flatten
    from graspy.embed import OmnibusEmbed, ClassicalMDS

    # Drop graphs whose upper-triangle entries show (near-)zero variance
    variance_threshold = VarianceThreshold(threshold=0.00001)
    diags = np.array([np.triu(pop_array[i]) for i in range(len(pop_array))])
    graphs_ix_keep = variance_threshold.fit(
        diags.reshape(diags.shape[0],
                      diags.shape[1] * diags.shape[2]).T).get_support(indices=True)
    pop_array_red = [pop_array[i] for i in graphs_ix_keep]

    # Omnibus embedding -- random dot product graph (rdpg)
    print("%s%s%s%s%s" % ('Embedding ensemble for atlas: ', atlas, ' and ',
                          subgraph_name, '...'))
    omni = OmnibusEmbed(check_lcc=False)
    mds = ClassicalMDS()
    try:
        omni_fit = omni.fit_transform(pop_array_red)
    except Exception:
        omni_fit = omni.fit_transform(pop_array)

    # Transform omnibus tensor into dissimilarity feature
    mds_fit = mds.fit_transform(omni_fit)

    dir_path = str(Path(os.path.dirname(graph_path)).parent)
    namer_dir = dir_path + '/embeddings'
    if not os.path.isdir(namer_dir):
        os.makedirs(namer_dir, exist_ok=True)

    out_path = "%s%s%s%s%s%s%s%s" % (namer_dir, '/', list(flatten(ID))[0],
                                     '_omnetome_', atlas, '_', subgraph_name,
                                     '.npy')
    print('Saving...')
    np.save(out_path, mds_fit)
    del mds, mds_fit, omni, omni_fit
    return out_path
def omni(adjs, n_components):
    if PTR:
        adjs = [pass_to_ranks(a) for a in adjs]
    omni = OmnibusEmbed(n_components=n_components // len(adjs))
    latent = omni.fit_transform(adjs)
    latent = np.concatenate(latent, axis=-1)  # first concat is for in/out
    latent = np.concatenate(latent, axis=-1)  # second concatenates each graph
    return latent
def reg_omni(adjs):
    adjs = [a + 1 / (len(lp_inds) ** 2) for a in adjs]
    adjs = [augment_diagonal(a) for a in adjs]
    omni = OmnibusEmbed(n_components=4, check_lcc=False, n_iter=10)
    embed = omni.fit_transform(adjs)
    embed = np.concatenate(embed, axis=-1)
    embed = embed[2:]  # TODO
    embed = np.concatenate(embed, axis=0)
    return embed
def omni(adjs, n_components=4, remove_first=None, concatenate=True):
    adjs = preprocess_adjs(adjs)
    omni = OmnibusEmbed(n_components=n_components, check_lcc=False, n_iter=10)
    embed = omni.fit_transform(adjs)
    embed = np.concatenate(embed, axis=-1)  # this is for left/right latent positions
    if remove_first is not None:
        embed = embed[remove_first:]
    if concatenate:
        embed = np.concatenate(embed, axis=0)
    return embed
def omni_procrust_svd(embed_adjs):
    omni = OmnibusEmbed(n_components=None, check_lcc=False)
    joint_embed = omni.fit_transform(embed_adjs)
    cat_embed = np.concatenate(joint_embed, axis=-1)
    # print(f"Omni concatenated embedding shape: {cat_embed.shape}")
    for e in cat_embed:
        e[left_inds] = e[left_inds] @ orthogonal_procrustes(e[lp_inds], e[rp_inds])[0]
    cat_embed = np.concatenate(cat_embed, axis=-1)
    U, S, Vt = selectSVD(cat_embed, n_elbows=3)
    return U
def omni(
    adjs,
    n_components=4,
    remove_first=None,
    concat_graphs=True,
    concat_directed=True,
    method="ase",
):
    """Omni with a few extra (optional) bells and whistles for concatenation post embed

    Parameters
    ----------
    adjs : list of ndarray
        Adjacency matrices with matched vertex sets, one per graph.
    n_components : int, optional
        Embedding dimension per graph, by default 4
    remove_first : int, optional
        If not None, drop this many graphs from the front of the embedding,
        by default None
    concat_graphs : bool, optional
        Stack the per-graph embeddings along the vertex axis, by default True
    concat_directed : bool, optional
        Concatenate the out/in (left/right) latent positions, by default True
    method : str, optional
        Preprocessing method passed to ``preprocess_adjs``, by default "ase"

    Returns
    -------
    ndarray
        Omnibus embedding, concatenated according to the flags above.
    """
    adjs = preprocess_adjs(adjs, method=method)
    omni = OmnibusEmbed(n_components=n_components, check_lcc=False, n_iter=10)
    embed = omni.fit_transform(adjs)
    if concat_directed:
        embed = np.concatenate(
            embed, axis=-1
        )  # this is for left/right latent positions
    if remove_first is not None:
        embed = embed[remove_first:]
    if concat_graphs:
        embed = np.concatenate(embed, axis=0)
    return embed
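
# --- Usage sketch (toy graphs, illustrative parameters) ---
# Shows the shape bookkeeping that concat_directed / concat_graphs perform in
# omni() above, without the external preprocess_adjs() step: two directed ER
# graphs are omnibus-embedded, the out/in latent positions are stacked
# column-wise, then the per-graph embeddings are stacked row-wise. The graph
# sizes and probabilities are assumptions for illustration only.
import numpy as np
from graspy.embed import OmnibusEmbed
from graspy.simulations import er_np

adjs = [er_np(60, 0.2, directed=True) for _ in range(2)]
embed = OmnibusEmbed(n_components=4, check_lcc=False).fit_transform(adjs)
embed = np.concatenate(embed, axis=-1)  # concat_directed: (2, 60, 8)
embed = np.concatenate(embed, axis=0)   # concat_graphs:   (120, 8)
print(embed.shape)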
def __init__(self, learning_method, memory=None, verbose=False,
             plot_method=None, kfold=KFold(n_splits=4, shuffle=True)):
    super(OmnibusPipeline, self).__init__(steps=learning_method,
                                          memory=memory,
                                          verbose=verbose,
                                          plot_method=plot_method,
                                          kfold=kfold)
    # Prepend an omnibus embedding + flattening step if it is not already there
    if self.steps[0][0] != 'Omni':
        self.steps = [('Omni', OmnibusEmbed()),
                      ('Flat', FunctionTransformer(
                          lambda x: x.reshape((x.shape[0], -1)),
                          validate=False))] + self.steps
    if plot_method is not None:
        self.plot = plot_method
    if kfold is not None:
        self.kfold = kfold
def lateral_omni(adj, lp_inds, rp_inds):
    # Ipsilateral (left->left, right->right) subgraphs
    left_left_adj = pass_to_ranks(adj[np.ix_(lp_inds, lp_inds)])
    right_right_adj = pass_to_ranks(adj[np.ix_(rp_inds, rp_inds)])
    omni = OmnibusEmbed(n_components=3, n_elbows=2, check_lcc=False, n_iter=10)
    ipsi_embed = omni.fit_transform([left_left_adj, right_right_adj])
    ipsi_embed = np.concatenate(ipsi_embed, axis=-1)
    ipsi_embed = np.concatenate(ipsi_embed, axis=0)

    # Contralateral (left->right, right->left) subgraphs
    left_right_adj = pass_to_ranks(adj[np.ix_(lp_inds, rp_inds)])
    right_left_adj = pass_to_ranks(adj[np.ix_(rp_inds, lp_inds)])
    omni = OmnibusEmbed(n_components=3, n_elbows=2, check_lcc=False, n_iter=10)
    contra_embed = omni.fit_transform([left_right_adj, right_left_adj])
    contra_embed = np.concatenate(contra_embed, axis=-1)
    contra_embed = np.concatenate(contra_embed, axis=0)

    embed = np.concatenate((ipsi_embed, contra_embed), axis=1)
    return embed
left_subgraph = g.subgraph(right_nodes)

#%%
# %config InlineBackend.figure_format = 'png'
right_graph_list = [get_subgraph(g, "Hemisphere", "right") for g in graph_list]

graphs = right_graph_list
n_graphs = 4
n_verts = len(graphs[0].nodes)
n_components = 2

embed_graphs = [pass_to_ranks(g) for g in graphs]
omni = OmnibusEmbed(n_components=n_components)
latent = omni.fit_transform(embed_graphs)
latent = np.concatenate(latent, axis=-1)
plot_latent = latent.reshape((n_graphs * n_verts, 2 * n_components))
labels = (
    n_verts * ["A -> A"]
    + n_verts * ["A -> D"]
    + n_verts * ["D -> D"]
    + n_verts * ["D -> A"]
)
# latent = np.concatenate(list(latent))
pairplot(plot_latent, labels=labels)

#%% concatenate and look at that
concatenate_latent = np.concatenate(list(latent), axis=-1)
concatenate_latent.shape
sum_adj = np.sum(np.array(mb_color_graphs), axis=0)

n_components = 4
ptr_adj = pass_to_ranks(sum_adj)
ase = AdjacencySpectralEmbed(n_components=n_components)
sum_latent = ase.fit_transform(ptr_adj)
sum_latent = np.concatenate(sum_latent, axis=-1)
pairplot(sum_latent, labels=mb_class_labels)

ptr_color_adjs = [pass_to_ranks(a) for a in mb_color_graphs]
# graph_sum = [np.sum(a) for a in mb_color_graphs]
# ptr_color_adjs = [ptr_color_adjs[i] + (1 / graph_sum[i]) for i in range(4)]
omni = OmnibusEmbed(n_components=n_components // 4)
color_latent = omni.fit_transform(ptr_color_adjs)
color_latent = np.concatenate(color_latent, axis=-1)
color_latent = np.concatenate(color_latent, axis=-1)
pairplot(color_latent, labels=mb_class_labels)

from graspy.embed import MultipleASE

mase = MultipleASE(n_components=n_components)
mase_latent = mase.fit_transform(ptr_color_adjs)
mase_latent = np.concatenate(mase_latent, axis=-1)
pairplot(mase_latent, labels=mb_class_labels)

#%%
graph = load_networkx("G")
graph_types = ["Gad", "Gaa", "Gdd", "Gda"] adjs = [] for g in graph_types: temp_mg = load_metagraph(g, version="2020-04-01") temp_mg.reindex(mg.meta.index, use_ids=True) temp_adj = temp_mg.adj adjs.append(temp_adj) # embed_adjs = [pass_to_ranks(a) for a in adjs] # %% [markdown] # ## just omni on the 4 colors for the right right subgraph right_embed_adjs = [pass_to_ranks(a[np.ix_(rp_inds, rp_inds)]) for a in adjs] omni = OmnibusEmbed(check_lcc=False) embeds = omni.fit_transform(right_embed_adjs) embeds = np.concatenate(embeds, axis=-1) embeds = np.concatenate(embeds, axis=-1) print(embeds.shape) U, S, V = selectSVD(embeds, n_components=8) labels = meta["merge_class"].values[rp_inds] plot_pairs(U, labels) stashfig(f"simple-omni-right-reduced-4-color") # %% [markdown] # ## Look at what each edge color type looks like when regularized by g # only the right right subgraph
idx = mg.meta[mg.meta["hemisphere"].isin(["L", "R"])].index
mg = mg.reindex(idx, use_ids=True)
mg = mg.make_lcc()
print(len(mg))
mg.calculate_degrees(inplace=True)
meta = mg.meta
meta["inds"] = range(len(meta))
adj = mg.adj.copy()
lp_inds, rp_inds = get_paired_inds(meta)
left_inds = meta[meta["left"]]["inds"]

# adj = pass_to_ranks(adj)
left_left_adj = pass_to_ranks(adj[np.ix_(lp_inds, lp_inds)])
right_right_adj = pass_to_ranks(adj[np.ix_(rp_inds, rp_inds)])

omni = OmnibusEmbed(n_components=3, n_elbows=2, check_lcc=False, n_iter=10)
embed = omni.fit_transform([left_left_adj, right_right_adj])
embed = np.concatenate(embed, axis=-1)
embed = np.concatenate(embed, axis=0)

labels = np.concatenate(
    (meta["merge_class"].values[lp_inds], meta["merge_class"].values[rp_inds])
)
plot_pairs(
    embed,
    labels,
    left_pair_inds=np.arange(len(lp_inds)),
    right_pair_inds=np.arange(len(lp_inds)) + len(lp_inds),
)
stashfig(f"omni-pairs-high-threshold-quantile={quantile}")
print()
def _omni_embed(pop_array, atlas, graph_path, ID, subgraph_name="whole_brain"): """ Omnibus embedding of arbitrary number of input graphs with matched vertex sets. Given :math:`A_1, A_2, ..., A_m` a collection of (possibly weighted) adjacency matrices of a collection :math:`m` undirected graphs with matched vertices. Then the :math:`(mn \times mn)` omnibus matrix, :math:`M`, has the subgraph where :math:`M_{ij} = \frac{1}{2}(A_i + A_j)`. The omnibus matrix is then embedded using adjacency spectral embedding. Parameters ---------- graphs : list of nx.Graph or ndarray, or ndarray If list of nx.Graph, each Graph must contain same number of nodes. If list of ndarray, each array must have shape (n_vertices, n_vertices). If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices). atlas : str graph_path : str ID : str subgraph_name : str Returns ------- out_path : str File path to .npy file containing omni embedding tensor. References ---------- .. [1] Levin, K., Athreya, A., Tang, M., Lyzinski, V., & Priebe, C. E. (2017, November). A central limit theorem for an omnibus embedding of multiple random dot product graphs. In Data Mining Workshops (ICDMW), 2017 IEEE International Conference on (pp. 964-967). IEEE. .. [2] Chung, J., Pedigo, B. D., Bridgeford, E. W., Varjavand, B. K., Helm, H. S., & Vogelstein, J. T. (2019). Graspy: Graph statistics in python. Journal of Machine Learning Research. """ import numpy as np from pynets.core.utils import flatten from graspy.embed import OmnibusEmbed, ClassicalMDS from joblib import dump # Omnibus embedding print( f"{'Embedding unimodal omnetome for atlas: '}{atlas}{' and '}{subgraph_name}{'...'}" ) omni = OmnibusEmbed(check_lcc=False) mds = ClassicalMDS() omni_fit = omni.fit_transform(pop_array) # Transform omnibus tensor into dissimilarity feature mds_fit = mds.fit_transform(omni_fit) dir_path = str(Path(os.path.dirname(graph_path)).parent) namer_dir = f"{dir_path}/embeddings" if not os.path.isdir(namer_dir): os.makedirs(namer_dir, exist_ok=True) out_path = ( f"{namer_dir}/{list(flatten(ID))[0]}_{atlas}_{subgraph_name}_omnetome.npy" ) out_path_est_omni = f"{namer_dir}/{list(flatten(ID))[0]}_{atlas}_{subgraph_name}_masetome_estimator_omni.joblib" out_path_est_mds = f"{namer_dir}/{list(flatten(ID))[0]}_{atlas}_{subgraph_name}_masetome_estimator_mds.joblib" dump(omni, out_path_est_omni) dump(omni, out_path_est_mds) print("Saving...") np.save(out_path, mds_fit) del mds, mds_fit, omni, omni_fit return out_path
#%%
graph_types = ["Gaan", "Gadn", "Gdan", "Gddn"]
adjs = []
for g in graph_types:
    g = load_networkx(g)
    matched_graph = g.subgraph(nodelist)
    adj_df = nx.to_pandas_adjacency(matched_graph, nodelist=nodelist)
    adj = adj_df.values
    adjs.append(adj)

# class_labels = meta_df.loc[nodelist.astype(int), "Class"]

from graspy.embed import OmnibusEmbed
from graspy.utils import pass_to_ranks

omni = OmnibusEmbed(n_components=2)
adjs = [pass_to_ranks(a) for a in adjs]
omni_latent = omni.fit_transform(adjs)
omni_latent = np.concatenate(omni_latent, axis=-1)
omni_latent.shape

cat_omni_latent = np.concatenate(omni_latent, axis=-1)
cat_omni_latent.shape

#%%
pairplot(cat_omni_latent, labels=side_labels)

#%%
mean_omni_latent = (cat_omni_latent[:n_per_side] + cat_omni_latent[n_per_side:]) / 2
pairplot(mean_omni_latent, labels=class_labels[:n_per_side])
n_per_side = len(left_nodes)
class_labels = meta_df.loc[nodelist.astype(int), "Class"].values

#%% Omni the left and right, using the sum matrix, RAW
ptr = False
adj = get_paired_adj("G", nodelist)
if ptr:
    adj = pass_to_ranks(adj)

left_left_adj = adj[:n_per_side, :n_per_side]
right_right_adj = adj[n_per_side:, n_per_side:]
adjs = [left_left_adj, right_right_adj]

omni = OmnibusEmbed(n_components=None)
latents = omni.fit_transform(adjs)
latents = np.concatenate(latents, axis=-1)

diff = latents[0] - latents[1]
norm_diff_summed = np.linalg.norm(diff, axis=1)
sns.distplot(norm_diff_summed)

#%% Now do the same thing, but incorporate the 4-colors
graph_types = ["Gad", "Gaa", "Gdd", "Gda"]
left_color_adjs = []
right_color_adjs = []
for t in graph_types:
    adj = get_paired_adj(t, nodelist)
    if ptr:
def _omni_embed(pop_array, atlas, graph_path_list, ID, subgraph_name="all_nodes",
                n_components=None, norm=1):
    r"""
    Omnibus embedding of arbitrary number of input graphs with matched vertex
    sets.

    Given :math:`A_1, A_2, ..., A_m`, a collection of (possibly weighted)
    adjacency matrices of :math:`m` undirected graphs with matched vertices,
    the :math:`(mn \times mn)` omnibus matrix :math:`M` has :math:`ij`-th
    block :math:`M_{ij} = \frac{1}{2}(A_i + A_j)`. The omnibus matrix is then
    embedded using adjacency spectral embedding.

    Parameters
    ----------
    pop_array : list of nx.Graph or ndarray, or ndarray
        If list of nx.Graph, each Graph must contain same number of nodes.
        If list of ndarray, each array must have shape (n_vertices, n_vertices).
        If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices).
    atlas : str
        The name of an atlas (indicating the node definition).
    graph_path_list : list
        List of file paths to graphs in pop_array.
    ID : str
        An arbitrary subject identifier.
    subgraph_name : str
    n_components : int
        Number of embedding dimensions. Default is None.
    norm : int
        Graph normalization scheme passed to CleanGraphs. Default is 1.

    Returns
    -------
    out_path : str
        File path to .npy file containing omni embedding tensor.

    References
    ----------
    .. [1] Levin, K., Athreya, A., Tang, M., Lyzinski, V., & Priebe, C. E.
      (2017, November). A central limit theorem for an omnibus embedding of
      multiple random dot product graphs. In Data Mining Workshops (ICDMW),
      2017 IEEE International Conference on (pp. 964-967). IEEE.
    .. [2] Chung, J., Pedigo, B. D., Bridgeford, E. W., Varjavand, B. K.,
      Helm, H. S., & Vogelstein, J. T. (2019). Graspy: Graph statistics in
      python. Journal of Machine Learning Research.

    """
    import os
    import networkx as nx
    import numpy as np
    from pathlib import Path
    from pynets.core.utils import flatten
    from graspy.embed import OmnibusEmbed, ClassicalMDS
    from joblib import dump
    from pynets.stats.netstats import CleanGraphs

    # Normalize each graph (or fall back to the raw population array)
    clean_mats = []
    for i, graph_path in enumerate(graph_path_list):
        cg = CleanGraphs(None, None, graph_path, 0, norm)
        if float(norm) >= 1:
            G = cg.normalize_graph()
            mat_clean = nx.to_numpy_array(G)
        else:
            mat_clean = pop_array[i]
        clean_mats.append(mat_clean)

    # Omnibus embedding
    print(f"Embedding unimodal omnetome for atlas: {atlas} and "
          f"{subgraph_name}...")
    omni = OmnibusEmbed(n_components=n_components, check_lcc=False)
    mds = ClassicalMDS(n_components=n_components)
    omni_fit = omni.fit_transform(clean_mats)

    # Transform omnibus tensor into dissimilarity feature
    mds_fit = mds.fit_transform(
        omni_fit.reshape(omni_fit.shape[1], omni_fit.shape[2],
                         omni_fit.shape[0]))

    dir_path = str(Path(os.path.dirname(graph_path_list[0])).parent)
    namer_dir = f"{dir_path}/embeddings"
    if not os.path.isdir(namer_dir):
        os.makedirs(namer_dir, exist_ok=True)

    out_path = (
        f"{namer_dir}/gradient-OMNI_{atlas}_{subgraph_name}_"
        f"{os.path.basename(graph_path_list[0]).split('_thrtype')[0]}.npy")

    # out_path_est_omni = f"{namer_dir}/gradient-OMNI_{atlas}_" \
    #                     f"{subgraph_name}_" \
    #                     f"{os.path.basename(graph_path).split('_thrtype')[0]}" \
    #                     f"_MDS.joblib"
    # out_path_est_mds = f"{namer_dir}/gradient-OMNI_{atlas}_" \
    #                    f"{subgraph_name}_" \
    #                    f"{os.path.basename(graph_path).split('_thrtype')[0]}" \
    #                    f"_MDS.joblib"
    # dump(omni, out_path_est_omni)
    # dump(omni, out_path_est_mds)

    print("Saving...")
    np.save(out_path, mds_fit)
    del mds, mds_fit, omni, omni_fit
    return out_path
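
# --- Worked example of the omnibus matrix from the docstring above ---
# For m = 2 small made-up graphs, the (i, j)-th block of the (mn x mn) omnibus
# matrix is (A_i + A_j) / 2, so the diagonal blocks are the original adjacency
# matrices; OmnibusEmbed then applies adjacency spectral embedding to this
# matrix and splits the rows back into one latent-position matrix per graph.
import numpy as np

A1 = np.array([[0, 1, 0],
               [1, 0, 1],
               [0, 1, 0]], dtype=float)
A2 = np.array([[0, 0, 1],
               [0, 0, 1],
               [1, 1, 0]], dtype=float)

M = np.block([[A1, (A1 + A2) / 2],
              [(A1 + A2) / 2, A2]])
print(M.shape)  # (6, 6), i.e. (mn, mn) with m = 2 graphs on n = 3 nodes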
import matplotlib.pyplot as plt

# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
plt.subplots(figsize=(10, 10))
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)

# Plot also the training points
plt.scatter(Xhat[:, 0], Xhat[:, 1], c=labels_sbm, cmap=plt.cm.coolwarm)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())
plt.show()

OP = OmnibusPipeline(learning_method=[('svc', SVC(gamma='scale', kernel='linear'))])  # .fit(Gs, labels)

from graspy.embed import OmnibusEmbed
from sklearn.preprocessing import FunctionTransformer

OPT = OmnibusPipeline([('Omni', OmnibusEmbed()),
                       ('Flat', FunctionTransformer(lambda x: x.reshape(x.shape[0], -1),
                                                    validate=False))])
L = OPT.fit_transform(Gs)
sc = SVC(gamma='scale', kernel='linear')
sc.fit(L, labels)
print(sc.predict(L[30:34]))
print('\n')
print(L.shape)

# OP.fit(Gs, labels)
# print(OP.predict([1]))
print(OP.cross_val_score(Gs, labels))
adj, class_labels, side_labels = load_everything(
    "G", version=BRAIN_VERSION, return_class=True, return_side=True
)

color_adjs = []
for t in GRAPH_TYPES:
    adj = load_everything(t)
    color_adjs.append(adj)

sum_adj = np.sum(color_adjs, axis=0)
embed_adjs = [color_adjs[0], sum_adj]
embed_adjs = [pass_to_ranks(g) for g in embed_adjs]

embed = OmnibusEmbed(n_components=4)
latents = embed.fit_transform(embed_adjs)
latents = np.concatenate(latents, axis=-1)

n_verts = sum_adj.shape[0]
indicator = n_verts * ["AtD"] + n_verts * ["Sum"]
plot_latents = np.concatenate(latents, axis=0)
pairplot(plot_latents, labels=indicator)

plot_class_labels = np.concatenate((class_labels, class_labels))

#%%
pairplot(plot_latents, plot_class_labels, palette="tab20")

#%%
diffs = np.linalg.norm(latents[0] - latents[1], axis=1)

plt.figure(figsize=(20, 10))
sns.set_palette("tab20")
sns.set_context("talk", font_scale=1.25)
# %% Load and preprocess all graphs
graph_types = ["Gad", "Gaa", "Gdd", "Gda"]
adjs = []
for g in graph_types:
    temp_mg = load_metagraph(g, version="2020-04-01")
    temp_mg.reindex(mg.meta.index, use_ids=True)
    temp_adj = temp_mg.adj
    adjs.append(temp_adj)

embed_adjs = [pass_to_ranks(a) for a in adjs]
embed_adjs = [a + 1 / a.size for a in embed_adjs]
embed_adjs = [augment_diagonal(a) for a in embed_adjs]

#%%
omni = OmnibusEmbed(n_components=None, check_lcc=False)
joint_embed = omni.fit_transform(embed_adjs)
print(joint_embed[0].shape)

# %% [markdown]
# ##
meta = mg.meta
lp_inds, rp_inds = get_paired_inds(meta)
left_inds = meta[meta["left"]]["inds"]
right_inds = meta[meta["right"]]["inds"]

cat_embed = np.concatenate(joint_embed, axis=-1)
for e in cat_embed:
    e[left_inds] = e[left_inds] @ orthogonal_procrustes(e[lp_inds], e[rp_inds])[0]
cat_embed = np.concatenate(cat_embed, axis=-1)