def make_lcc(self):
    """Restrict this object to the largest connected component, in place.

    ``get_lcc`` is applied to ``self.adj``; the returned indices are used to
    select the matching rows of ``self.meta``. The graph ``self.g`` is then
    rebuilt from the reduced adjacency/metadata and ``self.n_verts`` is
    refreshed from the new adjacency shape.

    Returns
    -------
    self
        The same instance, so calls can be chained.
    """
    self.adj, kept_rows = get_lcc(self.adj, return_inds=True)
    # Keep the metadata rows aligned with the rows/columns kept in adj.
    self.meta = self.meta.iloc[kept_rows, :]
    self.g = _numpy_pandas_to_nx(self.adj, self.meta)
    self.n_verts = self.adj.shape[0]
    return self
def preprocess_graph(adj, class_labels, skeleton_labels):
    """Sort, reduce to the largest connected component, and drop pendants.

    Three index selections are applied in turn, each one to the adjacency
    matrix and to both label arrays so they stay aligned:

    1. order nodes by descending summed row + column weight
       (the "number of synapses"),
    2. keep the nodes returned by ``get_lcc``,
    3. drop nodes whose count of nonzero row + column entries is exactly 1.

    Parameters
    ----------
    adj : np.ndarray
        Square adjacency matrix.
    class_labels, skeleton_labels : np.ndarray
        Per-node label arrays, indexed the same way as ``adj``.

    Returns
    -------
    tuple
        ``(adj, class_labels, skeleton_labels)`` after all three steps.
    """
    # sort by number of synapses
    synapse_totals = adj.sum(axis=0) + adj.sum(axis=1)
    descending = np.argsort(synapse_totals)[::-1]
    adj = adj[np.ix_(descending, descending)]
    selections = [descending]

    # remove disconnected nodes
    adj, lcc_inds = get_lcc(adj, return_inds=True)
    selections.append(lcc_inds)

    # remove pendants
    n_links = np.count_nonzero(adj, axis=0) + np.count_nonzero(adj, axis=1)
    non_pendant = np.flatnonzero(n_links != 1)
    adj = adj[np.ix_(non_pendant, non_pendant)]
    selections.append(non_pendant)

    # Replay the same selections, in order, on both label arrays.
    for chosen in selections:
        class_labels = class_labels[chosen]
        skeleton_labels = skeleton_labels[chosen]

    return adj, class_labels, skeleton_labels
def preprocess_graph(adj, *args):
    """Reduce a graph to its largest connected component and drop pendants.

    Parameters
    ----------
    adj : np.ndarray
        Square adjacency matrix.
    *args : np.ndarray
        Any number of per-node arrays (e.g. labels) indexed like ``adj``.
        Each one is subset with the same indices as ``adj`` at every step so
        it stays aligned with the returned adjacency matrix.

    Returns
    -------
    tuple
        ``(adj, *args)`` with every element restricted to the kept nodes.
    """
    # remove disconnected nodes
    adj, lcc_inds = get_lcc(adj, return_inds=True)
    kept_args = [a[lcc_inds] for a in args]

    # remove pendants
    degrees = np.count_nonzero(adj, axis=0) + np.count_nonzero(adj, axis=1)
    not_pendant_inds = np.flatnonzero(degrees != 1)
    adj = adj[np.ix_(not_pendant_inds, not_pendant_inds)]
    # not_pendant_inds are positions within the LCC, so they must be applied
    # to the LCC-filtered arrays, not to the original ``args``.
    kept_args = [a[not_pendant_inds] for a in kept_args]

    return tuple([adj] + kept_args)
edgelist_df = preprocess(mg, remove_pdiff=True)

rows = []
neigh_probs = []
# Sweep 20 evenly spaced thresholds on the normalized edge weight.
thresholds = np.linspace(0, 0.1, 20)
for threshold in thresholds:
    # Keep edges with max_syn_weight > 1 and max_norm_weight above threshold.
    thresh_df = edgelist_df[edgelist_df["max_syn_weight"] > 1]
    # thresh_df = edgelist_df.copy()
    thresh_df = thresh_df[thresh_df["max_norm_weight"] > threshold]
    nodelist = list(mg.g.nodes())
    nodelist = [int(i) for i in nodelist]
    thresh_g = nx.from_pandas_edgelist(
        thresh_df, edge_attr=True, create_using=nx.DiGraph
    )
    nx.set_node_attributes(thresh_g, mg.meta.to_dict(orient="index"))
    thresh_g = get_lcc(thresh_g)
    n_verts = len(thresh_g)

    # Mark nodes whose partner did not survive thresholding as unpaired (-1).
    n_missing = 0
    for n, data in thresh_g.nodes(data=True):
        pair = data["Pair"]
        pair_id = data["Pair ID"]
        if pair != -1 and pair not in thresh_g:
            # Graph.node was removed in NetworkX 2.4; Graph.nodes[n] is the
            # supported accessor for a node's attribute dict.
            thresh_g.nodes[n]["Pair"] = -1
            thresh_g.nodes[n]["Pair ID"] = -1
            n_missing += 1

    # NOTE(review): this rebinds mg, so the next iteration reads mg.g and
    # mg.meta from the thresholded graph rather than the original — confirm
    # that is intended.
    mg = MetaGraph(thresh_g, weight="max_norm_weight")
    meta = mg.meta
right_inds = np.where(side_labels == "R")[0] adj = adj[np.ix_(right_inds, right_inds)] class_labels = class_labels[right_inds] skeleton_labels = skeleton_labels[right_inds] else: side = "full brain" # sort by number of synapses degrees = adj.sum(axis=0) + adj.sum(axis=1) sort_inds = np.argsort(degrees)[::-1] adj = adj[np.ix_(sort_inds, sort_inds)] class_labels = class_labels[sort_inds] skeleton_labels = skeleton_labels[sort_inds] # remove disconnected nodes adj, lcc_inds = get_lcc(adj, return_inds=True) class_labels = class_labels[lcc_inds] skeleton_labels = skeleton_labels[lcc_inds] # remove pendants degrees = np.count_nonzero(adj, axis=0) + np.count_nonzero(adj, axis=1) not_pendant_mask = degrees != 1 not_pendant_inds = np.array(range(len(degrees)))[not_pendant_mask] adj = adj[np.ix_(not_pendant_inds, not_pendant_inds)] class_labels = class_labels[not_pendant_inds] skeleton_labels = skeleton_labels[not_pendant_inds] def to_laplace(graph, form="DAD", regularizer=None): r""" A function to convert graph adjacency matrix to graph laplacian.
def stashfig(name, **kws):
    """Save the figure called ``name`` via ``savefig`` when SAVEFIGS is truthy.

    Extra keyword arguments are forwarded to ``savefig`` unchanged.
    """
    if SAVEFIGS:
        # NOTE(review): DEFUALT_DPI looks like a misspelling of DEFAULT_DPI;
        # verify against the constant's definition before renaming.
        savefig(name, foldername=FNAME, fmt=DEFAULT_FMT, dpi=DEFUALT_DPI, **kws)


GRAPH_VERSION = "2019-09-18-v2"
# Load the "Gad" graph together with its class and side labels.
adj, class_labels, side_labels = load_everything("Gad", GRAPH_VERSION, return_class=True, return_side=True)
# Restrict to the largest connected component and keep the labels aligned.
adj, inds = get_lcc(adj, return_inds=True)
class_labels = class_labels[inds]
side_labels = side_labels[inds]
n_verts = adj.shape[0]

# %% [markdown]
# #
graph_types = ["Gad", "Gaa", "Gdd", "Gda"]
graph_type_labels = [r"A $\to$ D", r"A $\to$ A", r"D $\to$ D", r"D $\to$ A"]
# Re-assigned with the same value as above.
GRAPH_VERSION = "2019-09-18-v2"
sns.set_context("talk", font_scale=1)


def gridmap(A, ax=None, legend=False, sizes=(10, 70)):
    if ax is None:
def _ase_embed(mat, atlas, graph_path, ID, subgraph_name="whole_brain"):
    r"""
    Compute and save the adjacency spectral embedding (ASE) of a graph.

    The adjacency spectral embedding is a k-dimensional Euclidean
    representation of the graph based on its adjacency matrix. The graph is
    first reduced to its largest connected component with ``get_lcc`` and then
    embedded with ``AdjacencySpectralEmbed`` using its default arguments. The
    fitted estimator (joblib) and the embedding (.npy) are both written to an
    ``embeddings`` directory, which is created if it does not exist.

    Parameters
    ----------
    mat : object accepted by ``graspy.utils.get_lcc``
        Graph to embed (presumably an ndarray adjacency matrix or an
        nx.Graph -- confirm against callers).
    atlas : str
        Atlas name; used in the progress message and the output file names.
    graph_path : str
        Path of the graph file. Outputs are written to the ``embeddings``
        directory located in the parent of the directory containing
        ``graph_path``.
    ID : str
        Subject identifier. It is passed through ``flatten`` and the first
        resulting element is used as the file name prefix.
    subgraph_name : str
        Label of the subgraph; used in the progress message and file names.

    Returns
    -------
    out_path : str
        File path to .npy file containing ASE embedding tensor.

    Notes
    -----
    The singular value decomposition:

    .. math:: A = U \Sigma V^T

    is used to find an orthonormal basis for a matrix, which in our case is
    the adjacency matrix of the graph. These basis vectors (in the matrices U
    or V) are ordered according to the amount of variance they explain in the
    original matrix. By selecting a subset of these basis vectors (through
    our choice of dimensionality reduction) we can find a lower dimensional
    space in which to represent the graph.

    References
    ----------
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E. "A
      Consistent Adjacency Spectral Embedding for Stochastic Blockmodel
      Graphs," Journal of the American Statistical Association,
      Vol. 107(499), 2012

    """
    import numpy as np
    from pynets.core.utils import flatten
    from graspy.embed import AdjacencySpectralEmbed
    from joblib import dump
    from graspy.utils import get_lcc

    # Adjacency Spectral embedding
    print(f"Embedding unimod asetome for atlas: {atlas} and {subgraph_name}...")
    ase = AdjacencySpectralEmbed()
    ase_fit = ase.fit_transform(get_lcc(mat))

    dir_path = str(Path(os.path.dirname(graph_path)).parent)
    namer_dir = f"{dir_path}/embeddings"
    # exist_ok=True already tolerates an existing directory.
    os.makedirs(namer_dir, exist_ok=True)

    # Both outputs share the same stem; only the suffix differs.
    out_stem = f"{namer_dir}/{list(flatten(ID))[0]}_{atlas}_{subgraph_name}_asetome"
    out_path = f"{out_stem}.npy"
    out_path_est = f"{out_stem}_estimator.joblib"

    dump(ase, out_path_est)

    print("Saving...")
    np.save(out_path, ase_fit)

    return out_path
# Count the nonzero entries left in the thresholded graph and report the
# fraction of edges removed relative to n_edges.
n_edges_t = np.count_nonzero(full_graph_t)
print(f"Number of edges remaining: {n_edges_t}")
print(f"Removed {(n_edges - n_edges_t) / n_edges} of edges")
# Plot the thresholded graph with simple_classes as inner labels and
# hemisphere as outer labels.
gridplot(
    [full_graph_t],
    inner_hier_labels=simple_classes,
    outer_hier_labels=hemisphere,
    title="Weight thresholded, Gadn, PTR-simple-all",
    **gridplot_kws,
)

#%%
print("Finding largest connected component")
# Keep the largest connected component and subset both label arrays with the
# same indices so they stay aligned with the reduced graph.
lcc_graph_t, lcc_inds = get_lcc(full_graph_t, return_inds=True)
lcc_simple_classes = simple_classes[lcc_inds]
lcc_hemisphere = hemisphere[lcc_inds]
n_nodes_t = lcc_graph_t.shape[0]
print(f"Number of remaining nodes: {n_nodes_t}")
print(f"Removed {(n_nodes - n_nodes_t) / n_nodes} of nodes")

#%%
print("Embedding binarized graph")
from graspy.plot import screeplot

# NOTE(review): embed_graph is not assigned in this section; confirm it is
# defined earlier and is the graph intended here (rather than lcc_graph_t).
screeplot(embed_graph, cumulative=False, show_first=20, n_elbows=3)

#%%
n_components = None
# Load each split graph type, restrict it to the chosen hemisphere, and keep
# it both by name (dict) and in load order (list).
split_graph_dict = {}
split_graph_list = []
for i, graph_type in enumerate(split_graph_types):
    # load
    graph = load_june(graph_type)
    graph = get_subgraph(graph, "Hemisphere", side)
    # save for later
    split_graph_dict[graph_type] = graph
    split_graph_list.append(graph)

#%% embedding parameters
n_components = 3

#%% ASE
# Labels are read from the LCC graph before preprocess() replaces it.
embed_graph = get_lcc(full_graph)
labels = get_simple(embed_graph)
embed_graph = preprocess(embed_graph)
ase = AdjacencySpectralEmbed(n_components=n_components)
latent = ase.fit_transform(embed_graph)
# Joins the pieces of latent along the last axis; presumably fit_transform
# returned a tuple of embeddings here — TODO confirm.
latent = np.concatenate(latent, axis=-1)
pairplot(latent, labels=labels, title="ASE" + base_title)
save("ASE")

#%% LSE
regularizer = 1
# Same LCC / labels / preprocess sequence as the ASE section above.
embed_graph = get_lcc(full_graph)
labels = get_simple(embed_graph)
embed_graph = preprocess(embed_graph)
lse = LaplacianSpectralEmbed(form="R-DAD", n_components=n_components,
outer_hier_labels=hemisphere,
hier_label_fontsize=10,
)
# Plot the thresholded graph with the "simple-all" transform, labelled by
# simple_classes (inner) and hemisphere (outer).
gridplot(
    [full_graph_t],
    transform="simple-all",
    height=15,
    inner_hier_labels=simple_classes,
    outer_hier_labels=hemisphere,
    hier_label_fontsize=10,
    sizes=(1, 10),
)

#%%
print("Finding largest connected component")
# Keep the largest connected component and subset both label arrays with the
# same indices so they stay aligned with the reduced graph.
lcc_graph_t, lcc_inds = get_lcc(full_graph_t, return_inds=True)
lcc_simple_classes = simple_classes[lcc_inds]
lcc_hemisphere = hemisphere[lcc_inds]
n_nodes_t = lcc_graph_t.shape[0]
print(f"Number of remaining nodes: {n_nodes_t}")
print(f"Removed {(n_nodes - n_nodes_t) / n_nodes} of nodes")

#%%
print("Embedding graph")
# Embedding settings for this section.
n_components = 4
regularizer = 2
ptr = True
binary = False
embed_graph = lcc_graph_t
# Apply pass_to_ranks to the LCC graph when ptr is enabled.
if ptr:
    embed_graph = pass_to_ranks(embed_graph)
#%% graph_types = ["Gaan", "Gadn", "Gdan", "Gddn"] n_components = 4 # load the right side use_graph = "Gn" hemisphere = "right" print(f"Using graph {use_graph}") Gn = load_graph(use_graph) Gn = get_subgraph(Gn, "Hemisphere", hemisphere) n_verts_original = len(Gn) print(f"Selected {hemisphere} side") print("Checking if graph is fully connected") print(is_fully_connected(Gn)) Gn, inds = get_lcc(Gn, return_inds=True) num_removed = n_verts_original - len(Gn) print(f"Removed {num_removed} node") # select metadata classes = meta_to_array(Gn, "Class") simple_classes = to_simple_class(classes) names = meta_to_array(Gn, "Name") ids = meta_to_array(Gn, "ID") # load adjacency and preprocess Gn_adj = import_graph(Gn) Gn_adj = remove_loops(Gn_adj) # Gn = pass_to_ranks(Gn) Gn_adj = binarize(Gn_adj)