def _ase_embed(mat, atlas, graph_path, ID, subgraph_name="all_nodes",
               n_components=None, prune=0, norm=1):
    r"""
    Compute and save the adjacency spectral embedding (ASE) of a graph.

    The adjacency spectral embedding is a k-dimensional Euclidean
    representation of the graph based on its adjacency matrix. It relies on
    an SVD to reduce the dimensionality to the specified k, or if k is
    unspecified, can find a number of dimensions automatically.

    Parameters
    ----------
    mat : ndarray or nx.Graph
        An nxn adjacency matrix or graph object. Only used when
        ``float(norm) < 1`` and ``float(prune) < 1``; otherwise the matrix
        is derived from `graph_path` through `CleanGraphs`. The input is
        never modified in place.
    atlas : str
        The name of an atlas (indicating the node definition).
    graph_path : str
        File path of the graph. It is handed to `CleanGraphs` and also
        determines the output location and file name.
    ID : str
        Subject identifier. Not referenced by this function; kept for
        interface compatibility.
    subgraph_name : str
        Label inserted into the progress message and the output file name.
    n_components : int or None
        Forwarded to `AdjacencySpectralEmbed`.
    prune : int
        Forwarded to `CleanGraphs`. If ``float(prune) >= 1`` the matrix is
        replaced by the one loaded from the path returned by
        ``CleanGraphs.prune_graph()``.
    norm : int
        Forwarded to `CleanGraphs`. If ``float(norm) >= 1`` the matrix is
        taken from ``CleanGraphs.normalize_graph()``.

    Returns
    -------
    out_path : str or None
        File path to the .npy file containing the ASE embedding, or None
        when the cleaned matrix is (numerically) all zeros.

    Notes
    -----
    The singular value decomposition:

    .. math:: A = U \Sigma V^T

    is used to find an orthonormal basis for a matrix, which in our case is
    the adjacency matrix of the graph. These basis vectors (in the matrices
    U or V) are ordered according to the amount of variance they explain in
    the original matrix. By selecting a subset of these basis vectors
    (through our choice of dimensionality reduction) we can find a lower
    dimensional space in which to represent the graph.

    References
    ----------
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E. "A
      Consistent Adjacency Spectral Embedding for Stochastic Blockmodel
      Graphs," Journal of the American Statistical Association,
      Vol. 107(499), 2012

    """
    import os
    from pathlib import Path

    import networkx as nx
    import numpy as np
    from graspologic.embed.ase import AdjacencySpectralEmbed
    from pynets.stats.netstats import CleanGraphs

    # Adjacency Spectral embedding
    print(
        f"Embedding unimodal asetome for atlas: {atlas} and "
        f"{subgraph_name}..."
    )
    ase = AdjacencySpectralEmbed(n_components=n_components)
    cg = CleanGraphs(None, None, graph_path, prune, norm)

    if float(norm) >= 1:
        G = cg.normalize_graph()
        mat_clean = nx.to_numpy_array(G)
    elif isinstance(mat, nx.Graph):
        # The documented input types include graph objects.
        mat_clean = nx.to_numpy_array(mat)
    else:
        # Copy so that the zeroing below never touches the caller's array.
        mat_clean = np.array(mat)

    if float(prune) >= 1:
        graph_path_tmp = cg.prune_graph()[1]
        mat_clean = np.load(graph_path_tmp)

    # Replace NaN and +/-inf entries with zero.
    mat_clean[~np.isfinite(mat_clean)] = 0

    # Nothing to embed if the matrix is effectively empty.
    if (np.abs(mat_clean) < 0.0000001).all() or np.isnan(np.sum(mat_clean)):
        return None

    ase_fit = ase.fit_transform(mat_clean)

    # Embeddings live in an "embeddings" folder one level above the
    # directory that holds the graph file.
    dir_path = str(Path(os.path.dirname(graph_path)).parent)
    namer_dir = f"{dir_path}/embeddings"
    os.makedirs(namer_dir, exist_ok=True)

    out_path = (
        f"{namer_dir}/gradient-ASE"
        f"_{atlas}_{subgraph_name}_{os.path.basename(graph_path)}"
    )
    # np.save silently appends ".npy" when it is missing; mirror that here
    # so the returned path always names the file actually written.
    if not out_path.endswith(".npy"):
        out_path += ".npy"

    print("Saving...")
    np.save(out_path, ase_fit)

    return out_path
def _omni_embed(pop_array, atlas, graph_path_list, ID,
                subgraph_name="all_nodes", n_components=None, norm=1):
    r"""
    Omnibus embedding of arbitrary number of input graphs with matched vertex
    sets.

    Given :math:`A_1, A_2, ..., A_m` a collection of (possibly weighted)
    adjacency matrices of a collection :math:`m` undirected graphs with
    matched vertices. Then the :math:`(mn \times mn)` omnibus matrix,
    :math:`M`, has the subgraph where :math:`M_{ij} = \frac{1}{2}(A_i + A_j)`.
    The omnibus matrix is then embedded using adjacency spectral embedding.
    The resulting per-graph embeddings are finally reduced with classical
    multidimensional scaling and saved to disk.

    Parameters
    ----------
    pop_array : list of nx.Graph or ndarray, or ndarray
        If list of nx.Graph, each Graph must contain same number of nodes.
        If list of ndarray, each array must have shape (n_vertices,
        n_vertices). If ndarray, then array must have shape (n_graphs,
        n_vertices, n_vertices). Only read when ``float(norm) < 1``; its
        entries are never modified in place.
    atlas : str
        The name of an atlas (indicating the node definition).
    graph_path_list : list
        List of file paths to graphs in pop_array. The first entry
        determines the output location and file name.
    ID : str
        An arbitrary subject identifier. Not referenced by this function;
        kept for interface compatibility.
    subgraph_name : str
        Label inserted into the progress message and the output file name.
    n_components : int or None
        Forwarded to both `OmnibusEmbed` and `ClassicalMDS`.
    norm : int
        Forwarded to `CleanGraphs`. If ``float(norm) >= 1`` each matrix is
        taken from ``CleanGraphs.normalize_graph()`` instead of `pop_array`.

    Returns
    -------
    out_path : str
        File path to the .npy file containing the omni embedding, or to an
        empty placeholder file ending in ``_NULL`` when no usable matrix
        was found.

    References
    ----------
    .. [1] Levin, K., Athreya, A., Tang, M., Lyzinski, V., & Priebe, C. E.
      (2017, November). A central limit theorem for an omnibus embedding of
      multiple random dot product graphs. In Data Mining Workshops (ICDMW),
      2017 IEEE International Conference on (pp. 964-967). IEEE.
    .. [2] Chung, J., Pedigo, B. D., Bridgeford, E. W., Varjavand, B. K.,
      Helm, H. S., & Vogelstein, J. T. (2019). Graspy: Graph statistics in
      python. Journal of Machine Learning Research.

    """
    import os
    from pathlib import Path

    import networkx as nx
    import numpy as np
    from graspologic.embed.mds import ClassicalMDS
    from graspologic.embed.omni import OmnibusEmbed
    from pynets.stats.netstats import CleanGraphs

    # Embeddings live in an "embeddings" folder one level above the
    # directory that holds the first graph file.
    dir_path = str(Path(os.path.dirname(graph_path_list[0])).parent)
    namer_dir = f"{dir_path}/embeddings"
    os.makedirs(namer_dir, exist_ok=True)

    clean_mats = []
    for idx, graph_path in enumerate(graph_path_list):
        cg = CleanGraphs(None, None, graph_path, 0, norm)

        if float(norm) >= 1:
            mat_clean = nx.to_numpy_array(cg.normalize_graph())
        else:
            raw = pop_array[idx]
            if isinstance(raw, nx.Graph):
                # The documented input types include graph objects.
                mat_clean = nx.to_numpy_array(raw)
            else:
                # Copy so the zeroing below never touches the caller's data.
                mat_clean = np.array(raw)

        # Replace NaN and +/-inf entries with zero.
        mat_clean[~np.isfinite(mat_clean)] = 0

        if not np.isnan(np.sum(mat_clean)) and np.isfinite(mat_clean).all():
            clean_mats.append(mat_clean)

    if clean_mats:
        # Omnibus embedding
        print(
            f"Embedding unimodal omnetome for atlas: {atlas} and "
            f"{subgraph_name}..."
        )
        omni = OmnibusEmbed(n_components=n_components, check_lcc=False)
        mds = ClassicalMDS(n_components=n_components)
        omni_fit = omni.fit_transform(clean_mats)

        # Transform omnibus tensor into dissimilarity feature.
        # The first axis of omni_fit (graphs) is moved to the end so that
        # the leading axis handed to MDS is the second axis of omni_fit
        # (presumably vertices - confirm against graspologic docs).
        # A plain reshape to that target shape would only reinterpret the
        # flat buffer and mix values belonging to different rows, so the
        # axes are permuted explicitly instead.
        node_major = np.ascontiguousarray(np.moveaxis(omni_fit, 0, -1))
        mds_fit = mds.fit_transform(node_major)

        out_path = (
            f"{namer_dir}/gradient-OMNI_{atlas}_{subgraph_name}_"
            f"{os.path.basename(graph_path_list[0]).split('_thrtype')[0]}.npy"
        )

        print("Saving...")
        np.save(out_path, mds_fit)
    else:
        # Add a null tmp file to prevent pool from breaking
        out_path = (
            f"{namer_dir}/gradient-OMNI"
            f"_{atlas}_{subgraph_name}_"
            f"{os.path.basename(graph_path_list[0])}_NULL"
        )
        if not os.path.exists(out_path):
            # Path.touch is portable; os.mknod is Unix-only and may need
            # elevated privileges.
            Path(out_path).touch()

    return out_path