def run_fit(seed, param_grid, directed, n_init, n_jobs, co_block):
    """Run ``select_sbm`` on the left and then the right Drosophila graph.

    Each graph is loaded, symmetrized by averaging when ``directed`` is
    false, binarized and handed to ``select_sbm``. The result is stored with
    ``save_obj`` under ``"cosbm_left_df"`` / ``"cosbm_right_df"``.

    ``seed`` is accepted but never read inside this function.

    Returns
    -------
    int
        Always 0.
    """
    hemispheres = (
        (load_drosophila_left, "cosbm_left_df"),
        (load_drosophila_right, "cosbm_right_df"),
    )
    for load_graph, save_name in hemispheres:
        adjacency = load_graph()
        if not directed:
            adjacency = symmetrize(adjacency, method="avg")
        adjacency = binarize(adjacency)
        result_df = select_sbm(
            adjacency,
            param_grid,
            directed=directed,
            n_jobs=n_jobs,
            n_init=n_init,
            co_block=co_block,
        )
        save_obj(result_df, file_obs, save_name)
    return 0
def run_fit(seed, param_grid, directed, n_init, n_jobs):
    """Run ``select_dcsbm`` on the left and then the right Drosophila graph.

    Each graph is loaded, symmetrized by averaging when ``directed`` is
    false, binarized and passed to ``select_dcsbm`` with
    ``degree_directed=False``. The result is stored with ``save_obj`` under
    ``"ddcsbm_left_df"`` / ``"ddcsbm_right_df"``.

    ``seed`` is accepted but never read inside this function.

    Returns
    -------
    int
        Always 0.
    """
    hemispheres = (
        (load_drosophila_left, "ddcsbm_left_df"),
        (load_drosophila_right, "ddcsbm_right_df"),
    )
    for load_graph, save_name in hemispheres:
        adjacency = load_graph()
        if not directed:
            adjacency = symmetrize(adjacency, method="avg")
        adjacency = binarize(adjacency)
        result_df = select_dcsbm(
            adjacency,
            param_grid,
            directed=directed,
            degree_directed=False,
            n_jobs=n_jobs,
            n_init=n_init,
        )
        save_obj(result_df, file_obs, save_name)
    return 0
def run_fit(seed, directed, n_components_range):
    """Run a ``LatentDistributionTest`` between the left and right graphs.

    One test (500 bootstraps) is fitted for every value in
    ``n_components_range``. The p-value, sample test statistic and the number
    of components of each fit are collected into a DataFrame that is stored
    with ``save_obj`` under ``"ldt_df"``.

    ``seed`` is accepted but never read inside this function.

    Returns
    -------
    int
        Always 0.
    """
    # The labels returned by the loaders are not needed here.
    left_graph, _ = load_left()
    if not directed:
        left_graph = symmetrize(left_graph, method="avg")

    right_graph, _ = load_right()
    if not directed:
        right_graph = symmetrize(right_graph, method="avg")

    rows = []
    for n_components in n_components_range:
        test = LatentDistributionTest(n_components=n_components, n_bootstraps=500)
        test.fit(left_graph, right_graph)
        rows.append(
            {
                "p-value": test.p_,
                "sample-t": test.sample_T_statistic_,
                "n_components": n_components,
            }
        )
        print(f"Done with {n_components}")

    save_obj(pd.DataFrame(rows), file_obs, "ldt_df")
    return 0
def to_minigraph(
    adj,
    labels,
    drop_neg=True,
    remove_diag=True,
    size_scaler=1,
    use_counts=False,
    use_weights=True,
    color_map=None,
):
    """Convert an adjacency matrix and a partition into a block-level graph.

    The block-to-block table comes from ``get_blockmodel_df`` and is turned
    into a ``networkx.DiGraph`` whose nodes carry these attributes:
    ``"Size"``, ``"Signal Flow"``, ``"AdjEvec-0"`` .. ``"AdjEvec-9"``,
    ``"Spring-x"``, ``"Spring-y"`` and ``"Color"``.

    Parameters
    ----------
    adj, labels
        Forwarded to ``get_blockmodel_df``; ``labels`` is also used to count
        the members of each block.
    drop_neg : bool
        Drop the row and column labelled ``"-1"`` from the block table when
        that label is present.
    remove_diag : bool
        Zero the diagonal of the block table (no self-edges between a block
        and itself).
    size_scaler : number
        Multiplier applied to the per-block member counts for ``"Size"``.
    use_counts, use_weights
        Forwarded to ``get_blockmodel_df`` as ``return_counts`` /
        ``use_weights``.
    color_map : dict or None
        Node -> color mapping; defaults to pairing the unique labels with
        ``cc.glasbey_light``.

    Returns
    -------
    networkx.DiGraph
    """
    # convert the adjacency and a partition to a minigraph based on SBM probs
    prob_df = get_blockmodel_df(
        adj, labels, return_counts=use_counts, use_weights=use_weights
    )
    if drop_neg and ("-1" in prob_df.index):
        prob_df.drop("-1", axis=0, inplace=True)
        prob_df.drop("-1", axis=1, inplace=True)

    if remove_diag:
        # Build a new frame rather than mutating ``prob_df.values`` in place:
        # that array is not guaranteed to be a writable view of the frame, so
        # the in-place subtraction could silently do nothing or raise.
        prob_df = prob_df - np.diag(np.diag(prob_df.values))

    g = nx.from_pandas_adjacency(prob_df, create_using=nx.DiGraph())
    uni_labels, counts = np.unique(labels, return_counts=True)

    # add size attribute based on number of vertices
    size_map = dict(zip(uni_labels, size_scaler * counts))
    nx.set_node_attributes(g, size_map, name="Size")

    # add signal flow attribute (for the minigraph itself)
    mini_adj = nx.to_numpy_array(g, nodelist=uni_labels)
    node_signal_flow = signal_flow(mini_adj)
    sf_map = dict(zip(uni_labels, node_signal_flow))
    nx.set_node_attributes(g, sf_map, name="Signal Flow")

    # add spectral properties
    sym_adj = symmetrize(mini_adj)
    n_components = 10
    latent = AdjacencySpectralEmbed(n_components=n_components).fit_transform(sym_adj)
    for i in range(n_components):
        latent_dim = latent[:, i]
        lap_map = dict(zip(uni_labels, latent_dim))
        nx.set_node_attributes(g, lap_map, name=f"AdjEvec-{i}")

    # add spring layout properties
    pos = nx.spring_layout(g)
    spring_x = {node: xy[0] for node, xy in pos.items()}
    spring_y = {node: xy[1] for node, xy in pos.items()}
    nx.set_node_attributes(g, spring_x, name="Spring-x")
    nx.set_node_attributes(g, spring_y, name="Spring-y")

    # add colors
    if color_map is None:
        color_map = dict(zip(uni_labels, cc.glasbey_light))
    nx.set_node_attributes(g, color_map, name="Color")
    return g
def run_fit(
    seed,
    n_components_try_range,
    n_components_try_rdpg,
    n_block_try_range,
    directed,
    n_init,
    embed_kws_try_range,
    n_jobs,
):
    """Grid-search ``select_dcsbm`` on the left Drosophila graph.

    The graph is symmetrized by averaging when ``directed`` is false and then
    binarized. NumPy's global RNG is seeded with ``seed`` before the search.
    The head of the result is printed and the full result is stored with
    ``save_obj`` under ``"grid_search_out"``.

    ``n_components_try_rdpg`` is accepted but never read inside this function.

    Returns
    -------
    int
        Always 0.
    """
    adjacency = load_drosophila_left()
    if not directed:
        adjacency = symmetrize(adjacency, method="avg")
    adjacency = binarize(adjacency)

    np.random.seed(seed)

    search_space = dict(
        n_components=n_components_try_range,
        n_blocks=n_block_try_range,
        embed_kws=embed_kws_try_range,
    )
    out_df = select_dcsbm(
        adjacency,
        search_space,
        directed=directed,
        degree_directed=False,
        n_jobs=n_jobs,
        n_init=n_init,
    )
    print(out_df.head())
    save_obj(out_df, file_obs, "grid_search_out")
    return 0
def run_fit(seed, param_grid, directed, n_init, n_jobs):
    """Fit ``select_dcsbm`` twice on the left Drosophila graph.

    The graph is symmetrized by averaging when ``directed`` is false and then
    binarized. After seeding NumPy's global RNG with ``seed``, the search is
    run first with ``degree_directed=False`` and then with
    ``degree_directed=True``. Both results are stored afterwards with
    ``save_obj`` under ``"dcsbm_out_df"`` and ``"ddcsbm_out_df"``.

    Returns
    -------
    int
        Always 0.
    """
    adjacency = load_drosophila_left()
    if not directed:
        adjacency = symmetrize(adjacency, method="avg")
    adjacency = binarize(adjacency)

    np.random.seed(seed)

    # Run both fits before saving anything, as in the original ordering.
    variants = ((False, "dcsbm_out_df"), (True, "ddcsbm_out_df"))
    fitted = {}
    for degree_directed, save_name in variants:
        fitted[save_name] = select_dcsbm(
            adjacency,
            param_grid,
            directed=directed,
            degree_directed=degree_directed,
            n_jobs=n_jobs,
            n_init=n_init,
        )

    for save_name, result_df in fitted.items():
        save_obj(result_df, file_obs, save_name)
    return 0
def run_fit(
    seed,
    n_components_try_range,
    n_components_try_rdpg,
    n_block_try_range,
    directed,
    n_sims_sbm,
):
    """Fit repeated SBM selections, an RDPG selection and a metric-based SBM.

    The left Drosophila graph is symmetrized by averaging when ``directed``
    is false and then binarized. If it is not fully connected, a heatmap is
    shown and a ``ValueError`` is raised.

    Returns
    -------
    tuple
        ``(sbm_master_df, rdpg_df, tsbm_master_df)``. ``sbm_master_df``
        stacks the ``n_sims_sbm`` runs of ``select_sbm``, each tagged with its
        run index in ``"sim_ind"``. ``rdpg_df`` is the ``select_rdpg`` result.
        ``tsbm_master_df`` is ``select_sbm`` run with ``method="bc-metric"``
        and the negated MSE as metric.

    Raises
    ------
    ValueError
        If the input graph is not fully connected.
    """
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)

    if not is_fully_connected(graph):
        heatmap(graph)
        plt.show()
        raise ValueError("input graph not connected")

    np.random.seed(seed)

    columns = [
        "n_params_gmm",
        "n_params_sbm",
        "rss",
        "mse",
        "score",
        "n_components_try",
        "n_block_try",
        "sim_ind",
    ]
    sbm_master_df = pd.DataFrame(columns=columns)

    # Collect the per-simulation frames and concatenate once.
    # ``DataFrame.append`` no longer exists in pandas >= 2.0.
    sim_frames = []
    for i in range(n_sims_sbm):
        sbm_df = select_sbm(
            graph, n_components_try_range, n_block_try_range, directed=directed
        )
        sbm_df["sim_ind"] = i
        sim_frames.append(sbm_df)
    if sim_frames:
        sbm_master_df = pd.concat(
            [sbm_master_df, *sim_frames], ignore_index=True, sort=True
        )

    rdpg_df = select_rdpg(graph, n_components_try_rdpg, directed)

    def metric(assignments, *args):
        # Negated so that a smaller MSE gives a larger metric value.
        return -compute_mse_from_assignments(assignments, graph, directed=directed)

    tsbm_master_df = select_sbm(
        graph,
        n_components_try_range,
        n_block_try_range,
        directed=directed,
        method="bc-metric",
        metric=metric,
    )
    return (sbm_master_df, rdpg_df, tsbm_master_df)
def levenshtein(str_paths):
    """Return the symmetric matrix of normalized Levenshtein distances.

    Each pair of strings is compared with
    ``textdistance.Levenshtein(qval=None)``. The raw distance is divided by
    the larger number of spaces found in the two strings. Only the upper
    triangle is computed; ``symmetrize(..., method="triu")`` mirrors it.

    Parameters
    ----------
    str_paths : sequence of str
        Strings to compare pairwise.

    Returns
    -------
    Square matrix with one row and column per entry of ``str_paths``.

    Raises
    ------
    ZeroDivisionError
        If neither string of a pair contains a space.
    """
    n_paths = len(str_paths)
    dist_mat = np.zeros((n_paths, n_paths))
    lev = textdistance.Levenshtein(qval=None)
    for i, sp1 in enumerate(str_paths):
        # ``start=i + 1`` keeps ``j`` aligned with the position of ``sp2`` in
        # ``str_paths``. Without it the distances land in the wrong columns,
        # including the lower triangle that the "triu" symmetrize discards.
        for j, sp2 in enumerate(str_paths[i + 1:], start=i + 1):
            dist = lev.distance(sp1, sp2) / max(sp1.count(" "), sp2.count(" "))
            dist_mat[i, j] = dist
    dist_mat = symmetrize(dist_mat, method="triu")
    return dist_mat
def run_fit(seed, directed):
    """Fit SBM, DCSBM and dDCSBM with a priori labels on both hemispheres.

    For the left and then the right graph: the graph is symmetrized by
    averaging when ``directed`` is false, then three estimators are fitted
    through ``fit_a_priori`` and each result is stored with ``save_obj``. The
    estimators themselves are always built with ``directed=True`` and
    ``loops=False``.

    The left labels and the left SBM ``"n_params"`` column are printed.
    ``seed`` is accepted but never read inside this function.

    Returns
    -------
    int
        Always 0.
    """
    # ---- left hemisphere ----
    graph, labels = load_left()
    print(labels)
    if not directed:
        graph = symmetrize(graph, method="avg")

    left_sbm = fit_a_priori(SBMEstimator(directed=True, loops=False), graph, labels)
    print(left_sbm["n_params"])
    save_obj(left_sbm, file_obs, "sbm_left_df")

    left_dcsbm = fit_a_priori(
        DCSBMEstimator(directed=True, loops=False, degree_directed=False),
        graph,
        labels,
    )
    save_obj(left_dcsbm, file_obs, "dcsbm_left_df")

    left_ddcsbm = fit_a_priori(
        DCSBMEstimator(directed=True, loops=False, degree_directed=True),
        graph,
        labels,
    )
    save_obj(left_ddcsbm, file_obs, "ddcsbm_left_df")

    # ---- right hemisphere ----
    graph, labels = load_right()
    if not directed:
        graph = symmetrize(graph, method="avg")

    right_sbm = fit_a_priori(SBMEstimator(directed=True, loops=False), graph, labels)
    save_obj(right_sbm, file_obs, "sbm_right_df")

    right_dcsbm = fit_a_priori(
        DCSBMEstimator(directed=True, loops=False, degree_directed=False),
        graph,
        labels,
    )
    save_obj(right_dcsbm, file_obs, "dcsbm_right_df")

    right_ddcsbm = fit_a_priori(
        DCSBMEstimator(directed=True, loops=False, degree_directed=True),
        graph,
        labels,
    )
    save_obj(right_ddcsbm, file_obs, "ddcsbm_right_df")
    return 0
def run_fit(seed, param_grid, directed, n_jobs):
    """Run ``select_rdpg`` on the left and then the right Drosophila graph.

    NumPy's global RNG is seeded with ``seed`` first. Each graph is
    symmetrized by averaging when ``directed`` is false, binarized and passed
    to ``select_rdpg``. The results are stored with ``save_obj`` under
    ``"rdpg_left_df"`` and ``"rdpg_right_df"``.

    Returns
    -------
    int
        Always 0.
    """
    # Local import so the right-hemisphere loader is in scope whatever the
    # module-level imports provide.
    from graspy.datasets import load_drosophila_right

    np.random.seed(seed)

    # The right run previously called ``load_drosophila_left`` again, so
    # "rdpg_right_df" was a second fit of the left graph.
    hemispheres = (
        (load_drosophila_left, "rdpg_left_df"),
        (load_drosophila_right, "rdpg_right_df"),
    )
    for load_graph, save_name in hemispheres:
        graph = load_graph()
        if not directed:
            graph = symmetrize(graph, method="avg")
        graph = binarize(graph)
        rdpg_df = select_rdpg(graph, param_grid, directed=directed, n_jobs=n_jobs)
        save_obj(rdpg_df, file_obs, save_name)
    return 0
def run_fit(seed, directed, n_components_range):
    """Run ``fit_ldt`` in parallel over ``n_components_range``.

    The left and right graphs are symmetrized by averaging when ``directed``
    is false. One ``fit_ldt(left_graph, right_graph, n_components)`` call is
    dispatched per value through joblib (``n_jobs=-2``, ``verbose=5``). The
    collected outputs are turned into a DataFrame and stored with
    ``save_obj`` under ``"ldt_df"``.

    ``seed`` is accepted but never read inside this function.

    Returns
    -------
    int
        Always 0.
    """
    # The labels returned by the loaders are not needed here.
    left_graph, _ = load_left()
    if not directed:
        left_graph = symmetrize(left_graph, method="avg")

    right_graph, _ = load_right()
    if not directed:
        right_graph = symmetrize(right_graph, method="avg")

    def _fit_one(n_components):
        return fit_ldt(left_graph, right_graph, n_components)

    runner = Parallel(n_jobs=-2, verbose=5)
    results = runner(delayed(_fit_one)(n) for n in n_components_range)

    save_obj(pd.DataFrame(results), file_obs, "ldt_df")
    return 0
def run_fit(
    seed,
    n_components_try_range,
    n_components_try_rdpg,
    n_block_try_range,
    directed,
    n_sims_sbm,
):
    """Repeat ``select_sbm`` with ``rank="sweep"`` and save the stacked results.

    The left Drosophila graph is symmetrized by averaging when ``directed``
    is false and then binarized. If it is not fully connected, a heatmap is
    shown and a ``ValueError`` is raised. Each of the ``n_sims_sbm`` runs is
    tagged with its index in ``"sim_ind"``. The stacked frame is stored with
    ``save_obj`` under ``"sbm_master_df"``.

    ``n_components_try_rdpg`` is accepted but never read inside this function.

    Returns
    -------
    int
        Always 0.

    Raises
    ------
    ValueError
        If the input graph is not fully connected.
    """
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)

    if not is_fully_connected(graph):
        heatmap(graph)
        plt.show()
        raise ValueError("input graph not connected")

    np.random.seed(seed)

    columns = [
        "n_params_gmm",
        "n_params_sbm",
        "rss",
        "mse",
        "score",
        "n_components_try",
        "n_block_try",
        "sim_ind",
    ]
    sbm_master_df = pd.DataFrame(columns=columns)

    # Collect the per-simulation frames and concatenate once.
    # ``DataFrame.append`` no longer exists in pandas >= 2.0.
    sim_frames = []
    for i in range(n_sims_sbm):
        sbm_df = select_sbm(
            graph,
            n_components_try_range,
            n_block_try_range,
            directed=directed,
            rank="sweep",
        )
        sbm_df["sim_ind"] = i
        sim_frames.append(sbm_df)
    if sim_frames:
        sbm_master_df = pd.concat(
            [sbm_master_df, *sim_frames], ignore_index=True, sort=True
        )

    save_obj(sbm_master_df, file_obs, "sbm_master_df")
    return 0
def add_attributes(
    g,
    drop_neg=True,
    remove_diag=True,
    size_scaler=1,
    use_counts=False,
    use_weights=True,
    color_map=None,
):
    """Attach embedding, layout, color and size attributes to the nodes of ``g``.

    Node attributes written: ``"AdjEvec-0"`` .. ``"AdjEvec-9"`` (adjacency
    spectral embedding of the symmetrized adjacency), ``"Spring-x"`` /
    ``"Spring-y"`` (spring layout), ``"color"`` (looked up in
    ``CLASS_COLOR_DICT`` from each node's ``"cell_class"``) and ``"Size"``
    (weighted degree).

    Parameters
    ----------
    g : networkx graph
        Modified in place; every node must carry a ``"cell_class"`` attribute.
    drop_neg, remove_diag, size_scaler, use_counts, use_weights, color_map
        Accepted but never read inside this function.

    Returns
    -------
    The same graph ``g``.
    """
    nodelist = list(g.nodes())

    # add spectral properties
    sym_adj = symmetrize(nx.to_numpy_array(g, nodelist=nodelist))
    n_components = 10
    latent = AdjacencySpectralEmbed(n_components=n_components).fit_transform(sym_adj)
    for i in range(n_components):
        evec_map = dict(zip(nodelist, latent[:, i]))
        nx.set_node_attributes(g, evec_map, name=f"AdjEvec-{i}")

    # add spring layout properties
    pos = nx.spring_layout(g)
    spring_x = {node: xy[0] for node, xy in pos.items()}
    spring_y = {node: xy[1] for node, xy in pos.items()}
    nx.set_node_attributes(g, spring_x, name="Spring-x")
    nx.set_node_attributes(g, spring_y, name="Spring-y")

    # add colors
    for _, data in g.nodes(data=True):
        data["color"] = CLASS_COLOR_DICT[data["cell_class"]]

    # add size attribute based on number of edges; use the graph that was
    # passed in, not a module-level ``path_graph``
    size_map = dict(g.degree(weight="weight"))
    nx.set_node_attributes(g, size_map, name="Size")

    return g
def run_fit(seed, n_components_try_range, n_components_try_rdpg,
            n_block_try_range, directed):
    """Run one SBM selection and one RDPG selection on the left graph.

    The left Drosophila graph is symmetrized by averaging when ``directed``
    is false and then binarized. If it is not fully connected, a heatmap is
    shown and a ``ValueError`` is raised. NumPy's global RNG is seeded with
    ``seed`` before fitting.

    Returns
    -------
    tuple
        ``(sbm_df, rdpg_df)`` as returned by ``select_sbm`` and
        ``select_rdpg``.

    Raises
    ------
    ValueError
        If the input graph is not fully connected.
    """
    adjacency = load_drosophila_left()
    if not directed:
        adjacency = symmetrize(adjacency, method="avg")
    adjacency = binarize(adjacency)

    if not is_fully_connected(adjacency):
        heatmap(adjacency)
        plt.show()
        raise ValueError("input graph not connected")

    np.random.seed(seed)

    sbm_result = select_sbm(
        adjacency, n_components_try_range, n_block_try_range, directed=directed
    )
    rdpg_result = select_rdpg(adjacency, n_components_try_rdpg, directed)
    return (sbm_result, rdpg_result)
def _load_dataset(path, n_nodes, ptr=None):
    """Load vectorized graphs from ``path`` and rebuild square matrices.

    The file must expose arrays under the keys ``"X"`` and ``"y"``. Each row
    of ``X`` is written into the strict upper triangle of an
    ``n_nodes x n_nodes`` matrix, which is then mirrored with
    ``symmetrize(..., "triu")``. Labels equal to -1 are remapped to 0.

    Parameters
    ----------
    path
        File passed to ``np.load``.
        NOTE(review): assumed to be an ``.npz`` archive, since it is opened
        as a context manager here - confirm against the callers.
    n_nodes : int
        Number of nodes of every graph.
    ptr
        When not ``None``, every graph is shifted by its own minimum and then
        passed through ``pass_to_ranks``.

    Returns
    -------
    X_graphs : ndarray of shape (n_samples, n_nodes, n_nodes)
    y : ndarray of int
    """
    # Close the archive once both arrays have been read, instead of leaving
    # the underlying file handle open.
    with np.load(path) as file:
        X = file["X"]
        y = file["y"].astype(int)

    n_samples = X.shape[0]
    y[y == -1] = 0

    idx = np.triu_indices(n_nodes, k=1)
    X_graphs = np.zeros((n_samples, n_nodes, n_nodes))
    for i, x in enumerate(X):
        X_graphs[i][idx] = x
        X_graphs[i] = symmetrize(X_graphs[i], "triu")

    if ptr is not None:
        # Shift every graph so its smallest entry is 0 before ranking.
        X_graphs = X_graphs - X_graphs.min(axis=(1, 2)).reshape(-1, 1, 1)
        for i in range(len(X_graphs)):
            X_graphs[i] = pass_to_ranks(X_graphs[i])

    return X_graphs, y
figsize=(20, 15), sharey=True, gridspec_kw=dict(width_ratios=[0.25, 0.75], wspace=0), ) mid_map = draw_leaf_dendrogram(mg.meta, axs[0], lowest_level=lowest_level, draw_labels=False) key_order = list(mid_map.keys()) compartment = "dendrite" direction = "postsynaptic" foldername = "160.1-BDP-morpho-dcorr" filename = f"test-statslvl={level}-compartment={compartment}-direction={direction }-method=subsample-n_sub=96-max_samp=500" stat_df = readcsv(filename, foldername=foldername, index_col=0) sym_vals = symmetrize(stat_df.values, method="triu") stat_df = pd.DataFrame(data=sym_vals, index=stat_df.index, columns=stat_df.index) ordered_stat_df = stat_df.loc[key_order, key_order] sns.set_context("talk") sns.heatmap(ordered_stat_df, ax=axs[1], cbar=False, cmap="RdBu_r", center=0) axs[1].invert_yaxis() axs[1].invert_xaxis() axs[1].set_xticklabels([]) remove_shared_ax(axs[0]) remove_shared_ax(axs[1]) axs[1].set_yticks(np.arange(len(key_order)) + 0.5) axs[1].set_yticklabels(key_order) axs[1].yaxis.tick_right()
def run_experiment(graph_type=None, threshold=None, res=None, binarize=None,
                   seed=None, param_key=None):
    """Run Louvain on a preprocessed, symmetrized metagraph.

    The metagraph is loaded with ``load_metagraph``, preprocessed, and its
    adjacency is symmetrized by averaging before ``run_louvain`` is called
    with resolution ``res``. When the module-level ``SAVEFIGS`` flag is true,
    three bar plots and a block-probability plot are also produced and saved
    through ``stashfig``.

    Returns
    -------
    tuple
        ``(partition_series, modularity)``. ``partition_series`` is the
        partition indexed by node and named ``param_key``.
    """
    # Common names for titles and saved figures.
    if BLIND:
        basename = f"{param_key}-"
        title = param_key
    else:
        basename = f"louvain-res{res}-t{threshold}-{graph_type}-"
        title = f"Louvain, {graph_type}, res = {res}, threshold = {threshold}"

    np.random.seed(seed)

    # Load and preprocess the data.
    mg = load_metagraph(graph_type, version=BRAIN_VERSION)
    mg = preprocess(
        mg,
        threshold=threshold,
        sym_threshold=True,
        remove_pdiff=True,
        binarize=binarize,
    )
    mg = MetaGraph(symmetrize(mg.adj, method="avg"), mg.meta)
    g_sym = mg.g
    skeleton_labels = np.array(list(g_sym.nodes()))

    partition, modularity = run_louvain(g_sym, res, skeleton_labels)
    partition_series = pd.Series(partition, index=skeleton_labels, name=param_key)

    if not SAVEFIGS:
        return partition_series, modularity

    # Get out some metadata, ordered like ``skeleton_labels``.
    pick_nodes = itemgetter(*skeleton_labels)
    class_labels = np.array(pick_nodes(nx.get_node_attributes(g_sym, "Merge Class")))
    lineage_labels = np.array(pick_nodes(nx.get_node_attributes(g_sym, "lineage")))
    lineage_labels = np.vectorize(lambda x: "~" + x)(lineage_labels)
    classlin_labels, color_dict, hatch_dict = augment_classes(
        class_labels, lineage_labels
    )

    # Settings shared by all three bar plots.
    bar_kws = dict(
        color_dict=color_dict,
        plot_proportions=False,
        figsize=(24, 18),
        title=title,
    )

    # TODO then sort all of them by proportion of sensory/motor
    # barplot by merge class and lineage
    _, _, order = barplot_text(
        partition,
        classlin_labels,
        norm_bar_width=True,
        hatch_dict=hatch_dict,
        return_order=True,
        **bar_kws,
    )
    stashfig(basename + "barplot-mergeclasslin-props")

    category_order = np.unique(partition)[order]

    fig, axs = barplot_text(
        partition,
        class_labels,
        norm_bar_width=True,
        hatch_dict=None,
        category_order=category_order,
        **bar_kws,
    )
    stashfig(basename + "barplot-mergeclass-props")

    fig, axs = barplot_text(
        partition,
        class_labels,
        norm_bar_width=False,
        hatch_dict=None,
        category_order=category_order,
        **bar_kws,
    )
    stashfig(basename + "barplot-mergeclass-counts")

    # TODO add gridmap
    counts = False
    weights = False
    prob_df = get_blockmodel_df(
        mg.adj, partition, return_counts=counts, use_weights=weights
    )
    prob_df = prob_df.reindex(category_order, axis=0)
    prob_df = prob_df.reindex(category_order, axis=1)
    probplot(100 * prob_df, fmt="2.0f", figsize=(20, 20), title=title, font_scale=0.7)
    stashfig(basename + f"probplot-counts{counts}-weights{weights}")

    return partition_series, modularity
# %% [markdown]
# #
from src.data import load_networkx, load_everything
import networkx as nx
from graspy.utils import binarize, symmetrize

# Diameter of graph "G" as loaded directly by the project loader.
graph = load_networkx("G")
nx.algorithms.diameter(graph)

# %% [markdown]
# #
# Diameter of graph "G" after symmetrizing its adjacency by averaging.
adj = symmetrize(load_everything("G"), "avg")
graph = nx.from_numpy_array(adj)
nx.algorithms.diameter(graph)

# %% [markdown]
# #
# Same as the previous cell, for graph "Gad".
adj = symmetrize(load_everything("Gad"), "avg")
graph = nx.from_numpy_array(adj)
nx.algorithms.diameter(graph)
labels=meta["merge_class"].values, left_pair_inds=lp_inds, right_pair_inds=rp_inds, ) # %% [markdown] # ## from graspy.utils import symmetrize # manifold = TSNE(metric="cosine") # tsne_embed = tsne.fit_transform(U) manifold = ClassicalMDS(n_components=U.shape[1] - 1, dissimilarity="precomputed") # manifold = MDS(n_components=2, dissimilarity="precomputed") # manifold = Isomap(n_components=2, metric="precomputed") pdist = symmetrize(pairwise_distances(U, metric="cosine")) manifold_embed = manifold.fit_transform(pdist) plot_pairs( manifold_embed, labels=meta["merge_class"].values, left_pair_inds=lp_inds, right_pair_inds=rp_inds, ) # %% [markdown] # ## fig, ax = plt.subplots(1, 1, figsize=(10, 10)) plot_df = pd.DataFrame(data=manifold_embed) plot_df["merge_class"] = meta["merge_class"].values
# Pairwise global-alignment scores; only the upper triangle (diagonal
# included) is filled here and it is mirrored in the next cell.
nw_scores = np.zeros((len(seqs), len(seqs)))
aligner = GlobalSequenceAligner(DistScoring(pdist), 1000 - med * 1000)
for i in tqdm(range(len(seqs))):
    for j in range(i, len(seqs)):
        score, encodeds = aligner.align(seqs[i], seqs[j], backtrace=True)
        # Normalize by 1000 times the longer of the two sequences.
        s = score / (1000 * max(len(seqs[i]), len(seqs[j])))
        nw_scores[i, j] = s

# %% [markdown]
# ##
from graspy.utils import symmetrize

sns.heatmap(nw_scores)
nw_scores = symmetrize(nw_scores, "triu")
# Turn the normalized scores into dissimilarities.
nw_dists = 1 - nw_scores

# %% [markdown]
# ##
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
sns.heatmap(nw_dists)
# Average-linkage clustering on the condensed form of the distances, reused
# for both the rows and the columns of the cluster map.
Z = linkage(squareform(nw_dists), method="average")
sns.clustermap(nw_dists, row_linkage=Z, col_linkage=Z)

# %% [markdown]
# ##
# One color per position of the longest sequence.
pal = sns.color_palette("husl", n_colors=max(map(len, seqs)))

# %% [markdown]
def _process_metagraph(mg, temp_loc):
    """Write a symmetrized copy of ``mg`` to ``temp_loc`` as GraphML.

    The adjacency of ``mg`` is symmetrized by averaging and wrapped with the
    original metadata in a new ``MetaGraph``. The networkx graph of that
    object is written with ``nx.write_graphml``. Nothing is returned.
    """
    sym_adj = symmetrize(mg.adj, method="avg")
    sym_mg = MetaGraph(sym_adj, mg.meta)
    nx.write_graphml(sym_mg.g, temp_loc)
from graspy.inference import LatentDistributionTest from graspy.simulations import sbm from graspy.utils import symmetrize from pandas import DataFrame from joblib import Parallel, delayed warnings.filterwarnings("ignore") # get where we are just to save output figure folderpath = Path(__file__.replace(basename(__file__), "")) savepath = folderpath / "outputs" np.random.seed(8888) B = [[0.5, 0.2], [0.2, 0.05]] B = symmetrize(B) k = 2 tests = 1 start = 50 stop = 500 diff1 = 50 diff2 = 100 reps = 10 alpha = 0.05 ns = [] ms = [] newms = [] error_list = [] temp = [] for n in range(start, stop, diff1):
def motif_matching(
    paths,
    ID,
    atlas,
    namer_dir,
    name_list,
    metadata_list,
    multigraph_list_all,
    graph_path_list_all,
    rsn=None,
):
    """Match a structural and a functional graph and threshold them by motifs.

    Both graphs are loaded from ``.npy`` files and reduced to the nodes whose
    label intensities occur in both modalities. They are then
    cross-masked, standardized, masked by a community mask and handed to
    ``compare_motifs``. The per-threshold graphs it returns are saved under
    ``namer_dir``.

    Parameters
    ----------
    paths : sequence of two str
        ``[struct_graph_path, func_graph_path]``.
    ID, atlas
        Used to build the multiplex name and the output file names.
    namer_dir : str
        Directory passed to ``compare_motifs`` and used for the saved graphs.
    name_list, metadata_list, multigraph_list_all, graph_path_list_all : list
        Accumulators; appended to in place and returned.
    rsn
        Only used in the message printed when the two graphs differ in shape.

    Returns
    -------
    tuple
        ``(name_list, metadata_list, multigraph_list_all,
        graph_path_list_all)``, or four empty lists when ``compare_motifs``
        fails.
    """
    import networkx as nx
    import numpy as np
    import glob
    from pynets.core import thresholding
    from pynets.stats.netmotifs import compare_motifs
    from sklearn.metrics.pairwise import cosine_similarity
    from pynets.stats.netstats import community_resolution_selection
    from graspy.utils import remove_loops, symmetrize
    from pynets.core.nodemaker import get_brainnetome_node_attributes

    [struct_graph_path, func_graph_path] = paths
    struct_mat = np.load(struct_graph_path)
    func_mat = np.load(func_graph_path)

    [struct_coords, struct_labels, struct_label_intensities] = \
        get_brainnetome_node_attributes(glob.glob(
            f"{str(Path(struct_graph_path).parent.parent)}/nodes/*.json"),
            struct_mat.shape[0])

    [func_coords, func_labels, func_label_intensities] = \
        get_brainnetome_node_attributes(glob.glob(
            f"{str(Path(func_graph_path).parent.parent)}/nodes/*.json"),
            func_mat.shape[0])

    # Find intersecting nodes across modalities (i.e. assuming the same
    # parcellation, but accommodating for the possibility of dropped nodes)
    diff1 = list(set(struct_label_intensities) - set(func_label_intensities))
    diff2 = list(set(func_label_intensities) - set(struct_label_intensities))
    G_struct = nx.from_numpy_array(struct_mat)
    G_func = nx.from_numpy_array(func_mat)

    # Remove from the highest index down so earlier deletions do not shift
    # the positions of the ones still pending.
    bad_idxs = sorted(
        {struct_label_intensities.index(val) for val in diff1}, reverse=True
    )
    if isinstance(struct_coords, np.ndarray):
        struct_coords = list(tuple(x) for x in struct_coords)
    for j in bad_idxs:
        G_struct.remove_node(j)
        print(f"Removing: {(struct_labels[j], struct_coords[j])}...")
        del struct_labels[j], struct_coords[j]

    bad_idxs = sorted(
        {func_label_intensities.index(val) for val in diff2}, reverse=True
    )
    if isinstance(func_coords, np.ndarray):
        func_coords = list(tuple(x) for x in func_coords)
    for j in bad_idxs:
        G_func.remove_node(j)
        print(f"Removing: {(func_labels[j], func_coords[j])}...")
        del func_labels[j], func_coords[j]

    struct_mat = nx.to_numpy_array(G_struct)
    func_mat = nx.to_numpy_array(G_func)

    struct_mat = thresholding.autofix(symmetrize(remove_loops(struct_mat)))
    func_mat = thresholding.autofix(symmetrize(remove_loops(func_mat)))

    if func_mat.shape == struct_mat.shape:
        # Keep only the edges that are present in both modalities.
        func_mat[~struct_mat.astype("bool")] = 0
        struct_mat[~func_mat.astype("bool")] = 0
        print(
            "Edge disagreements after matching: ",
            sum(sum(abs(func_mat - struct_mat))),
        )

        metadata = {}
        assert (
            len(struct_coords)
            == len(struct_labels)
            == len(func_coords)
            == len(func_labels)
            == func_mat.shape[0]
        )
        metadata["coords"] = struct_coords
        metadata["labels"] = struct_labels
        metadata_list.append(metadata)

        struct_mat = np.maximum(struct_mat, struct_mat.T)
        func_mat = np.maximum(func_mat, func_mat.T)

        struct_mat = thresholding.standardize(struct_mat)
        func_mat = thresholding.standardize(func_mat)

        # ``nx.from_numpy_matrix`` was removed in NetworkX 3.0; use
        # ``from_numpy_array`` as the rest of this function already does.
        struct_node_comm_aff_mat = community_resolution_selection(
            nx.from_numpy_array(np.abs(struct_mat))
        )[1]

        func_node_comm_aff_mat = community_resolution_selection(
            nx.from_numpy_array(np.abs(func_mat))
        )[1]

        # One boolean membership vector per community.
        struct_comms = [
            struct_node_comm_aff_mat == i
            for i in np.unique(struct_node_comm_aff_mat)
        ]
        func_comms = [
            func_node_comm_aff_mat == i
            for i in np.unique(func_node_comm_aff_mat)
        ]

        sims = cosine_similarity(struct_comms, func_comms)
        # NOTE(review): the same index, argmax over axis 0, first entry, is
        # used for both ``struct_comms`` and ``func_comms`` - confirm intent.
        try:
            struct_comm = struct_comms[np.argmax(sims, axis=0)[0]]
        except Exception:
            print('Matching by structural communities failed...')
            struct_comm = struct_mat

        try:
            func_comm = func_comms[np.argmax(sims, axis=0)[0]]
        except Exception:
            print('Matching by functional communities failed...')
            func_comm = func_mat

        comm_mask = np.equal.outer(struct_comm, func_comm).astype(bool)

        # A shape mismatch is not re-raised here; the masking below is
        # attempted anyway and skipped if it fails.
        try:
            assert comm_mask.shape == struct_mat.shape == func_mat.shape
        except AssertionError as e:
            e.args += (comm_mask, comm_mask.shape, struct_mat,
                       struct_mat.shape, func_mat, func_mat.shape)

        try:
            struct_mat[~comm_mask] = 0
        except Exception:
            print('Skipping community masking...')
        try:
            func_mat[~comm_mask] = 0
        except Exception:
            print('Skipping community masking...')

        struct_name = struct_graph_path.split("/rawgraph_")[-1].split(".npy")[0]
        func_name = func_graph_path.split("/rawgraph_")[-1].split(".npy")[0]
        name = f"sub-{ID}_{atlas}_mplx_Layer-1_{struct_name}_" \
               f"Layer-2_{func_name}"
        name_list.append(name)
        struct_mat = np.maximum(struct_mat, struct_mat.T)
        func_mat = np.maximum(func_mat, func_mat.T)
        try:
            [mldict, g_dict] = compare_motifs(
                struct_mat, func_mat, name, namer_dir)
        except Exception:
            print(f"Adaptive thresholding by motif comparisons failed "
                  f"for {name}. This usually happens when no motifs are found")
            return [], [], [], []

        multigraph_list_all.append(list(mldict.values())[0])
        graph_path_list = []
        for thr in list(g_dict.keys()):
            multigraph_path_list_dict = {}
            [struct, func] = g_dict[thr]
            struct_out = f"{namer_dir}/struct_{atlas}_{struct_name}.npy"
            func_out = f"{namer_dir}/struct_{atlas}_{func_name}_" \
                       f"motif-{thr}.npy"
            np.save(struct_out, struct)
            np.save(func_out, func)
            multigraph_path_list_dict[f"struct_{atlas}_{thr}"] = struct_out
            multigraph_path_list_dict[f"func_{atlas}_{thr}"] = func_out
            graph_path_list.append(multigraph_path_list_dict)
        graph_path_list_all.append(graph_path_list)
    else:
        print(
            f"Skipping {rsn} rsn, since structural and functional graphs are "
            f"not identical shapes."
        )

    return name_list, metadata_list, multigraph_list_all, graph_path_list_all
def quick_embed_viewer(embed, labels=None, lp_inds=None, rp_inds=None,
                       left_right_indexing=False):
    """Plot six 2-D views of ``embed`` in a 3 x 2 grid of scatter plots.

    Rows are classical MDS, t-SNE and UMAP; columns are the euclidean and
    the cosine version of each. Points are colored by ``labels`` using
    ``CLASS_COLOR_DICT``, and ``add_connections`` is called on every panel
    with the points selected by ``lp_inds`` and ``rp_inds``.

    Parameters
    ----------
    embed : array-like
        Embedding to visualize, one row per point.
    labels : array-like, optional
        Per-point labels used as hue.
    lp_inds, rp_inds : array-like, optional
        Positional indices of the points passed to ``add_connections``.
    left_right_indexing : bool
        When true, ``lp_inds`` / ``rp_inds`` are replaced by the first and
        second half of the rows of ``embed``.
    """
    if left_right_indexing:
        lp_inds = np.arange(len(embed) // 2)
        rp_inds = np.arange(len(embed) // 2) + len(embed) // 2

    fig, axs = plt.subplots(3, 2, figsize=(20, 30))

    scatter_kws = dict(
        x=0,
        y=1,
        hue="labels",
        palette=CLASS_COLOR_DICT,
        legend=False,
        s=20,
        linewidth=0.5,
        alpha=0.7,
    )

    euc_cmds = ClassicalMDS(n_components=2).fit_transform(embed)
    plot_df = pd.DataFrame(data=euc_cmds)
    plot_df["labels"] = labels

    def draw_panel(ax, coords, title):
        # Reuse one frame: overwrite the two coordinate columns, then draw.
        plot_df[0] = coords[:, 0]
        plot_df[1] = coords[:, 1]
        sns.scatterplot(data=plot_df, ax=ax, **scatter_kws)
        ax.axis("off")
        add_connections(
            plot_df.iloc[lp_inds, 0],
            plot_df.iloc[rp_inds, 0],
            plot_df.iloc[lp_inds, 1],
            plot_df.iloc[rp_inds, 1],
            ax=ax,
        )
        ax.set_title(title)

    draw_panel(axs[0, 0], euc_cmds, "CMDS o euclidean")

    # Cosine distances are computed once and shared by CMDS and t-SNE.
    cos_pdist = symmetrize(pairwise_distances(embed, metric="cosine"))
    cos_cmds = ClassicalMDS(
        n_components=2, dissimilarity="precomputed"
    ).fit_transform(cos_pdist)
    draw_panel(axs[0, 1], cos_cmds, "CMDS o cosine")

    euc_tsne = TSNE(metric="euclidean").fit_transform(embed)
    draw_panel(axs[1, 0], euc_tsne, "TSNE o euclidean")

    cos_tsne = TSNE(metric="precomputed").fit_transform(cos_pdist)
    draw_panel(axs[1, 1], cos_tsne, "TSNE o cosine")

    euc_umap = UMAP(metric="euclidean", n_neighbors=30, min_dist=1).fit_transform(embed)
    draw_panel(axs[2, 0], euc_umap, "UMAP o euclidean")

    cos_umap = UMAP(metric="cosine", n_neighbors=30, min_dist=1).fit_transform(embed)
    draw_panel(axs[2, 1], cos_umap, "UMAP o cosine")
from graspy.embed import AdjacencySpectralEmbed from graspy.models import EREstimator, RDPGEstimator, SBEstimator from graspy.plot import heatmap, pairplot import pandas as pd #%% Set up some simulations from graspy.simulations import p_from_latent, sample_edges from graspy.utils import binarize, symmetrize ## Load data sns.set_context("talk") left_adj, cell_labels = load_drosophila_left(return_labels=True) left_adj_uw = left_adj.copy() left_adj_uw[left_adj_uw > 0] = 1 left_adj_uw = symmetrize(left_adj_uw, method="avg") left_adj_uw = binarize(left_adj_uw) def _check_common_inputs( figsize=None, height=None, title=None, context=None, font_scale=None, legend_name=None, ): # Handle figsize if figsize is not None: if not isinstance(figsize, tuple): msg = "figsize must be a tuple, not {}.".format(type(figsize))
# For every community: plot its sub-graph, re-run Louvain (resolution 1) on
# the symmetrized sub-graph, then plot again grouped by the sub-partition.
for comm in communities:
    # Restrict a copy of the metagraph to the members of this community.
    comm_mg = mg.copy()
    ids = partition[partition == comm].index
    inds = comm_mg.meta.index.isin(ids)
    comm_mg = comm_mg.reindex(inds)
    is_al = comm_mg.meta["Merge Class"].isin(al_classes)
    heatmap(
        comm_mg.adj,
        inner_hier_labels=comm_mg["Merge Class"],
        outer_hier_labels=is_al,
        hier_label_fontsize=7,
        figsize=(20, 20),
        cbar=False,
    )

    # Louvain is run on the symmetrized (averaged) adjacency.
    adj = symmetrize(comm_mg.adj.copy(), method="avg")
    sym_mg = MetaGraph(adj, comm_mg.meta)
    g_sym = sym_mg.g
    skeleton_labels = np.array(list(g_sym.nodes()))
    sub_partition, modularity = run_louvain(g_sym, 1, skeleton_labels)

    # Align the sub-partition with the row order of the community metadata.
    sub_partition = pd.Series(
        data=sub_partition, index=skeleton_labels, name="sub-partition"
    )
    sub_partition = sub_partition.reindex(comm_mg.meta.index)
    heatmap(
        comm_mg.adj,
        inner_hier_labels=sub_partition.values,
        hier_label_fontsize=7,
        figsize=(20, 20),
        cbar=False,
        sort_nodes=True,
    )
pred_labels, use_weights=True, return_counts=False) plt.figure(figsize=(20, 20)) sns.heatmap(blockmodel_df, cmap="Reds") g = nx.from_pandas_adjacency(blockmodel_df, create_using=nx.DiGraph()) uni_labels, counts = np.unique(pred_labels, return_counts=True) size_scaler = 5 size_map = dict(zip(uni_labels, size_scaler * counts)) nx.set_node_attributes(g, size_map, name="Size") mini_adj = nx.to_numpy_array(g, nodelist=uni_labels) node_signal_flow = signal_flow(mini_adj) sf_map = dict(zip(uni_labels, node_signal_flow)) nx.set_node_attributes(g, sf_map, name="Signal Flow") sym_adj = symmetrize(mini_adj) node_lap = LaplacianSpectralEmbed(n_components=1).fit_transform(sym_adj) node_lap = np.squeeze(node_lap) lap_map = dict(zip(uni_labels, node_lap)) nx.set_node_attributes(g, lap_map, name="Laplacian-2") color_map = dict(zip(uni_labels, cc.glasbey_light)) nx.set_node_attributes(g, color_map, name="Color") g.nodes(data=True) nx.write_graphml(g, f"maggot_models/notebooks/outs/{FNAME}/mini_g.graphml") # %% sort minigraph based on signal flow sort_inds = np.argsort(node_signal_flow)[::-1] temp_labels = blockmodel_df.index.values temp_labels = temp_labels[sort_inds]
ax.axis("off") add_connections( plot_df.iloc[lp_inds, 0], plot_df.iloc[rp_inds, 0], plot_df.iloc[lp_inds, 1], plot_df.iloc[rp_inds, 1], ax=ax, ) from sklearn.manifold import MDS, Isomap, TSNE from graspy.embed import ClassicalMDS from graspy.utils import symmetrize euc_pdist = pairwise_distances(embed, metric="euclidean") euc_pdist = symmetrize(euc_pdist) cos_pdist = pairwise_distances(embed, metric="cosine") cos_pdist = symmetrize(cos_pdist) for Manifold, name in zip((ClassicalMDS, ), ("cmds", )): # MDS, Isomap, TSNE): print(name) embedder = Manifold(n_components=2, dissimilarity="precomputed") euc_embed = embedder.fit_transform(euc_pdist) embedplot(euc_embed) stashfig(f"euc-embed-{name}") cos_embed = embedder.fit_transform(cos_pdist) embedplot(cos_embed) stashfig(f"cos-embed-{name}")
# Block-level edge counts for the predicted labels.
block_labels, block_vert_inds, block_inds = _get_block_indices(pred_labels)
block_counts = _calculate_block_counts(adj, block_inds, block_vert_inds)
block_count_df = pd.DataFrame(
    index=block_labels, columns=block_labels, data=block_counts
)

#%%
# uni_pred_labels, counts = np.unique(pred_labels, return_counts=True)
# uni_ints = range(len(uni_pred_labels))
# label_map = dict(zip(uni_pred_labels, uni_ints))
# int_labels = np.array(itemgetter(*uni_pred_labels)(label_map))
# synapse_counts = _calculate_block_counts(adj, uni_ints, pred_labels)

# From here on the block graph is built from the SBM probability table.
block_df = sbm_prob
block_adj = sbm_prob.values
block_labels = sbm_prob.index.values

# 1-D Laplacian spectral embedding of the symmetrized block matrix.
sym_adj = symmetrize(block_adj)
lse_embed = LaplacianSpectralEmbed(form="DAD", n_components=1)
latent = np.squeeze(lse_embed.fit_transform(sym_adj))

block_signal_flow = signal_flow(block_adj)
block_g = nx.from_pandas_adjacency(block_df, create_using=nx.DiGraph())

# Node position: (embedding coordinate, signal flow).
pos = dict(zip(block_labels, zip(latent, block_signal_flow)))
weights = nx.get_edge_attributes(block_g, "weight")
node_colors = np.array(itemgetter(*block_labels)(pred_color_dict))

# Node size: 4 x the number of vertices carrying each predicted label.
uni_pred_labels, pred_counts = np.unique(pred_labels, return_counts=True)
size_map = dict(zip(uni_pred_labels, pred_counts))
node_sizes = np.array(itemgetter(*block_labels)(size_map))
node_sizes *= 4
prob_df.drop("-1", axis=1, inplace=True) adj = prob_df.values adj -= np.diag(np.diag(adj)) prob_df.data = prob_df print(prob_df.head()) g = nx.from_pandas_adjacency(prob_df, create_using=nx.DiGraph()) uni_labels, counts = np.unique(adjusted_partition, return_counts=True) size_scaler = 8 size_map = dict(zip(uni_labels, size_scaler * counts)) nx.set_node_attributes(g, size_map, name="Size") adj = nx.to_numpy_array(g, nodelist=uni_labels) node_signal_flow = signal_flow(adj) sf_map = dict(zip(uni_labels, node_signal_flow)) nx.set_node_attributes(g, sf_map, name="Signal Flow") sym_adj = symmetrize(adj) node_lap = AdjacencySpectralEmbed(n_components=10).fit_transform(sym_adj) # node_lap = np.squeeze(node_lap) i = 5 node_lap = node_lap[:, i] lap_map = dict(zip(uni_labels, node_lap)) nx.set_node_attributes(g, lap_map, name="Laplacian-2") pos = nx.spring_layout(g) new_pos = {} for key, val in pos.items(): new_pos[key] = val[1] nx.set_node_attributes(g, new_pos, name="Spring") color_map = dict(zip(uni_labels, cc.glasbey_light)) nx.set_node_attributes(g, color_map, name="Color")