def run_fit(seed, param_grid, directed, n_init, n_jobs, co_block):
    """Fit co-block SBM model grids to the left and right Drosophila graphs.

    Parameters
    ----------
    seed : int
        Seed for numpy's global RNG.
    param_grid : dict
        Hyperparameter grid forwarded to ``select_sbm``.
    directed : bool
        If False, each graph is symmetrized (average) before binarizing.
    n_init : int
        Number of initializations per ``select_sbm`` fit.
    n_jobs : int
        Number of parallel jobs for ``select_sbm``.
    co_block : bool
        Forwarded to ``select_sbm``.

    Returns
    -------
    int
        Always 0; result tables are persisted via ``save_obj``.
    """
    # Fix: `seed` was accepted but never applied. The sibling run_fit
    # variants in this project seed the global RNG, so do the same here
    # for reproducibility.
    np.random.seed(seed)

    # run left
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)
    sbm_left_df = select_sbm(
        graph,
        param_grid,
        directed=directed,
        n_jobs=n_jobs,
        n_init=n_init,
        co_block=co_block,
    )
    save_obj(sbm_left_df, file_obs, "cosbm_left_df")

    # run right
    graph = load_drosophila_right()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)
    sbm_right_df = select_sbm(
        graph,
        param_grid,
        directed=directed,
        n_jobs=n_jobs,
        n_init=n_init,
        co_block=co_block,
    )
    save_obj(sbm_right_df, file_obs, "cosbm_right_df")
    return 0
def run_fit(seed, param_grid, directed, n_init, n_jobs):
    """Fit DCSBM model grids to the left and right Drosophila graphs.

    NOTE(review): the result names say "ddcsbm" but ``degree_directed=False``
    is passed — confirm which model was intended.

    Parameters
    ----------
    seed : int
        Seed for numpy's global RNG.
    param_grid : dict
        Hyperparameter grid forwarded to ``select_dcsbm``.
    directed : bool
        If False, each graph is symmetrized (average) before binarizing.
    n_init : int
        Number of initializations per fit.
    n_jobs : int
        Number of parallel jobs.

    Returns
    -------
    int
        Always 0; result tables are persisted via ``save_obj``.
    """
    # Fix: `seed` was accepted but never applied; seed the global RNG for
    # reproducibility, as the other run_fit variants do.
    np.random.seed(seed)

    # run left
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)
    ddcsbm_left_df = select_dcsbm(
        graph,
        param_grid,
        directed=directed,
        degree_directed=False,
        n_jobs=n_jobs,
        n_init=n_init,
    )
    save_obj(ddcsbm_left_df, file_obs, "ddcsbm_left_df")

    # run right
    graph = load_drosophila_right()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)
    ddcsbm_right_df = select_dcsbm(
        graph,
        param_grid,
        directed=directed,
        degree_directed=False,
        n_jobs=n_jobs,
        n_init=n_init,
    )
    save_obj(ddcsbm_right_df, file_obs, "ddcsbm_right_df")
    return 0
def run_fit(seed, param_grid, directed, n_init, n_jobs):
    """Fit DCSBM and degree-directed DCSBM grids to the left hemisphere graph.

    The graph is optionally symmetrized (when undirected), binarized, then
    two model-selection sweeps are run — one with ``degree_directed=False``
    and one with ``degree_directed=True`` — and both result tables are
    persisted via ``save_obj``.

    Returns
    -------
    int
        Always 0.
    """
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)

    np.random.seed(seed)

    # Shared keyword arguments for both sweeps.
    shared_kws = dict(directed=directed, n_jobs=n_jobs, n_init=n_init)
    dcsbm_out_df = select_dcsbm(graph, param_grid, degree_directed=False, **shared_kws)
    ddcsbm_out_df = select_dcsbm(graph, param_grid, degree_directed=True, **shared_kws)

    save_obj(dcsbm_out_df, file_obs, "dcsbm_out_df")
    save_obj(ddcsbm_out_df, file_obs, "ddcsbm_out_df")
    return 0
def run_fit(
    seed,
    n_components_try_range,
    n_components_try_rdpg,
    n_block_try_range,
    directed,
    n_init,
    embed_kws_try_range,
    n_jobs,
):
    """Grid-search a DCSBM over embedding dimension, block count, and
    embedding keyword options on the left hemisphere graph.

    The graph is symmetrized when undirected, binarized, and the selection
    table is printed (head) and persisted via ``save_obj``.

    Returns
    -------
    int
        Always 0.
    """
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)

    np.random.seed(seed)

    # Assemble the hyperparameter grid for the DCSBM selection sweep.
    param_grid = dict(
        n_components=n_components_try_range,
        n_blocks=n_block_try_range,
        embed_kws=embed_kws_try_range,
    )
    out_df = select_dcsbm(
        graph,
        param_grid,
        directed=directed,
        degree_directed=False,
        n_jobs=n_jobs,
        n_init=n_init,
    )
    print(out_df.head())
    save_obj(out_df, file_obs, "grid_search_out")
    return 0
def load_right():
    """Load the right connectome. Wraps graspy.

    Returns
    -------
    tuple
        ``(graph, labels)`` where ``graph`` is the binarized adjacency
        matrix and ``labels`` are the node class labels.
    """
    adjacency, labels = load_drosophila_right(return_labels=True)
    return binarize(adjacency), labels
def run_fit(
    seed,
    n_components_try_range,
    n_components_try_rdpg,
    n_block_try_range,
    directed,
    n_sims_sbm,
):
    """Run repeated SBM selection simulations plus RDPG and metric-based SBM
    selection on the left hemisphere graph.

    Parameters
    ----------
    seed : int
        Seed for numpy's global RNG.
    n_components_try_range, n_block_try_range : iterable
        Candidate embedding dimensions / block counts for ``select_sbm``.
    n_components_try_rdpg : iterable
        Candidate dimensions for ``select_rdpg``.
    directed : bool
        If False, the graph is symmetrized before binarizing.
    n_sims_sbm : int
        Number of independent SBM selection runs to concatenate.

    Returns
    -------
    tuple of pd.DataFrame
        ``(sbm_master_df, rdpg_df, tsbm_master_df)``.

    Raises
    ------
    ValueError
        If the preprocessed graph is not fully connected.
    """
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)
    if not is_fully_connected(graph):
        # Show the offending graph before failing, to aid debugging.
        heatmap(graph)
        plt.show()
        raise ValueError("input graph not connected")
    np.random.seed(seed)
    # Fix: was `columns = columns = [...]` (accidental double assignment).
    # The list documents the expected schema and seeds the empty-result case.
    columns = [
        "n_params_gmm",
        "n_params_sbm",
        "rss",
        "mse",
        "score",
        "n_components_try",
        "n_block_try",
        "sim_ind",
    ]
    sbm_dfs = []
    for i in range(n_sims_sbm):
        sbm_df = select_sbm(
            graph, n_components_try_range, n_block_try_range, directed=directed
        )
        sbm_df["sim_ind"] = i
        sbm_dfs.append(sbm_df)
    # Fix: DataFrame.append was deprecated and removed in pandas 2.0; collect
    # then concat once. sort=True matches the old append(..., sort=True)
    # column ordering; the guard preserves the original empty-frame result
    # when n_sims_sbm == 0 (pd.concat raises on an empty list).
    if sbm_dfs:
        sbm_master_df = pd.concat(sbm_dfs, ignore_index=True, sort=True)
    else:
        sbm_master_df = pd.DataFrame(columns=columns)

    rdpg_df = select_rdpg(graph, n_components_try_rdpg, directed)

    def metric(assignments, *args):
        # Negated MSE so that higher is better for the selection routine.
        return -compute_mse_from_assignments(assignments, graph, directed=directed)

    tsbm_master_df = select_sbm(
        graph,
        n_components_try_range,
        n_block_try_range,
        directed=directed,
        method="bc-metric",
        metric=metric,
    )
    return (sbm_master_df, rdpg_df, tsbm_master_df)
def probplot(
    adj,
    labels,
    log_scale=False,
    figsize=(20, 20),
    cmap="Purples",
    title="Edge probability",
    vmin=0,
    vmax=None,
    ax=None,
    font_scale=1,
):
    """Fit a directed SBM (loops allowed) to the binarized adjacency and plot
    the block-probability matrix as an annotated heatmap.

    Parameters
    ----------
    adj : array-like
        Adjacency matrix; binarized before fitting.
    labels : array-like
        Block assignment per node, used as ``y`` for the SBM fit and as the
        row/column labels of the plotted matrix.
    log_scale : bool, optional
        If True, plot on a log color scale (0.001 is added to all block
        probabilities to avoid log(0)), with decade tick marks.
    figsize, cmap, title, vmin, vmax, font_scale : optional
        Standard plotting knobs forwarded to matplotlib/seaborn.
    ax : matplotlib Axes, optional
        Axes to draw into; a new figure is created when None.

    Returns
    -------
    tuple
        ``(ax, prob_df)`` — the heatmap axes and the block-probability
        DataFrame indexed by the unique labels.
    """
    sbm = SBMEstimator(directed=True, loops=True)
    sbm.fit(binarize(adj), y=labels)
    data = sbm.block_p_
    uni_labels = np.unique(labels)
    cbar_kws = {"fraction": 0.08, "shrink": 0.8, "pad": 0.03}
    if log_scale:
        # Offset so zero-probability blocks remain representable on a log
        # scale; vmin/vmax are then recomputed from the offset data.
        data = data + 0.001
        vmin = data.min().min()
        vmax = data.max().max()
        log_norm = LogNorm(vmin=vmin, vmax=vmax)
        # Colorbar ticks at each power of ten spanning the data range.
        cbar_ticks = [
            math.pow(10, i)
            for i in range(
                math.floor(math.log10(data.min().min())),
                1 + math.ceil(math.log10(data.max().max())),
            )
        ]
        cbar_kws["ticks"] = cbar_ticks
    prob_df = pd.DataFrame(columns=uni_labels, index=uni_labels, data=data)
    if ax is None:
        plt.figure(figsize=figsize)
        ax = plt.gca()
    ax.set_title(title, pad=30, fontsize=30)
    sns.set_context("talk", font_scale=font_scale)
    heatmap_kws = dict(
        cbar_kws=cbar_kws, annot=True, square=True, cmap=cmap, vmin=vmin, vmax=vmax
    )
    if log_scale:
        heatmap_kws["norm"] = log_norm
    # ax is always set by this point, so this branch always runs.
    if ax is not None:
        heatmap_kws["ax"] = ax
        ax.tick_params(axis="both", which="major", labelsize=30)
    # ax.tick_params(axis="both", which="minor", labelsize=8)
    ax = sns.heatmap(prob_df, **heatmap_kws)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
    return ax, prob_df
def run_fit(seed, param_grid, directed, n_jobs):
    """Fit RDPG model-selection sweeps to the left and right Drosophila graphs.

    Parameters
    ----------
    seed : int
        Seed for numpy's global RNG.
    param_grid : dict
        Hyperparameter grid forwarded to ``select_rdpg``.
    directed : bool
        If False, each graph is symmetrized (average) before binarizing.
    n_jobs : int
        Number of parallel jobs.

    Returns
    -------
    int
        Always 0; result tables are persisted via ``save_obj``.
    """
    np.random.seed(seed)
    # run left
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)
    rdpg_left_df = select_rdpg(graph, param_grid, directed=directed, n_jobs=n_jobs)
    save_obj(rdpg_left_df, file_obs, "rdpg_left_df")

    # run right
    # Bug fix: this previously reloaded load_drosophila_left(), so the saved
    # "rdpg_right_df" was actually a second fit of the LEFT hemisphere.
    # (Ensure load_drosophila_right is imported at the top of this file.)
    graph = load_drosophila_right()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)
    rdpg_right_df = select_rdpg(graph, param_grid, directed=directed, n_jobs=n_jobs)
    save_obj(rdpg_right_df, file_obs, "rdpg_right_df")
    return 0
def get_sbm_prob(adj, labels):
    """Fit a directed SBM (loops allowed) and return its block probabilities.

    Parameters
    ----------
    adj : array-like
        Adjacency matrix; binarized before fitting.
    labels : array-like
        Block assignment per node.

    Returns
    -------
    pd.DataFrame
        Block-probability matrix with rows/columns labeled by block and
        ordered from the largest block to the smallest.
    """
    estimator = SBMEstimator(directed=True, loops=True)
    estimator.fit(binarize(adj), y=labels)

    uni_labels, counts = np.unique(labels, return_counts=True)
    order = np.argsort(counts)[::-1]  # largest blocks first
    ordered_labels = uni_labels[order]
    block_p = estimator.block_p_[np.ix_(order, order)]

    return pd.DataFrame(data=block_p, index=ordered_labels, columns=ordered_labels)
def get_sbm_prob(adj, labels):
    """Fit a directed SBM (loops allowed) with arbitrary hashable labels and
    return its block probabilities.

    Labels are first mapped to contiguous integer codes so that any label
    type accepted by ``np.unique`` works as the SBM's ``y``.

    Parameters
    ----------
    adj : array-like
        Adjacency matrix; binarized before fitting.
    labels : array-like
        Block assignment per node (strings, ints, ...).

    Returns
    -------
    pd.DataFrame
        Block-probability matrix with rows/columns labeled by the original
        labels, ordered from the largest block to the smallest.
    """
    uni_labels, counts = np.unique(labels, return_counts=True)
    label_map = dict(zip(uni_labels, range(len(uni_labels))))
    # Fix: itemgetter(*labels) returns a bare scalar (not a tuple) when there
    # is exactly one node, which broke the np.array shape; a comprehension
    # handles every input length uniformly.
    y = np.array([label_map[label] for label in labels])
    sbm = SBMEstimator(directed=True, loops=True)
    sbm.fit(binarize(adj), y=y)
    data = sbm.block_p_
    sort_inds = np.argsort(counts)[::-1]  # largest blocks first
    uni_labels = uni_labels[sort_inds]
    data = data[np.ix_(sort_inds, sort_inds)]
    prob_df = pd.DataFrame(columns=uni_labels, index=uni_labels, data=data)
    return prob_df
def run_fit(
    seed,
    n_components_try_range,
    n_components_try_rdpg,
    n_block_try_range,
    directed,
    n_sims_sbm,
):
    """Run repeated SBM selection simulations (rank sweep) on the left
    hemisphere graph and persist the concatenated results.

    Parameters
    ----------
    seed : int
        Seed for numpy's global RNG.
    n_components_try_range, n_block_try_range : iterable
        Candidate embedding dimensions / block counts for ``select_sbm``.
    n_components_try_rdpg : iterable
        Unused here; kept for signature compatibility with sibling variants.
    directed : bool
        If False, the graph is symmetrized before binarizing.
    n_sims_sbm : int
        Number of independent SBM selection runs to concatenate.

    Returns
    -------
    int
        Always 0; the table is persisted via ``save_obj``.

    Raises
    ------
    ValueError
        If the preprocessed graph is not fully connected.
    """
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)
    if not is_fully_connected(graph):
        # Show the offending graph before failing, to aid debugging.
        heatmap(graph)
        plt.show()
        raise ValueError("input graph not connected")
    np.random.seed(seed)
    # Fix: was `columns = columns = [...]` (accidental double assignment).
    columns = [
        "n_params_gmm",
        "n_params_sbm",
        "rss",
        "mse",
        "score",
        "n_components_try",
        "n_block_try",
        "sim_ind",
    ]
    sbm_dfs = []
    for i in range(n_sims_sbm):
        sbm_df = select_sbm(
            graph,
            n_components_try_range,
            n_block_try_range,
            directed=directed,
            rank="sweep",
        )
        sbm_df["sim_ind"] = i
        sbm_dfs.append(sbm_df)
    # Fix: DataFrame.append was deprecated and removed in pandas 2.0; collect
    # then concat once. The guard keeps the empty-frame result when
    # n_sims_sbm == 0 (pd.concat raises on an empty list).
    if sbm_dfs:
        sbm_master_df = pd.concat(sbm_dfs, ignore_index=True, sort=True)
    else:
        sbm_master_df = pd.DataFrame(columns=columns)
    save_obj(sbm_master_df, file_obs, "sbm_master_df")
    return 0
def load_new_right(return_full_labels=False, return_names=False):
    """Load the right mushroom-body adjacency matrix and node labels.

    Reads the processed CSV pair from ``maggot_models/data/processed`` and
    binarizes the adjacency.

    Parameters
    ----------
    return_full_labels : bool, optional
        If True, also return the full "Class" labels (takes precedence over
        ``return_names``).
    return_names : bool, optional
        If True (and ``return_full_labels`` is False), also return the node
        names.

    Returns
    -------
    tuple
        ``(adj, labels)`` plus optionally the full labels or names.
    """
    data_path = Path("./maggot_models/data/processed/")
    adj_df = pd.read_csv(
        data_path / "BP_20190424mw_right_mb_adj.csv", header=0, index_col=0
    )
    meta_df = pd.read_csv(
        data_path / "BP_20190424mw_right_mb_meta.csv", header=0, index_col=0
    )
    adj = binarize(adj_df.values)
    labels = meta_df["simple_class"].values.astype(str)
    if return_full_labels:
        return adj, labels, meta_df["Class"].values.astype(str)
    if return_names:
        return adj, labels, meta_df["Name"].values.astype(str)
    return adj, labels
def plot_adjacencies(full_mg, axs):
    """Plot observed vs. DCSBM-sampled adjacency matrices for each level.

    For every level of the cluster hierarchy, row 0 of ``axs`` shows the
    binarized data adjacency sorted by that level's cluster labels, and
    row 1 shows one graph sampled from a DCSBM fit to the same labels.

    Parameters
    ----------
    full_mg : object
        Graph container exposing ``.adj`` (adjacency) and ``.meta``
        (DataFrame with ``lvl{level}_labels_side`` columns).
    axs : 2-D array of matplotlib Axes
        Needs at least 2 rows and ``lowest_level + 1`` columns; relies on
        module-level ``lowest_level``, ``level_names``, and
        ``CLASS_COLOR_DICT``.
    """
    pal = sns.color_palette("deep", 1)
    model = DCSBMEstimator
    for level in np.arange(lowest_level + 1):
        # Row 0: the data itself, sorted by this level's clusters.
        ax = axs[0, level]
        adj = binarize(full_mg.adj)
        _, _, top, _ = adjplot(
            adj,
            ax=ax,
            plot_type="scattermap",
            sizes=(0.5, 0.5),
            sort_class=["hemisphere"] + level_names[: level + 1],
            item_order=["merge_class_sf_order", "merge_class", "sf"],
            class_order="sf",
            meta=full_mg.meta,
            palette=CLASS_COLOR_DICT,
            colors="merge_class",
            ticks=False,
            gridline_kws=dict(linewidth=0.2, color="grey", linestyle="--"),
            color=pal[0],
        )
        top.set_title(f"Level {level} - Data")
        # Fit a DCSBM to this level's integer-coded labels, then draw one
        # sampled graph with the identical sorting for visual comparison.
        labels = full_mg.meta[f"lvl{level}_labels_side"]
        estimator = model(directed=True, loops=True)
        uni_labels, inv = np.unique(labels, return_inverse=True)
        estimator.fit(adj, inv)
        sample_adj = np.squeeze(estimator.sample())
        # Row 1: the model sample.
        ax = axs[1, level]
        _, _, top, _ = adjplot(
            sample_adj,
            ax=ax,
            plot_type="scattermap",
            sizes=(0.5, 0.5),
            sort_class=["hemisphere"] + level_names[: level + 1],
            item_order=["merge_class_sf_order", "merge_class", "sf"],
            class_order="sf",
            meta=full_mg.meta,
            palette=CLASS_COLOR_DICT,
            colors="merge_class",
            ticks=False,
            gridline_kws=dict(linewidth=0.2, color="grey", linestyle="--"),
            color=pal[0],
        )
        top.set_title(f"Level {level} - DCSBM sample")
def run_fit(
    seed, n_components_try_range, n_components_try_rdpg, n_block_try_range, directed
):
    """Run SBM and RDPG model-selection sweeps on the left hemisphere graph.

    The graph is symmetrized when undirected, binarized, and checked for
    connectedness before fitting.

    Returns
    -------
    tuple of pd.DataFrame
        ``(sbm_df, rdpg_df)``.

    Raises
    ------
    ValueError
        If the preprocessed graph is not fully connected.
    """
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)

    # Bail out early (with a plot for debugging) on a disconnected graph.
    if not is_fully_connected(graph):
        heatmap(graph)
        plt.show()
        raise ValueError("input graph not connected")

    np.random.seed(seed)
    sbm_df = select_sbm(
        graph, n_components_try_range, n_block_try_range, directed=directed
    )
    rdpg_df = select_rdpg(graph, n_components_try_rdpg, directed)
    return (sbm_df, rdpg_df)
dcsbm.fit(adj, y=labels) objective = dcsbm.score(adj) return objective fig, ax = plt.subplots(2, 1, figsize=(10, 10), sharex=True) sns.distplot(max_pn_prop_input, ax=ax[0]) ax[0].set_title("All neurons") objectives = np.zeros_like(thresh_range) for i, t in enumerate(thresh_range): low_inds = np.where(max_pn_prop_input < t)[0] high_inds = np.where(max_pn_prop_input >= t)[0] labels = np.zeros(adj.shape[0]) labels[high_inds] = 1 objectives[i] = dcsbm_objective(binarize(adj), labels) sns.scatterplot(x=thresh_range, y=objectives, ax=ax[1]) # ax[1].set_ylim((0, 0.015)) ax[1].set_ylabel("2-DCSBM objective val") ax[1].set_xlabel("PN input threshold (min input any subclass)") ax[1].set_xlim((0 - 0.01, 0.35 + 0.01)) ## fig, ax = plt.subplots(2, 1, figsize=(10, 10), sharex=True) sns.distplot(max_pn_prop_input, ax=ax[0]) ax[0].set_title("All neurons") objectives = np.zeros_like(thresh_range) for i, t in enumerate(thresh_range):
n_levels = 10 # %% [markdown] # ## rows = [] class DDCSBMEstimator(DCSBMEstimator): def __init__(self, **kwargs): super().__init__(degree_directed=True, **kwargs) for l in range(n_levels + 1): labels = meta[f"lvl{l}_labels"].values left_adj = binarize(adj[np.ix_(lp_inds, lp_inds)]) left_adj = remove_loops(left_adj) right_adj = binarize(adj[np.ix_(rp_inds, rp_inds)]) right_adj = remove_loops(right_adj) for model, name in zip( [DDCSBMEstimator, DCSBMEstimator, SBMEstimator], ["DDCSBM", "DCSBM", "SBM"] ): # train on left estimator = model(directed=True, loops=False) uni_labels, inv = np.unique(labels, return_inverse=True) estimator.fit(left_adj, inv[lp_inds]) train_left_p = estimator.p_mat_ train_left_p[train_left_p == 0] = 1 / train_left_p.size n_params = estimator._n_parameters() + len(uni_labels)
gmm_log_likelihood = np.sum(gclust.model_.score(X_hat[-temp_quad_labels])) #- Total likelihood likeli = surface_log_likelihood + gmm_log_likelihood + prop_log_likelihoods #- BIC bic_ = 2 * likeli - temp_n_params * np.log(n) #- ARI ari_ = ari(true_labels, temp_c_hat) return [combo, likeli, ari_, bic_] np.random.seed(16661) A = binarize(right_adj) X_hat = np.concatenate(ASE(n_components=3).fit_transform(A), axis=1) n, d = X_hat.shape gclust = GCLUST(max_components=15) est_labels = gclust.fit_predict(X_hat) loglikelihoods = [np.sum(gclust.model_.score_samples(X_hat))] combos = [None] aris = [ari(right_labels, est_labels)] bic = [gclust.model_.bic(X_hat)] unique_labels = np.unique(est_labels) class_idx = np.array([np.where(est_labels == u)[0] for u in unique_labels])
size_df = pd.concat(size_dfs) fig, ax = plt.subplots(1, 1, figsize=(8, 4)) sns.stripplot(data=size_df, x="Level", y="Size", ax=ax, jitter=0.45, alpha=0.5) ax.set_yscale("log") ax.set_title(title) stashfig("log-sizes" + basename) # %% [markdown] # ## Fit models and compare L/R rows = [] for l in range(n_levels): labels = new_meta[f"lvl{l}_labels"].values left_adj = binarize(new_adj[np.ix_(new_lp_inds, new_lp_inds)]) left_adj = remove_loops(left_adj) right_adj = binarize(new_adj[np.ix_(new_rp_inds, new_rp_inds)]) right_adj = remove_loops(right_adj) for model, name in zip([DCSBMEstimator, SBMEstimator], ["DCSBM", "SBM"]): estimator = model(directed=True, loops=False) uni_labels, inv = np.unique(labels, return_inverse=True) estimator.fit(left_adj, inv[new_lp_inds]) train_left_p = estimator.p_mat_ train_left_p[train_left_p == 0] = 1 / train_left_p.size score = poisson.logpmf(left_adj, train_left_p).sum() rows.append( dict( train_side="left", test="same",
# Report how many nodes survived the largest-connected-component / threshold
# step. `lcc_graph_t` and `n_nodes` are defined earlier, outside this chunk.
n_nodes_t = lcc_graph_t.shape[0]
print(f"Number of remaining nodes: {n_nodes_t}")
print(f"Removed {(n_nodes - n_nodes_t) / n_nodes} of nodes")
#%%
print("Embedding binarized graph")
from graspy.plot import screeplot

# NOTE(review): `embed_graph` is used here before being assigned below —
# presumably it was defined earlier in the file; confirm.
screeplot(embed_graph, cumulative=False, show_first=20, n_elbows=3)
#%%
n_components = None
n_elbows = 1
# The first assignment is immediately overwritten by the binarized version.
embed_graph = lcc_graph_t
embed_graph = binarize(lcc_graph_t)
gridplot_kws["sizes"] = (10, 10)
gridplot(
    [embed_graph],
    inner_hier_labels=lcc_simple_classes,
    outer_hier_labels=lcc_hemisphere,
    **gridplot_kws,
)
# ASE with automatic elbow selection; directed graphs give (out, in) latent
# position pairs, which are concatenated into a single feature matrix.
ase = AdjacencySpectralEmbed(n_components=n_components, n_elbows=n_elbows)
latent = ase.fit_transform(embed_graph)
latent = np.concatenate(latent, axis=-1)
pairplot(latent, title="ASE o binarized o thresholded")
from graspy.models import EREstimator, RDPGEstimator, SBEstimator
from graspy.plot import heatmap, pairplot
import pandas as pd

#%% Set up some simulations
from graspy.simulations import p_from_latent, sample_edges
from graspy.utils import binarize, symmetrize

## Load data
sns.set_context("talk")
left_adj, cell_labels = load_drosophila_left(return_labels=True)
# Build an unweighted (binary), average-symmetrized copy of the left
# adjacency for the undirected simulations.
left_adj_uw = left_adj.copy()
left_adj_uw[left_adj_uw > 0] = 1
left_adj_uw = symmetrize(left_adj_uw, method="avg")
left_adj_uw = binarize(left_adj_uw)


def _check_common_inputs(
    figsize=None,
    height=None,
    title=None,
    context=None,
    font_scale=None,
    legend_name=None,
):
    # Validate shared plotting keyword arguments, raising TypeError on a
    # wrongly-typed value. NOTE(review): this chunk appears truncated — only
    # the figsize check is visible; the other parameters are presumably
    # validated in code past this chunk boundary.
    # Handle figsize
    if figsize is not None:
        if not isinstance(figsize, tuple):
            msg = "figsize must be a tuple, not {}.".format(type(figsize))
            raise TypeError(msg)
Rs.append(R) left_embed = train_embed[left_inds] left_embed = left_embed @ R right_embed = train_embed[right_inds] pred_left = models[0].model_.predict(left_embed) pred_right = models[1].model_.predict(right_embed) pred_left += len(np.unique(pred_right)) + 1 pred = np.empty(len(embed[0])) pred[left_inds] = pred_left pred[right_inds] = pred_right meta["joint_pred"] = pred ax, _, tax, _ = matrixplot( binarize(adj), plot_type="scattermap", sizes=(0.25, 0.5), col_colors="merge_class", col_palette=CLASS_COLOR_DICT, col_meta=meta, col_sort_class=["hemisphere", "joint_pred"], col_ticks=False, # col_class_order="block_sf", col_item_order="adj_sf", row_ticks=False, row_colors="merge_class", row_palette=CLASS_COLOR_DICT, row_meta=meta, row_sort_class=["hemisphere", "joint_pred"], # row_class_order="block_sf",
transform="simple-all", hier_label_fontsize=10, sort_nodes=False, cbar=False, title="Right Brain (summed 4 channels)", title_pad=90, font_scale=1.7, ) annotate_arrow(ax, (0.135, 0.88)) # Plot the adjacency matrix for the 4-color graphs fig, ax = plt.subplots(2, 2, figsize=(20, 20)) ax = ax.ravel() for i, g in enumerate(color_adjs): heatmap( binarize(g), inner_hier_labels=simple_class_labels, # transform="si", hier_label_fontsize=10, sort_nodes=False, ax=ax[i], cbar=False, title=GRAPH_TYPE_LABELS[i], title_pad=70, font_scale=1.7, ) plt.suptitle("Right Brain (4 channels)", fontsize=45, x=0.525, y=1.02) plt.tight_layout() annotate_arrow(ax[0]) savefig("4color_brain", fmt="png",
from graspy.datasets import load_drosophila_right
from graspy.plot import heatmap
from graspy.utils import binarize, symmetrize

'''
In this script, we will try to model a larval Drosophila connectome using
random graph models. Note that, in all of these models, connectivity is
sampled using a Bernoulli distribution with a given probability.
'''

# ---------------------------------------
# Load data to be modelled
# ---------------------------------------
# Load Drosophila melanogaster larva, right MB connectome (Eichler et al. 2017)
'''here we consider a binarized and directed version of the graph'''
adj, labels = load_drosophila_right(return_labels=True)
adj = binarize(adj)  # adjacency matrix


# Plot adjacency matrix
def plotHeatmap(data, title, params={}):
    # Convenience wrapper around graspy's heatmap with this script's labels.
    # NOTE(review): mutable default `params={}` is shared across calls; it is
    # only read (unpacked) here, so behavior is unaffected.
    heatmap(
        X=data,
        inner_hier_labels=labels,
        hier_label_fontsize=8.0,
        font_scale=0.5,
        title=title,
        sort_nodes=True,
        **params,
    )


plotHeatmap(adj, "Drosophila right MB")
# Build legend entries of the form "<class> (<count>)", colored per class.
count_map = dict(zip(uni_class, counts))
names = []
colors = []
for key, val in count_map.items():
    # NOTE(review): `val` is unused; count_map[key] is the same value.
    names.append(f"{key} ({count_map[key]})")
    colors.append(CLASS_COLOR_DICT[key])
colors = colors[::-1]  # reverse because of signal flow sorting
names = names[::-1]
palplot(len(colors), colors, ax=ax)
ax.yaxis.set_major_formatter(plt.FixedFormatter(names))
# plt.tight_layout()

# Plot the binarized data adjacency at every level of the hierarchy, sorted
# by that level's cluster labels.
model = DCSBMEstimator
for level in np.arange(lowest_level + 1):
    ax = fig.add_subplot(gs[:3, level + 4])
    adj = binarize(full_mg.adj)
    _, _, top, _ = adjplot(
        adj,
        ax=ax,
        plot_type="scattermap",
        sizes=(0.5, 0.5),
        sort_class=["hemisphere"] + level_names[: level + 1],
        item_order=["merge_class_sf_order", "merge_class", "sf"],
        class_order="sf",
        meta=full_mg.meta,
        palette=CLASS_COLOR_DICT,
        colors="merge_class",
        ticks=False,
        gridline_kws=dict(linewidth=0.2, color="grey", linestyle="--"),
    )
    top.set_title(f"Level {level} - Data")
print(f"Number of edges: {np.count_nonzero(g)}") print(f"Sparsity: {np.count_nonzero(g) / (n_verts**2)}") print(f"Number of synapses: {int(g.sum())}") median_in_degree = np.median(np.count_nonzero(g, axis=0)) median_out_degree = np.median(np.count_nonzero(g, axis=1)) print(f"Median node in degree: {median_in_degree}") print(f"Median node out degree: {median_out_degree}") print() # Plot the adjacency matrix for the summed graph sns.set_context("talk", font_scale=1) plt.figure(figsize=(5, 5)) ax = heatmap( binarize(sum_adj), inner_hier_labels=class_labels, hier_label_fontsize=10, sort_nodes=False, cbar=False, title="Full Brain (summed 4 channels)", title_pad=90, font_scale=1.7, ) stashfig("full-brain-summed") # Plot the adjacency matrix for the 4-color graphs fig, ax = plt.subplots(2, 2, figsize=(20, 20)) ax = ax.ravel() for i, g in enumerate(color_adjs): heatmap(
# %% [markdown] # ## from graspy.models import SBMEstimator level = 2 n_row = 3 n_col = 7 scale = 10 fig, axs = plt.subplots(n_row, n_col, figsize=(n_row * scale, n_col * scale)) for level in range(8): label_name = f"lvl{level}_labels_side" sbm = SBMEstimator(directed=True, loops=True) sbm.fit(binarize(full_adj), full_meta[label_name].values) ax = axs[1, level] _, _, top, _ = adjplot( sbm.p_mat_, ax=ax, plot_type="heatmap", sort_class=["hemisphere"] + level_names[: level + 1], item_order=["merge_class_sf_order", "merge_class", "sf"], class_order="sf", meta=full_mg.meta, palette=CLASS_COLOR_DICT, colors="merge_class", ticks=False, gridline_kws=dict(linewidth=0.05, color="grey", linestyle="--"), cbar_kws=dict(shrink=0.6), )
# Sort the classes by frequency, largest first.
inds = np.argsort(class_counts)[::-1]
uni_class = uni_class[inds]
class_counts = class_counts[inds]

n_clusters = 12
# Sweep candidate cluster counts k, fitting spherical k-means on the
# embedding and plotting the induced partition each time.
for k in range(2, n_clusters):
    skmeans = SphericalKMeans(n_clusters=k, **skmeans_kws)
    pred_labels = skmeans.fit_predict(latent)
    pred_labels = relabel(pred_labels)
    models.append(skmeans)
    # gridplot(
    #     [adj], inner_hier_labels=pred_labels, hier_label_fontsize=18, sizes=(2, 10)
    # )
    fig, ax = plt.subplots(1, 2, figsize=(30, 18))
    heatmap(
        binarize(adj),
        inner_hier_labels=pred_labels,
        # outer_hier_labels=side_labels,
        hier_label_fontsize=18,
        ax=ax[0],
        cbar=False,
        sort_nodes=True,
    )
    uni_labels = np.unique(pred_labels)
    # survey(pred_labels, uni_class, ax=ax[1])
    # Compare the predicted clusters against the known class labels.
    survey(class_labels[:n_per_side], pred_labels, ax=ax[1])
#%%
# heatmap(adj, inner_hier_labels=pred_labels)
# ## from graspy.models import SBMEstimator n_show = 7 n_row = 3 n_col = n_show scale = 10 fig, axs = plt.subplots(n_row, n_col, figsize=(n_col * scale, n_row * scale)) meta = full_mg.meta for level in range(n_show): # TODO show adjacency label_name = f"lvl{level}_labels_side" ax = axs[0, level] _, _, top, _ = adjplot( binarize(full_adj), sizes=(0.5, 0.5), ax=ax, plot_type="scattermap", sort_class=["hemisphere"] + level_names[:level + 1], item_order=["merge_class_sf_order", "merge_class", "sf"], class_order="sf", meta=meta, palette=CLASS_COLOR_DICT, colors="merge_class", ticks=False, gridline_kws=dict(linewidth=0.05, color="grey", linestyle="--"), ) sbm = SBMEstimator(directed=True, loops=True) labels, inv = np.unique(full_meta[label_name].values, return_inverse=True) sbm.fit(binarize(full_adj), inv)
# Fit the model on the current level's labels, then measure "feed-forwardness"
# (proportion of edge mass in the upper triangle after sorting by signal flow)
# on sampled graphs. NOTE(review): `lvl`, `estimator`, `rows`, etc. come from
# context before this chunk — this block may sit inside a loop over `lvl`.
estimator.fit(adj, meta[lvl].values)
for i in range(n_samples):
    sample = np.squeeze(estimator.sample())
    sample_meta = meta.copy()
    sf = signal_flow(sample)
    sample_meta["signal_flow"] = -sf
    sample_mg = MetaGraph(sample, sample_meta)
    sample_mg = sample_mg.sort_values("signal_flow", ascending=True)
    prop = upper_triu_prop(sample_mg.adj)
    print(prop)
    row = {"level": lvl.replace("_labels", ""), "prop": prop}
    rows.append(row)
print()

# Baseline: the same statistic on the binarized data itself.
bin_meta = meta.copy()
bin_adj = binarize(adj)
sf = signal_flow(bin_adj)
bin_meta["signal_flow"] = -sf
bin_mg = MetaGraph(bin_adj, bin_meta)
# Bug fix: the sorted graph was assigned to `bin_mb` (typo) and discarded,
# so the UNSORTED graph was scored — unlike the samples above, which are
# sorted by signal flow before computing the upper-triangle proportion.
bin_mg = bin_mg.sort_values("signal_flow", ascending=True)
prop = upper_triu_prop(bin_mg.adj)
print(prop)
rows.append({"level": "data", "prop": prop})

prop_df = pd.DataFrame(rows)
fig, ax = plt.subplots(1, 1, figsize=(10, 5))
sns.stripplot(data=prop_df, x="level", y="prop", ax=ax)
ax.set_ylabel("Prop. in upper triangle")
ax.set_xlabel("Model")
stashfig("ffwdness-by-model")
def calc_model_liks(adj, meta, lp_inds, rp_inds, n_levels=10):
    """Compute cross-hemisphere log-likelihoods of block models per level.

    For each level of the cluster hierarchy (columns ``lvl{l}_labels`` in
    ``meta``), fits a DCSBM and an SBM to each hemisphere's binarized,
    loop-free subgraph, and scores every fit on both the same and the
    opposite hemisphere under a Poisson edge model.

    Parameters
    ----------
    adj : np.ndarray
        Full adjacency matrix; hemispheres are indexed via lp_inds/rp_inds.
    meta : pd.DataFrame
        Node metadata holding the per-level label columns.
    lp_inds, rp_inds : array-like of int
        Indices of the left / right paired nodes in ``adj``.
    n_levels : int, optional
        Highest level (inclusive) of the hierarchy to evaluate.

    Returns
    -------
    pd.DataFrame
        One row per (level, model, train side, test side) combination, with
        raw and per-edge-normalized scores and parameter counts.
    """

    def _fit_p(model, train_adj, y):
        # Fit one estimator; smooth zero-probability cells so logpmf stays
        # finite, and return the model's own parameter count.
        estimator = model(directed=True, loops=False)
        estimator.fit(train_adj, y)
        p = estimator.p_mat_
        p[p == 0] = 1 / p.size
        return p, estimator._n_parameters()

    def _row(train_side, test_side, test_adj, p, level, name, n_params):
        # Score a fitted edge-probability matrix on one test hemisphere.
        score = poisson.logpmf(test_adj, p).sum()
        return dict(
            train_side=train_side,
            test="Same" if train_side == test_side else "Opposite",
            test_side=test_side,
            score=score,
            level=level,
            model=name,
            n_params=n_params,
            norm_score=score / test_adj.sum(),
        )

    rows = []
    for l in range(n_levels + 1):
        labels = meta[f"lvl{l}_labels"].values
        left_adj = remove_loops(binarize(adj[np.ix_(lp_inds, lp_inds)]))
        right_adj = remove_loops(binarize(adj[np.ix_(rp_inds, rp_inds)]))
        uni_labels, inv = np.unique(labels, return_inverse=True)
        for model, name in zip([DCSBMEstimator, SBMEstimator], ["DCSBM", "SBM"]):
            for train_side, train_adj, train_inds in [
                ("Left", left_adj, lp_inds),
                ("Right", right_adj, rp_inds),
            ]:
                p, n_params = _fit_p(model, train_adj, inv[train_inds])
                # Count one extra parameter per cluster label, as before.
                n_params += len(uni_labels)
                rows.append(_row(train_side, "Left", left_adj, p, l, name, n_params))
                rows.append(_row(train_side, "Right", right_adj, p, l, name, n_params))
    return pd.DataFrame(rows)