def dcsbm_objective(adj, labels):
    """Score a block model fit to ``adj`` using the given node ``labels``.

    The model is fit on ``adj`` with ``labels`` passed as ``y`` and then
    scored on that same ``adj``.

    NOTE(review): despite the function name, the estimator constructed here
    is ``SBMEstimator`` (default arguments), not a degree-corrected model --
    confirm this is intended.

    Parameters
    ----------
    adj :
        Graph passed to ``SBMEstimator.fit`` and ``SBMEstimator.score``.
    labels :
        Block assignment handed to ``fit`` as ``y``.

    Returns
    -------
    Whatever ``SBMEstimator.score(adj)`` returns.
    """
    model = SBMEstimator()
    model.fit(adj, y=labels)
    return model.score(adj)
def compute_mse_from_assignments(assignments, graph, directed=True, loops=False):
    """Fit an SBM with fixed block ``assignments`` and return its MSE on ``graph``.

    Parameters
    ----------
    assignments :
        Block labels, passed to ``SBMEstimator.fit`` as ``y``.
    graph :
        Graph used both for fitting and for the MSE computation.
    directed : bool, default True
        Forwarded to ``SBMEstimator``.
    loops : bool, default False
        Forwarded to ``SBMEstimator``.

    Returns
    -------
    The value of ``compute_mse(fitted_estimator, graph)``.
    """
    sbm = SBMEstimator(directed=directed, loops=loops)
    sbm.fit(graph, y=assignments)
    mse = compute_mse(sbm, graph)
    return mse
def run_fit(seed):
    """Cross-evaluate a priori SBMs between the left and right graphs.

    Seeds NumPy's global RNG, loads both graphs with their labels, then
    (1) fits an SBM on the left graph and evaluates it on the right, and
    (2) fits an SBM on the right graph and evaluates it on the left.
    Each one-row summary frame is printed and stored via ``save_obj``
    under ``"right_pred_sbm_df"`` and ``"left_pred_sbm_df"`` respectively.

    Parameters
    ----------
    seed : int
        Seed passed to ``np.random.seed``.

    Returns
    -------
    int
        Always ``0``.
    """
    np.random.seed(seed)

    # load
    left_graph, left_labels = load_left()
    right_graph, right_labels = load_right()

    # fit SBM left, predict right
    right_pred_df = _sbm_cross_prediction_df(
        left_graph, left_labels, right_graph, right_labels
    )
    print(right_pred_df)
    save_obj(right_pred_df, file_obs, "right_pred_sbm_df")

    # fit SBM right, predict left
    left_pred_df = _sbm_cross_prediction_df(
        right_graph, right_labels, left_graph, left_labels
    )
    print(left_pred_df)
    save_obj(left_pred_df, file_obs, "left_pred_sbm_df")

    return 0


def _sbm_cross_prediction_df(train_graph, train_labels, test_graph, test_labels):
    """Fit a directed, loopless SBM on the train graph and summarise it on the test graph.

    Returns a one-row ``DataFrame`` with the columns ``n_params``, ``mse``,
    ``likelihood``, ``zc_likelihood`` and ``sc_likelihood``.
    """
    estimator = SBMEstimator(directed=True, loops=False)
    estimator.fit(train_graph, y=train_labels)

    mse = mse_on_other(estimator, test_graph, test_labels)
    likelihood = likelihood_on_other(estimator, test_graph, test_labels)
    # Clip value is 1 / (total entries - first-dimension length); for a square
    # adjacency matrix that is one over the number of off-diagonal entries.
    sc_likelihood = likelihood_on_other(
        estimator,
        test_graph,
        test_labels,
        clip=1 / (test_graph.size - test_graph.shape[0]),
    )

    summary = {
        "n_params": estimator._n_parameters(),
        "mse": mse,
        "likelihood": likelihood,
        # NOTE(review): "zc_likelihood" stores the same value as "likelihood";
        # kept as-is so the saved columns are unchanged -- confirm intended.
        "zc_likelihood": likelihood,
        "sc_likelihood": sc_likelihood,
    }
    return pd.DataFrame(summary, index=[0])
def test_SBM_nparams(self):
    """Check ``_n_parameters()`` for three ways of fitting the estimator."""
    # Fixture estimator, fit with the fixture labels supplied.
    supervised = self.estimator.fit(self.graph, y=self.labels)
    assert supervised._n_parameters() == 4

    # Default estimator, fit without labels.
    unsupervised = SBMEstimator()
    unsupervised.fit(self.graph)
    assert unsupervised._n_parameters() == 4 + 1

    # Undirected estimator, fit without labels.
    undirected = SBMEstimator(directed=False)
    undirected.fit(self.graph)
    assert undirected._n_parameters() == 1 + 3
def probplot(
    adj,
    labels,
    log_scale=False,
    figsize=(20, 20),
    cmap="Purples",
    title="Edge probability",
    vmin=0,
    vmax=None,
    ax=None,
    font_scale=1,
):
    """Plot the block probability matrix of an SBM fit to ``adj`` as a heatmap.

    A directed SBM with loops is fit to the binarized adjacency using
    ``labels``, and its ``block_p_`` is drawn as an annotated, square
    seaborn heatmap whose rows/columns are named by ``np.unique(labels)``.

    Parameters
    ----------
    adj :
        Adjacency matrix; binarized before fitting.
    labels :
        Block labels passed to ``fit`` as ``y``.
    log_scale : bool, default False
        If True, 0.001 is added to every block probability, the color scale
        becomes a ``LogNorm`` and colorbar ticks are placed at powers of ten.
        In this mode the ``vmin``/``vmax`` arguments are replaced by the
        min/max of the shifted data.
    figsize : tuple, default (20, 20)
        Figure size, used only when ``ax`` is None.
    cmap : default "Purples"
        Colormap forwarded to ``sns.heatmap``.
    title : str, default "Edge probability"
        Axes title.
    vmin, vmax :
        Color limits forwarded to ``sns.heatmap`` (see ``log_scale``).
    ax : optional
        Axes to draw on; a new figure is created when None.
    font_scale : default 1
        Forwarded to ``sns.set_context("talk", ...)``.

    Returns
    -------
    (ax, prob_df)
        The heatmap axes and the labelled block probability ``DataFrame``.
    """
    model = SBMEstimator(directed=True, loops=True)
    model.fit(binarize(adj), y=labels)
    block_probs = model.block_p_
    block_names = np.unique(labels)

    colorbar_opts = {"fraction": 0.08, "shrink": 0.8, "pad": 0.03}

    color_norm = None
    if log_scale:
        # Shift away from zero so the log scale is defined.
        block_probs = block_probs + 0.001
        vmin = block_probs.min().min()
        vmax = block_probs.max().max()
        color_norm = LogNorm(vmin=vmin, vmax=vmax)
        # One tick per decade spanning the data range.
        lowest_decade = math.floor(math.log10(vmin))
        decade_stop = 1 + math.ceil(math.log10(vmax))
        colorbar_opts["ticks"] = [
            math.pow(10, exponent) for exponent in range(lowest_decade, decade_stop)
        ]

    prob_df = pd.DataFrame(data=block_probs, index=block_names, columns=block_names)

    if ax is None:
        plt.figure(figsize=figsize)
        ax = plt.gca()

    ax.set_title(title, pad=30, fontsize=30)
    sns.set_context("talk", font_scale=font_scale)

    # ``ax`` is always set by this point, so it is always forwarded.
    heatmap_opts = {
        "cbar_kws": colorbar_opts,
        "annot": True,
        "square": True,
        "cmap": cmap,
        "vmin": vmin,
        "vmax": vmax,
        "ax": ax,
    }
    if log_scale:
        heatmap_opts["norm"] = color_norm

    ax.tick_params(axis="both", which="major", labelsize=30)
    ax = sns.heatmap(prob_df, **heatmap_opts)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)

    return ax, prob_df
def get_sbm_prob(adj, labels):
    """Return SBM block probabilities as a DataFrame, largest block first.

    A directed SBM with loops is fit to the binarized ``adj`` using
    ``labels``. Rows and columns of ``block_p_`` are then reordered by
    descending label count and named with the correspondingly reordered
    unique labels.

    NOTE(review): this relies on ``block_p_`` being ordered like
    ``np.unique(labels)`` -- confirm against the estimator.
    """
    model = SBMEstimator(directed=True, loops=True)
    model.fit(binarize(adj), y=labels)
    block_probs = model.block_p_

    names, sizes = np.unique(labels, return_counts=True)
    # Ascending argsort, reversed -> indices from most to least frequent label.
    order = np.argsort(sizes)[::-1]

    sorted_names = names[order]
    sorted_probs = block_probs[np.ix_(order, order)]
    return pd.DataFrame(data=sorted_probs, index=sorted_names, columns=sorted_names)
def test_SBM_fit_unsupervised(self):
    """Fit without labels on a sampled 3-block graph and check the recovery."""
    np.random.seed(12345)

    n_verts = 1500
    block_probs = np.array([[0.7, 0.1, 0.1], [0.1, 0.9, 0.1], [0.05, 0.1, 0.75]])
    community_sizes = np.array([500, 500, 500])
    true_labels = _n_to_labels(community_sizes)

    # Full edge-probability matrix with the diagonal zeroed (no loops).
    true_p_mat = _block_to_full(block_probs, true_labels, (n_verts, n_verts))
    true_p_mat -= np.diag(np.diag(true_p_mat))
    sampled_graph = sample_edges(true_p_mat, directed=True, loops=False)

    estimator = SBMEstimator(directed=True, loops=False)
    estimator.fit(sampled_graph)

    ari = adjusted_rand_score(true_labels, estimator.vertex_assignments_)
    assert ari > 0.95
    assert_allclose(true_p_mat, estimator.p_mat_, atol=0.12)
def test_SBM_fit_supervised(self):
    """Fit with known labels on a sampled 4-block graph and compare ``block_p_``."""
    np.random.seed(8888)

    true_block_p = np.array(
        [
            [0.9, 0.2, 0.05, 0.1],
            [0.1, 0.7, 0.1, 0.1],
            [0.2, 0.4, 0.8, 0.5],
            [0.1, 0.2, 0.1, 0.7],
        ]
    )
    community_sizes = np.array([500, 500, 250, 250])
    sampled_graph = sbm(community_sizes, true_block_p, directed=True, loops=False)

    estimator = SBMEstimator(directed=True, loops=False)
    true_labels = _n_to_labels(community_sizes)
    estimator.fit(sampled_graph, y=true_labels)

    assert_allclose(estimator.block_p_, true_block_p, atol=0.01)
def test_SBM_inputs(self):
    """Check that invalid constructor and ``fit`` arguments raise as expected."""
    # (expected exception, constructor kwargs), in the order they are checked.
    bad_constructor_args = [
        (TypeError, {"directed": "hey"}),
        (TypeError, {"loops": 6}),
        (TypeError, {"n_components": "XD"}),
        (ValueError, {"n_components": -1}),
        (TypeError, {"min_comm": "1"}),
        (ValueError, {"min_comm": -1}),
        (TypeError, {"max_comm": "ay"}),
        (ValueError, {"max_comm": -1}),
        (ValueError, {"min_comm": 4, "max_comm": 2}),
    ]
    for expected_error, kwargs in bad_constructor_args:
        with pytest.raises(expected_error):
            SBMEstimator(**kwargs)

    graph = er_np(100, 0.5)
    bad_y = np.zeros(99)  # one label short of the 100 vertices
    sbe = SBMEstimator()

    with pytest.raises(ValueError):
        sbe.fit(graph, y=bad_y)

    # Non-square input.
    with pytest.raises(ValueError):
        sbe.fit(graph[:, :99])

    # Input with an extra trailing axis.
    with pytest.raises(ValueError):
        sbe.fit(graph[..., np.newaxis])

    for kwargs in ({"cluster_kws": 1}, {"embed_kws": 1}):
        with pytest.raises(TypeError):
            SBMEstimator(**kwargs)
# %% [markdown]
# ##
from graspy.models import SBMEstimator

n_row = 3
n_col = 7
scale = 10
# Matplotlib's figsize is (width, height): width scales with the number of
# columns and height with the number of rows.
fig, axs = plt.subplots(n_row, n_col, figsize=(n_col * scale, n_row * scale))

# One column of axes per level. Iterating over ``n_col`` keeps ``axs[1, level]``
# in bounds; the previous ``range(8)`` indexed an 8th column that the
# 7-column grid does not have.
# NOTE(review): if an 8th level is wanted, raise ``n_col`` to 8 instead.
for level in range(n_col):
    label_name = f"lvl{level}_labels_side"

    # Fit an SBM on the binarized adjacency using this level's labels.
    sbm = SBMEstimator(directed=True, loops=True)
    sbm.fit(binarize(full_adj), full_meta[label_name].values)

    # Middle row shows the fitted probability matrix for this level.
    ax = axs[1, level]
    _, _, top, _ = adjplot(
        sbm.p_mat_,
        ax=ax,
        plot_type="heatmap",
        sort_class=["hemisphere"] + level_names[: level + 1],
        item_order=["merge_class_sf_order", "merge_class", "sf"],
        class_order="sf",
        meta=full_mg.meta,
        palette=CLASS_COLOR_DICT,
        colors="merge_class",
        ticks=False,
        gridline_kws=dict(linewidth=0.05, color="grey", linestyle="--"),
        cbar_kws=dict(shrink=0.6),
    )
#%%
from graspy.models import SBMEstimator
from src.data import load_new_left
from graspy.plot import heatmap
import numpy as np

# Fit an SBM (co_block disabled) with the loaded labels and show its P matrix.
adj, labels = load_new_left()
sbm = SBMEstimator(loops=False, co_block=False)
sbm.fit(adj, y=labels)
heatmap(sbm.p_mat_, inner_hier_labels=labels, vmin=0, vmax=1)

#%%
# Two-column label array (3-character strings); in the second column the
# labels "O" and "I" are merged into a single "O/I" label.
co_labels = np.stack((labels, labels), axis=1).astype("U3")
second_column = co_labels[:, 1]
merge_mask = (second_column == "O") | (second_column == "I")
co_labels[merge_mask, 1] = "O/I"
co_labels

#%%
# Same fit with co_block enabled and the two-column labels.
cosbm = SBMEstimator(loops=False, co_block=True)
cosbm.fit(adj, y=co_labels)
heatmap(cosbm.p_mat_, inner_hier_labels=labels)

#%%
right_graph, right_labels = load_right() np.random.seed(8888) n_init = 200 clip = 1 / (right_graph.size - right_graph.shape[0]) heatmap_kws = dict(vmin=0, vmax=1, font_scale=1.5, hier_label_fontsize=20, cbar=False) fig, ax = plt.subplots(4, 2, figsize=(15, 30)) # A priori SBM ap_estimator = SBMEstimator() ap_estimator.fit(right_graph, y=right_labels) lik = ap_estimator.score(right_graph, clip=clip) heatmap( right_graph, inner_hier_labels=right_labels, title="Right MB (by cell type)", ax=ax[0, 0], **heatmap_kws, ) heatmap( ap_estimator.p_mat_, inner_hier_labels=right_labels, title=f"A priori SBM, lik = {lik:.2f}", ax=ax[0, 1],
#%%
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from graspy.models import DCSBMEstimator, RDPGEstimator, SBMEstimator
from graspy.plot import heatmap
from src.data import load_right

# Load data
right_adj, right_labels = load_right()

# Fit the models
sbm = SBMEstimator(directed=True, loops=False)
sbm.fit(right_adj, y=right_labels)

dcsbm = DCSBMEstimator(degree_directed=False, directed=True, loops=False)
dcsbm.fit(right_adj, y=right_labels)

rdpg = RDPGEstimator(loops=False, n_components=3)
rdpg.fit(right_adj)

# Plotting
# NOTE(review): the seed is set after the models are fit, so it only affects
# random draws made from here on -- confirm that is intended.
np.random.seed(8888)
# ``matplotlib.cm.get_cmap`` is deprecated and removed in recent matplotlib
# releases; ``pyplot.get_cmap`` is available across versions.
cmap = plt.get_cmap("RdBu_r")
center = 0
vmin = 0
vmax = 1
norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)
binarize(full_adj), sizes=(0.5, 0.5), ax=ax, plot_type="scattermap", sort_class=["hemisphere"] + level_names[:level + 1], item_order=["merge_class_sf_order", "merge_class", "sf"], class_order="sf", meta=meta, palette=CLASS_COLOR_DICT, colors="merge_class", ticks=False, gridline_kws=dict(linewidth=0.05, color="grey", linestyle="--"), ) sbm = SBMEstimator(directed=True, loops=True) labels, inv = np.unique(full_meta[label_name].values, return_inverse=True) sbm.fit(binarize(full_adj), inv) ax = axs[1, level] _, _, top, _ = adjplot( sbm.p_mat_, ax=ax, plot_type="heatmap", sort_class=["hemisphere"] + level_names[:level + 1], item_order=["merge_class_sf_order", "merge_class", "sf"], class_order="sf", meta=meta, palette=CLASS_COLOR_DICT, colors="merge_class", ticks=False, gridline_kws=dict(linewidth=0.05, color="grey", linestyle="--"), cbar_kws=dict(shrink=0.6), )
# Each metric below is recorded the same way: copy ``base_dict``, set its
# "Metric" and "Score" entries, and append the copy to ``out_dicts``.

# Score of the fitted GMM on ``latent``
score = gmm.model_.score(latent)
temp_dict = base_dict.copy()
temp_dict["Metric"] = "GMM likelihood"
temp_dict["Score"] = score
out_dicts.append(temp_dict)

# GMM BIC
score = gmm.model_.bic(latent)
temp_dict = base_dict.copy()
temp_dict["Metric"] = "GMM BIC"
temp_dict["Score"] = score
out_dicts.append(temp_dict)

# SBM likelihood: fit on ``bin_adj`` with the predicted labels, then score
# the same ``bin_adj``
sbm = SBMEstimator(directed=True, loops=False)
sbm.fit(bin_adj, y=pred_labels)
score = sbm.score(bin_adj)
temp_dict = base_dict.copy()
temp_dict["Metric"] = "SBM likelihood"
temp_dict["Score"] = score
out_dicts.append(temp_dict)

# DCSBM likelihood: same procedure with the degree-corrected estimator
dcsbm = DCSBMEstimator(directed=True, loops=False)
dcsbm.fit(bin_adj, y=pred_labels)
score = dcsbm.score(bin_adj)
temp_dict = base_dict.copy()
temp_dict["Metric"] = "DCSBM likelihood"
temp_dict["Score"] = score
out_dicts.append(temp_dict)
cbar=False, title="Adjacency matrix", inner_hier_labels=labels, sort_nodes=True, hier_label_fontsize=16, ) mean_degree = np.mean(np.sum(adj, axis=0)) print(f"Mean degree: {mean_degree:.3f}") # %% [markdown] # ## Double checking the model parameters # Below is a quick sanity check that the graph we sampled has block probabilities that are # close to what we set originally if we undo the rescaling step. # %% double checking on model params sbme = SBMEstimator(directed=False, loops=False) sbme.fit(adj, y=labels) block_p_hat = sbme.block_p_ block_heatmap(block_p_hat, title=r"Observed $\hat{B}$") block_p_hat_unscaled = block_p_hat * 1 / scaling_factor block_heatmap(block_p_hat_unscaled, title=r"Observed $\hat{B}$ (unscaled)") # %% [markdown] # ## Spectral embedding # Here I use graspy to do ASE, LSE, and regularized LSE. Note that we're just using the # SVDs here. There is an option on whether to throw out the first eigenvector. #%% embeddings embed_kws = dict(n_components=k + 1, algorithm="full", check_lcc=False) ase = AdjacencySpectralEmbed(**embed_kws) lse = LaplacianSpectralEmbed(form="DAD", **embed_kws) rlse = LaplacianSpectralEmbed(form="R-DAD", **embed_kws)
palette=cmap, **plt_kws, ) plt.xlabel("# Params (SBM params for SBMs)") plt.ylabel("MSE") plt.title(f"Drosophila old MB left, directed ({experiment}:{run})") plt.savefig(save_dir / "rank_sbm_Klines.pdf", format="pdf", facecolor="w") #%% from graspy.models import SBMEstimator from graspy.datasets import load_drosophila_left, load_drosophila_right from graspy.utils import binarize sbm = SBMEstimator(directed=True, loops=False) left_adj, left_labels = load_drosophila_left(return_labels=True) left_adj = binarize(left_adj) sbm.fit(left_adj, y=left_labels) sbm.mse(left_adj) sbm._n_parameters() right_adj, right_labels = load_drosophila_right(return_labels=True) er = SBMEstimator(directed=True, loops=False, n_blocks=2) er.fit(left_adj) er.mse(left_adj) heatmap(left_adj, inner_hier_labels=er.vertex_assignments_, outer_hier_labels=left_labels) #%%