def run_fit(seed, param_grid, directed, n_init, n_jobs, co_block):
    """Run ``select_sbm`` on the left and right Drosophila graphs and save both.

    Each graph is loaded, symmetrized by averaging when ``directed`` is
    false, binarized, and handed to ``select_sbm``. The left result is
    saved under ``"cosbm_left_df"`` and the right result under
    ``"cosbm_right_df"`` via ``save_obj`` and the module-level ``file_obs``.

    Parameters
    ----------
    seed
        Accepted but not used in this function.
        NOTE(review): other fitting entry points call ``np.random.seed(seed)``;
        confirm whether seeding was intended here as well.
    param_grid
        Forwarded positionally to ``select_sbm``.
    directed
        When false the graph is symmetrized first; also forwarded to
        ``select_sbm``.
    n_init, n_jobs, co_block
        Forwarded unchanged to ``select_sbm`` as keyword arguments.

    Returns
    -------
    int
        Always ``0``.
    """

    def _fit_one(load_graph):
        # Shared preprocessing + model selection for a single graph.
        adjacency = load_graph()
        if not directed:
            adjacency = symmetrize(adjacency, method="avg")
        adjacency = binarize(adjacency)
        return select_sbm(
            adjacency,
            param_grid,
            directed=directed,
            n_jobs=n_jobs,
            n_init=n_init,
            co_block=co_block,
        )

    # Left graph: fit, then persist before touching the right graph.
    sbm_left_df = _fit_one(load_drosophila_left)
    save_obj(sbm_left_df, file_obs, "cosbm_left_df")

    # Right graph.
    sbm_right_df = _fit_one(load_drosophila_right)
    save_obj(sbm_right_df, file_obs, "cosbm_right_df")

    return 0
def run_fit(
    seed,
    n_components_try_range,
    n_components_try_rdpg,
    n_block_try_range,
    directed,
    n_sims_sbm,
):
    """Fit repeated SBM selections, an RDPG selection and a metric-driven SBM.

    The left Drosophila graph is loaded, symmetrized by averaging when
    ``directed`` is false, and binarized. The graph must be fully connected.

    Parameters
    ----------
    seed
        Passed to ``np.random.seed`` before any model fitting.
    n_components_try_range, n_block_try_range
        Forwarded positionally to ``select_sbm``.
    n_components_try_rdpg
        Forwarded to ``select_rdpg``.
    directed
        When false the graph is symmetrized first; also forwarded to the
        selection helpers and to the MSE metric.
    n_sims_sbm
        Number of times ``select_sbm`` is repeated; each repetition's rows
        are tagged with its index in the ``"sim_ind"`` column.

    Returns
    -------
    tuple
        ``(sbm_master_df, rdpg_df, tsbm_master_df)``.

    Raises
    ------
    ValueError
        If the preprocessed graph is not fully connected. A heatmap of the
        graph is shown before raising.
    """
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)

    if not is_fully_connected(graph):
        # Show the offending adjacency matrix before failing.
        heatmap(graph)
        plt.show()
        raise ValueError("input graph not connected")

    np.random.seed(seed)

    columns = [
        "n_params_gmm",
        "n_params_sbm",
        "rss",
        "mse",
        "score",
        "n_components_try",
        "n_block_try",
        "sim_ind",
    ]
    sbm_master_df = pd.DataFrame(columns=columns)

    # Collect per-simulation frames and concatenate once: DataFrame.append
    # re-copied the accumulated frame every iteration and no longer exists
    # in pandas >= 2.0.
    sim_frames = []
    for i in range(n_sims_sbm):
        sbm_df = select_sbm(
            graph, n_components_try_range, n_block_try_range, directed=directed
        )
        sbm_df["sim_ind"] = i
        sim_frames.append(sbm_df)
    if sim_frames:
        # The empty template frame stays first so every expected column is
        # present in the result even if a simulation omits it.
        sbm_master_df = pd.concat(
            [sbm_master_df, *sim_frames], ignore_index=True, sort=True
        )

    rdpg_df = select_rdpg(graph, n_components_try_rdpg, directed)

    def metric(assignments, *args):
        # Negated MSE of the given assignments on this graph; extra
        # positional arguments are ignored.
        return -compute_mse_from_assignments(assignments, graph, directed=directed)

    tsbm_master_df = select_sbm(
        graph,
        n_components_try_range,
        n_block_try_range,
        directed=directed,
        method="bc-metric",
        metric=metric,
    )
    return (sbm_master_df, rdpg_df, tsbm_master_df)
def run_sim(
    seed,
    n_blocks_range,
    n_verts_range,
    n_components_try_range,
    n_block_try_range,
    B_mat,
    directed,
):
    """Run ``select_sbm`` on simulated SBM graphs over a grid of sizes.

    For every ``n_blocks`` in ``n_blocks_range`` and every ``n_verts`` in
    ``n_verts_range`` a graph is generated with ``gen_sbm`` from the leading
    ``n_blocks x n_blocks`` corner of ``B_mat`` and passed to ``select_sbm``.

    Parameters
    ----------
    seed
        Passed to ``np.random.seed`` before any graph is generated.
    n_blocks_range
        Iterable of block counts to simulate.
    n_verts_range
        Iterable of vertex counts; it is iterated once per block count, so
        it must be re-iterable (e.g. a list or ``range``).
    n_components_try_range, n_block_try_range
        Forwarded positionally to ``select_sbm``.
    B_mat
        Indexed as ``B_mat[:n_blocks, :n_blocks]``, so it must support 2-D
        slicing.
    directed
        Forwarded to ``select_sbm``.

    Returns
    -------
    pandas.DataFrame
        All ``select_sbm`` results stacked, with ``"n_verts"`` and
        ``"n_blocks"`` columns recording the simulation settings. When no
        simulation runs, an empty frame with the expected columns.
    """
    np.random.seed(seed)

    columns = [
        "n_params_gmm",
        "n_params_sbm",
        "rss",
        "mse",
        "score",
        "n_components_try",
        "n_block_try",
        "n_blocks",
        "n_verts",
    ]
    master_sbm_df = pd.DataFrame(columns=columns)

    # Collect per-setting frames and concatenate once: DataFrame.append
    # re-copied the accumulated frame every iteration and no longer exists
    # in pandas >= 2.0.
    sim_frames = []
    for n_blocks in n_blocks_range:
        B_mat_trunc = B_mat[:n_blocks, :n_blocks]
        for n_verts in n_verts_range:
            # The second value returned by gen_sbm is not needed here.
            graph, _ = gen_sbm(n_verts, n_blocks, B_mat_trunc)
            sbm_df = select_sbm(
                graph, n_components_try_range, n_block_try_range, directed=directed
            )
            sbm_df["n_verts"] = n_verts
            sbm_df["n_blocks"] = n_blocks
            sim_frames.append(sbm_df)

    if sim_frames:
        # The empty template frame stays first so every expected column is
        # present in the result even if a simulation omits it.
        master_sbm_df = pd.concat(
            [master_sbm_df, *sim_frames], ignore_index=True, sort=True
        )
    return master_sbm_df
def run_fit(
    seed,
    n_components_try_range,
    n_components_try_rdpg,
    n_block_try_range,
    directed,
    n_sims_sbm,
):
    """Repeat ``select_sbm`` with ``rank="sweep"`` on the left graph and save it.

    The left Drosophila graph is loaded, symmetrized by averaging when
    ``directed`` is false, and binarized. The graph must be fully connected.
    The stacked results are saved under ``"sbm_master_df"`` via ``save_obj``
    and the module-level ``file_obs``.

    Parameters
    ----------
    seed
        Passed to ``np.random.seed`` before any model fitting.
    n_components_try_range, n_block_try_range
        Forwarded positionally to ``select_sbm``.
    n_components_try_rdpg
        Accepted but not used in this function.
    directed
        When false the graph is symmetrized first; also forwarded to
        ``select_sbm``.
    n_sims_sbm
        Number of times ``select_sbm`` is repeated; each repetition's rows
        are tagged with its index in the ``"sim_ind"`` column.

    Returns
    -------
    int
        Always ``0``.

    Raises
    ------
    ValueError
        If the preprocessed graph is not fully connected. A heatmap of the
        graph is shown before raising.
    """
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)

    if not is_fully_connected(graph):
        # Show the offending adjacency matrix before failing.
        heatmap(graph)
        plt.show()
        raise ValueError("input graph not connected")

    np.random.seed(seed)

    columns = [
        "n_params_gmm",
        "n_params_sbm",
        "rss",
        "mse",
        "score",
        "n_components_try",
        "n_block_try",
        "sim_ind",
    ]
    sbm_master_df = pd.DataFrame(columns=columns)

    # Collect per-simulation frames and concatenate once: DataFrame.append
    # re-copied the accumulated frame every iteration and no longer exists
    # in pandas >= 2.0.
    sim_frames = []
    for i in range(n_sims_sbm):
        sbm_df = select_sbm(
            graph,
            n_components_try_range,
            n_block_try_range,
            directed=directed,
            rank="sweep",
        )
        sbm_df["sim_ind"] = i
        sim_frames.append(sbm_df)
    if sim_frames:
        # The empty template frame stays first so every expected column is
        # present in the result even if a simulation omits it.
        sbm_master_df = pd.concat(
            [sbm_master_df, *sim_frames], ignore_index=True, sort=True
        )

    save_obj(sbm_master_df, file_obs, "sbm_master_df")
    return 0
# # #
##%% #############
# Compare an SBM fit with the a priori labels against a sweep over block
# counts on the left graph.
show_plots = False
score = "mse"

left_adj, left_labels = load_left()
if show_plots:
    heatmap(left_adj, inner_hier_labels=left_labels, cbar=False)

# Fit using the labels that come with the data.
estimator = SBMEstimator()
ap_results = fit_a_priori(estimator, left_adj, left_labels)

# Sweep n_blocks over 1..9.
param_grid = {"n_blocks": list(range(1, 10))}
sweep_results = select_sbm(left_adj, param_grid, n_jobs=-2, n_init=25)
sweep_results  # bare expression; shows the result when run as an interactive cell
best_results = get_best(
    sweep_results, "n_params", small_better=False, score_name=score
)

##%%
# Overlay the sweep's best results and the a priori fit on the same axes.
sns.scatterplot(x="n_params", y=score, data=best_results)
sns.scatterplot(x="n_params", y=score, data=ap_results)

##%%
# Repeat the a priori fit with the DCSBM estimator (rebinds estimator and
# ap_results).
estimator = DCSBMEstimator()
ap_results = fit_a_priori(estimator, left_adj, left_labels)