def run_fit(seed, param_grid, directed, n_init, n_jobs, co_block):
    # run left
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)
    sbm_left_df = select_sbm(
        graph,
        param_grid,
        directed=directed,
        n_jobs=n_jobs,
        n_init=n_init,
        co_block=co_block,
    )
    save_obj(sbm_left_df, file_obs, "cosbm_left_df")

    # run right
    graph = load_drosophila_right()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)
    sbm_right_df = select_sbm(
        graph,
        param_grid,
        directed=directed,
        n_jobs=n_jobs,
        n_init=n_init,
        co_block=co_block,
    )
    save_obj(sbm_right_df, file_obs, "cosbm_right_df")

    return 0
Beispiel #2
0
def run_fit(
    seed,
    n_components_try_range,
    n_components_try_rdpg,
    n_block_try_range,
    directed,
    n_sims_sbm,
):
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)

    connected = is_fully_connected(graph)

    if not connected:
        heatmap(graph)
        plt.show()
        raise ValueError("input graph not connected")

    np.random.seed(seed)

    columns = columns = [
        "n_params_gmm",
        "n_params_sbm",
        "rss",
        "mse",
        "score",
        "n_components_try",
        "n_block_try",
        "sim_ind",
    ]
    sbm_master_df = pd.DataFrame(columns=columns)
    for i in range(n_sims_sbm):
        sbm_df = select_sbm(graph,
                            n_components_try_range,
                            n_block_try_range,
                            directed=directed)
        sbm_df["sim_ind"] = i
        sbm_master_df = sbm_master_df.append(sbm_df,
                                             ignore_index=True,
                                             sort=True)

    rdpg_df = select_rdpg(graph, n_components_try_rdpg, directed)

    def metric(assignments, *args):
        return -compute_mse_from_assignments(
            assignments, graph, directed=directed)

    tsbm_master_df = select_sbm(
        graph,
        n_components_try_range,
        n_block_try_range,
        directed=directed,
        method="bc-metric",
        metric=metric,
    )
    return (sbm_master_df, rdpg_df, tsbm_master_df)
Beispiel #3
0
def run_sim(
    seed,
    n_blocks_range,
    n_verts_range,
    n_components_try_range,
    n_block_try_range,
    B_mat,
    directed,
):
    np.random.seed(seed)
    columns = [
        "n_params_gmm",
        "n_params_sbm",
        "rss",
        "mse",
        "score",
        "n_components_try",
        "n_block_try",
        "n_blocks",
        "n_verts",
    ]
    master_sbm_df = pd.DataFrame(columns=columns)

    for i, n_blocks in enumerate(n_blocks_range):
        B_mat_trunc = B_mat[:n_blocks, :n_blocks]
        for j, n_verts in enumerate((n_verts_range)):
            graph, labels = gen_sbm(n_verts, n_blocks, B_mat_trunc)
            sbm_df = select_sbm(
                graph, n_components_try_range, n_block_try_range, directed=directed
            )
            sbm_df["n_verts"] = n_verts
            sbm_df["n_blocks"] = n_blocks
            master_sbm_df = master_sbm_df.append(sbm_df, ignore_index=True, sort=True)

    return master_sbm_df
Beispiel #4
0
def run_fit(
    seed,
    n_components_try_range,
    n_components_try_rdpg,
    n_block_try_range,
    directed,
    n_sims_sbm,
):
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)

    connected = is_fully_connected(graph)

    if not connected:
        heatmap(graph)
        plt.show()
        raise ValueError("input graph not connected")

    np.random.seed(seed)

    columns = columns = [
        "n_params_gmm",
        "n_params_sbm",
        "rss",
        "mse",
        "score",
        "n_components_try",
        "n_block_try",
        "sim_ind",
    ]
    sbm_master_df = pd.DataFrame(columns=columns)
    for i in range(n_sims_sbm):
        sbm_df = select_sbm(
            graph,
            n_components_try_range,
            n_block_try_range,
            directed=directed,
            rank="sweep",
        )
        sbm_df["sim_ind"] = i
        sbm_master_df = sbm_master_df.append(sbm_df,
                                             ignore_index=True,
                                             sort=True)

    save_obj(sbm_master_df, file_obs, "sbm_master_df")
    return 0
Beispiel #5
0
#
#
#
##%% #############
show_plots = False
score = "mse"

left_adj, left_labels = load_left()
if show_plots:
    heatmap(left_adj, inner_hier_labels=left_labels, cbar=False)

estimator = SBMEstimator()
ap_results = fit_a_priori(estimator, left_adj, left_labels)

param_grid = dict(n_blocks=list(range(1, 10)))
sweep_results = select_sbm(left_adj, param_grid, n_init=25, n_jobs=-2)

sweep_results

best_results = get_best(sweep_results,
                        "n_params",
                        score_name=score,
                        small_better=False)
##%%
sns.scatterplot(data=best_results, x="n_params", y=score)
sns.scatterplot(data=ap_results, x="n_params", y=score)

##%%
estimator = DCSBMEstimator()
ap_results = fit_a_priori(estimator, left_adj, left_labels)