def run_fit(seed, param_grid, directed, n_init, n_jobs, co_block):
    """Sweep co-block SBM fits over both Drosophila hemisphere graphs.

    Each hemisphere graph is optionally symmetrized (when ``directed`` is
    False), binarized, swept with ``select_sbm`` over ``param_grid``, and the
    resulting DataFrame persisted with ``save_obj``. Returns 0 on completion.

    NOTE(review): ``seed`` is accepted but never used in this variant, and
    ``file_obs`` is a module-level name not visible in this block.
    """
    sides = (
        (load_drosophila_left, "cosbm_left_df"),
        (load_drosophila_right, "cosbm_right_df"),
    )
    for loader, out_name in sides:
        graph = loader()
        if not directed:
            graph = symmetrize(graph, method="avg")
        graph = binarize(graph)
        result_df = select_sbm(
            graph,
            param_grid,
            directed=directed,
            n_jobs=n_jobs,
            n_init=n_init,
            co_block=co_block,
        )
        save_obj(result_df, file_obs, out_name)
    return 0
def run_fit(seed, param_grid, directed, n_init, n_jobs):
    """Sweep degree-corrected SBM fits over both Drosophila hemisphere graphs.

    Each hemisphere graph is optionally symmetrized (when ``directed`` is
    False), binarized, swept with ``select_dcsbm`` (``degree_directed=False``)
    over ``param_grid``, and the result saved via ``save_obj``. Returns 0.

    NOTE(review): ``seed`` is accepted but never used in this variant, and
    ``file_obs`` is a module-level name not visible in this block.
    """
    sides = (
        (load_drosophila_left, "ddcsbm_left_df"),
        (load_drosophila_right, "ddcsbm_right_df"),
    )
    for loader, out_name in sides:
        graph = loader()
        if not directed:
            graph = symmetrize(graph, method="avg")
        graph = binarize(graph)
        result_df = select_dcsbm(
            graph,
            param_grid,
            directed=directed,
            degree_directed=False,
            n_jobs=n_jobs,
            n_init=n_init,
        )
        save_obj(result_df, file_obs, out_name)
    return 0
Esempio n. 3
0
def run_fit(seed, param_grid, directed, n_init, n_jobs):
    """Fit DCSBM and degree-directed DCSBM sweeps on the left graph.

    Preprocesses the left Drosophila graph (optional symmetrization, then
    binarization), seeds numpy, runs ``select_dcsbm`` twice — once with
    ``degree_directed=False`` and once with ``True`` — and saves both result
    DataFrames via ``save_obj``. Returns 0 on completion.
    """
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)

    np.random.seed(seed)

    # Shared keyword arguments for both model-selection sweeps.
    common_kws = dict(directed=directed, n_jobs=n_jobs, n_init=n_init)

    dcsbm_out_df = select_dcsbm(
        graph, param_grid, degree_directed=False, **common_kws
    )
    ddcsbm_out_df = select_dcsbm(
        graph, param_grid, degree_directed=True, **common_kws
    )

    save_obj(dcsbm_out_df, file_obs, "dcsbm_out_df")
    save_obj(ddcsbm_out_df, file_obs, "ddcsbm_out_df")
    return 0
def run_fit(
    seed,
    n_components_try_range,
    n_components_try_rdpg,
    n_block_try_range,
    directed,
    n_init,
    embed_kws_try_range,
    n_jobs,
):
    """Grid-search DCSBM fits on the left Drosophila graph.

    Builds the parameter grid from the ``*_try_range`` arguments,
    preprocesses the graph (optional symmetrization, then binarization),
    seeds numpy, runs ``select_dcsbm``, prints the head of the result,
    and saves it via ``save_obj``. Returns 0 on completion.

    NOTE(review): ``n_components_try_rdpg`` is accepted but unused here.
    """
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)

    np.random.seed(seed)

    grid = {
        "n_components": n_components_try_range,
        "n_blocks": n_block_try_range,
        "embed_kws": embed_kws_try_range,
    }
    out_df = select_dcsbm(
        graph,
        grid,
        directed=directed,
        degree_directed=False,
        n_jobs=n_jobs,
        n_init=n_init,
    )

    print(out_df.head())

    save_obj(out_df, file_obs, "grid_search_out")
    return 0
Esempio n. 5
0
def load_right():
    """Load the binarized right connectome and its node labels (wraps graspy)."""
    adj, labels = load_drosophila_right(return_labels=True)
    return binarize(adj), labels
Esempio n. 6
0
def run_fit(
    seed,
    n_components_try_range,
    n_components_try_rdpg,
    n_block_try_range,
    directed,
    n_sims_sbm,
):
    """Run repeated SBM selection, an RDPG sweep, and a metric-based SBM sweep.

    Preprocesses the left Drosophila graph (optional symmetrization, then
    binarization), verifies it is fully connected (plots it and raises
    otherwise), seeds numpy, then:
      1. runs ``select_sbm`` ``n_sims_sbm`` times, tagging each run with
         ``sim_ind`` and collecting all runs into one DataFrame;
      2. runs ``select_rdpg`` over ``n_components_try_rdpg``;
      3. runs ``select_sbm`` once more with a custom MSE-based metric.

    Returns the tuple ``(sbm_master_df, rdpg_df, tsbm_master_df)``.
    """
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)

    connected = is_fully_connected(graph)

    if not connected:
        # Show the offending graph before failing — helps diagnose why.
        heatmap(graph)
        plt.show()
        raise ValueError("input graph not connected")

    np.random.seed(seed)

    # FIX: was a redundant double assignment (`columns = columns = [...]`).
    columns = [
        "n_params_gmm",
        "n_params_sbm",
        "rss",
        "mse",
        "score",
        "n_components_try",
        "n_block_try",
        "sim_ind",
    ]
    # Seed the frame list with an empty, typed frame so zero simulations
    # still yields a DataFrame with the expected columns.
    sbm_dfs = [pd.DataFrame(columns=columns)]
    for i in range(n_sims_sbm):
        sbm_df = select_sbm(graph,
                            n_components_try_range,
                            n_block_try_range,
                            directed=directed)
        sbm_df["sim_ind"] = i
        sbm_dfs.append(sbm_df)
    # FIX: DataFrame.append is deprecated/removed in recent pandas; a single
    # concat reproduces the old behavior (ignore_index + sorted column union).
    sbm_master_df = pd.concat(sbm_dfs, ignore_index=True, sort=True)

    rdpg_df = select_rdpg(graph, n_components_try_rdpg, directed)

    def metric(assignments, *args):
        # Negated MSE so that higher metric values mean better fits.
        return -compute_mse_from_assignments(
            assignments, graph, directed=directed)

    tsbm_master_df = select_sbm(
        graph,
        n_components_try_range,
        n_block_try_range,
        directed=directed,
        method="bc-metric",
        metric=metric,
    )
    return (sbm_master_df, rdpg_df, tsbm_master_df)
Esempio n. 7
0
def probplot(
    adj,
    labels,
    log_scale=False,
    figsize=(20, 20),
    cmap="Purples",
    title="Edge probability",
    vmin=0,
    vmax=None,
    ax=None,
    font_scale=1,
):
    """Plot the SBM block-probability matrix implied by ``labels`` as a heatmap.

    Fits a directed SBM (with loops) to the binarized ``adj`` using ``labels``
    as block assignments, then draws ``block_p_`` with seaborn, annotated.

    Parameters
    ----------
    adj : array-like
        Adjacency matrix; binarized before fitting.
    labels : array-like
        Per-node block assignment; its unique values become the axes.
    log_scale : bool
        If True, offsets probabilities by 0.001 and uses a log color norm
        with decade tick marks.
    ax : matplotlib axes, optional
        Target axes; a new figure/axes is created when None.

    Returns
    -------
    (ax, prob_df) : the heatmap axes and the block-probability DataFrame.
    """
    sbm = SBMEstimator(directed=True, loops=True)
    sbm.fit(binarize(adj), y=labels)
    data = sbm.block_p_
    uni_labels = np.unique(labels)

    cbar_kws = {"fraction": 0.08, "shrink": 0.8, "pad": 0.03}

    if log_scale:
        # Small additive offset keeps zero-probability blocks finite on a
        # log scale; vmin/vmax are overridden to span the offset data.
        data = data + 0.001
        vmin = data.min().min()
        vmax = data.max().max()
        log_norm = LogNorm(vmin=vmin, vmax=vmax)
        # One colorbar tick per decade covering the data range.
        cbar_ticks = [
            math.pow(10, i)
            for i in range(
                math.floor(math.log10(data.min().min())),
                1 + math.ceil(math.log10(data.max().max())),
            )
        ]
        cbar_kws["ticks"] = cbar_ticks

    prob_df = pd.DataFrame(columns=uni_labels, index=uni_labels, data=data)

    if ax is None:
        plt.figure(figsize=figsize)
        ax = plt.gca()

    ax.set_title(title, pad=30, fontsize=30)

    sns.set_context("talk", font_scale=font_scale)

    heatmap_kws = dict(
        cbar_kws=cbar_kws, annot=True, square=True, cmap=cmap, vmin=vmin, vmax=vmax
    )
    if log_scale:
        heatmap_kws["norm"] = log_norm
    # NOTE(review): ax is always non-None by this point (assigned above when
    # the caller passed None), so this guard is redundant but harmless.
    if ax is not None:
        heatmap_kws["ax"] = ax
    ax.tick_params(axis="both", which="major", labelsize=30)
    # ax.tick_params(axis="both", which="minor", labelsize=8)
    ax = sns.heatmap(prob_df, **heatmap_kws)

    # Keep row labels horizontal for readability.
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)

    return ax, prob_df
def run_fit(seed, param_grid, directed, n_jobs):
    """Run RDPG model selection on the left and right Drosophila graphs.

    Seeds numpy, then for each hemisphere: loads the graph, optionally
    symmetrizes it (when ``directed`` is False), binarizes it, sweeps
    ``select_rdpg`` over ``param_grid``, and saves the resulting DataFrame
    via ``save_obj``. Returns 0 on completion.
    """
    np.random.seed(seed)

    # run left
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)
    rdpg_left_df = select_rdpg(graph, param_grid, directed=directed, n_jobs=n_jobs)
    save_obj(rdpg_left_df, file_obs, "rdpg_left_df")

    # run right
    # BUG FIX: this previously called load_drosophila_left() again, so the
    # "right" results were actually computed on the left hemisphere graph.
    graph = load_drosophila_right()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)
    rdpg_right_df = select_rdpg(graph, param_grid, directed=directed, n_jobs=n_jobs)
    save_obj(rdpg_right_df, file_obs, "rdpg_right_df")

    return 0
Esempio n. 9
0
def get_sbm_prob(adj, labels):
    """Fit a directed SBM (with loops) and return its block-probability table.

    The returned DataFrame's rows/columns are the unique label values,
    ordered by block size (largest first).
    """
    estimator = SBMEstimator(directed=True, loops=True)
    estimator.fit(binarize(adj), y=labels)

    block_labels, block_counts = np.unique(labels, return_counts=True)
    order = np.argsort(block_counts)[::-1]  # largest blocks first
    ordered_labels = block_labels[order]
    block_p = estimator.block_p_[np.ix_(order, order)]

    return pd.DataFrame(data=block_p, index=ordered_labels, columns=ordered_labels)
Esempio n. 10
0
def get_sbm_prob(adj, labels):
    """Fit a directed SBM (with loops) and return its block-probability table.

    ``labels`` may hold arbitrary hashables; they are mapped to contiguous
    integer codes before fitting. Rows/columns of the returned DataFrame are
    the original label values, ordered by block size (largest first).
    """
    block_labels, block_counts = np.unique(labels, return_counts=True)
    code_of = dict(zip(block_labels, range(len(block_labels))))
    y = np.array(itemgetter(*labels)(code_of))

    estimator = SBMEstimator(directed=True, loops=True)
    estimator.fit(binarize(adj), y=y)

    order = np.argsort(block_counts)[::-1]  # largest blocks first
    ordered_labels = block_labels[order]
    block_p = estimator.block_p_[np.ix_(order, order)]

    return pd.DataFrame(data=block_p, index=ordered_labels, columns=ordered_labels)
Esempio n. 11
0
def run_fit(
    seed,
    n_components_try_range,
    n_components_try_rdpg,
    n_block_try_range,
    directed,
    n_sims_sbm,
):
    """Run repeated rank-sweep SBM selection on the left graph and save results.

    Preprocesses the graph (optional symmetrization, then binarization),
    verifies it is fully connected (plots it and raises otherwise), seeds
    numpy, runs ``select_sbm`` ``n_sims_sbm`` times with ``rank="sweep"``,
    tags each run with ``sim_ind``, collects all runs into one DataFrame,
    and saves it via ``save_obj``. Returns 0 on completion.

    NOTE(review): ``n_components_try_rdpg`` is accepted but unused here.
    """
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)

    connected = is_fully_connected(graph)

    if not connected:
        # Show the offending graph before failing — helps diagnose why.
        heatmap(graph)
        plt.show()
        raise ValueError("input graph not connected")

    np.random.seed(seed)

    # FIX: was a redundant double assignment (`columns = columns = [...]`).
    columns = [
        "n_params_gmm",
        "n_params_sbm",
        "rss",
        "mse",
        "score",
        "n_components_try",
        "n_block_try",
        "sim_ind",
    ]
    # Seed the frame list with an empty, typed frame so zero simulations
    # still yields a DataFrame with the expected columns.
    sbm_dfs = [pd.DataFrame(columns=columns)]
    for i in range(n_sims_sbm):
        sbm_df = select_sbm(
            graph,
            n_components_try_range,
            n_block_try_range,
            directed=directed,
            rank="sweep",
        )
        sbm_df["sim_ind"] = i
        sbm_dfs.append(sbm_df)
    # FIX: DataFrame.append is deprecated/removed in recent pandas; a single
    # concat reproduces the old behavior (ignore_index + sorted column union).
    sbm_master_df = pd.concat(sbm_dfs, ignore_index=True, sort=True)

    save_obj(sbm_master_df, file_obs, "sbm_master_df")
    return 0
Esempio n. 12
0
def load_new_right(return_full_labels=False, return_names=False):
    """Load the right mushroom-body adjacency and labels from processed CSVs.

    Reads the adjacency and metadata CSVs, binarizes the adjacency, and
    returns ``(adj, labels)``. With ``return_full_labels=True`` the "Class"
    column is appended; otherwise with ``return_names=True`` the "Name"
    column is appended (``return_full_labels`` takes precedence).
    """
    base = Path("./maggot_models/data/processed/")
    adj_frame = pd.read_csv(base / "BP_20190424mw_right_mb_adj.csv",
                            header=0, index_col=0)
    meta_frame = pd.read_csv(base / "BP_20190424mw_right_mb_meta.csv",
                             header=0, index_col=0)

    adj = binarize(adj_frame.values)
    labels = meta_frame["simple_class"].values.astype(str)

    if return_full_labels:
        return adj, labels, meta_frame["Class"].values.astype(str)
    if return_names:
        return adj, labels, meta_frame["Name"].values.astype(str)
    return adj, labels
Esempio n. 13
0
def plot_adjacencies(full_mg, axs):
    """Plot data vs. DCSBM-sampled adjacencies for each hierarchy level.

    For each level (0..``lowest_level``, a module-level global): row 0 of
    ``axs`` shows the binarized data adjacency sorted by that level's blocks;
    row 1 shows one adjacency sampled from a DCSBM fit to those blocks.

    Relies on module-level names: ``lowest_level``, ``level_names``,
    ``CLASS_COLOR_DICT``, ``adjplot``, ``DCSBMEstimator``.
    """
    pal = sns.color_palette("deep", 1)
    model = DCSBMEstimator
    for level in np.arange(lowest_level + 1):
        # Top row: the observed (binarized) adjacency at this level.
        ax = axs[0, level]
        adj = binarize(full_mg.adj)
        _, _, top, _ = adjplot(
            adj,
            ax=ax,
            plot_type="scattermap",
            sizes=(0.5, 0.5),
            sort_class=["hemisphere"] + level_names[: level + 1],
            item_order=["merge_class_sf_order", "merge_class", "sf"],
            class_order="sf",
            meta=full_mg.meta,
            palette=CLASS_COLOR_DICT,
            colors="merge_class",
            ticks=False,
            gridline_kws=dict(linewidth=0.2, color="grey", linestyle="--"),
            color=pal[0],
        )
        top.set_title(f"Level {level} - Data")

        # Bottom row: a sample drawn from a DCSBM fit to this level's labels.
        labels = full_mg.meta[f"lvl{level}_labels_side"]
        estimator = model(directed=True, loops=True)
        uni_labels, inv = np.unique(labels, return_inverse=True)
        estimator.fit(adj, inv)
        sample_adj = np.squeeze(estimator.sample())
        ax = axs[1, level]
        _, _, top, _ = adjplot(
            sample_adj,
            ax=ax,
            plot_type="scattermap",
            sizes=(0.5, 0.5),
            sort_class=["hemisphere"] + level_names[: level + 1],
            item_order=["merge_class_sf_order", "merge_class", "sf"],
            class_order="sf",
            meta=full_mg.meta,
            palette=CLASS_COLOR_DICT,
            colors="merge_class",
            ticks=False,
            gridline_kws=dict(linewidth=0.2, color="grey", linestyle="--"),
            color=pal[0],
        )
        top.set_title(f"Level {level} - DCSBM sample")
Esempio n. 14
0
def run_fit(seed, n_components_try_range, n_components_try_rdpg,
            n_block_try_range, directed):
    """Run one SBM sweep and one RDPG sweep on the left Drosophila graph.

    Preprocesses the graph (optional symmetrization, then binarization),
    verifies it is fully connected (plots it and raises otherwise), seeds
    numpy, then returns ``(sbm_df, rdpg_df)``.
    """
    graph = load_drosophila_left()
    if not directed:
        graph = symmetrize(graph, method="avg")
    graph = binarize(graph)

    if not is_fully_connected(graph):
        # Show the offending graph before failing — helps diagnose why.
        heatmap(graph)
        plt.show()
        raise ValueError("input graph not connected")

    np.random.seed(seed)

    sbm_df = select_sbm(
        graph, n_components_try_range, n_block_try_range, directed=directed
    )
    rdpg_df = select_rdpg(graph, n_components_try_rdpg, directed)
    return (sbm_df, rdpg_df)
Esempio n. 15
0
    dcsbm.fit(adj, y=labels)
    objective = dcsbm.score(adj)
    return objective


# Threshold sweep: split neurons into two groups by PN input proportion and
# score each candidate split with a 2-block DCSBM objective.
fig, ax = plt.subplots(2, 1, figsize=(10, 10), sharex=True)
sns.distplot(max_pn_prop_input, ax=ax[0])
ax[0].set_title("All neurons")

objectives = np.zeros_like(thresh_range)
for i, t in enumerate(thresh_range):
    # Binary labels: 1 for neurons at/above the threshold, 0 below.
    # NOTE(review): low_inds is computed but never used.
    low_inds = np.where(max_pn_prop_input < t)[0]
    high_inds = np.where(max_pn_prop_input >= t)[0]
    labels = np.zeros(adj.shape[0])
    labels[high_inds] = 1
    objectives[i] = dcsbm_objective(binarize(adj), labels)

sns.scatterplot(x=thresh_range, y=objectives, ax=ax[1])
# ax[1].set_ylim((0, 0.015))
ax[1].set_ylabel("2-DCSBM objective val")
ax[1].set_xlabel("PN input threshold (min input any subclass)")
ax[1].set_xlim((0 - 0.01, 0.35 + 0.01))

##

fig, ax = plt.subplots(2, 1, figsize=(10, 10), sharex=True)
sns.distplot(max_pn_prop_input, ax=ax[0])
ax[0].set_title("All neurons")

objectives = np.zeros_like(thresh_range)
for i, t in enumerate(thresh_range):
Esempio n. 16
0
n_levels = 10


# %% [markdown]
# ##
rows = []


class DDCSBMEstimator(DCSBMEstimator):
    """A DCSBMEstimator with ``degree_directed`` fixed to True, so it can be
    constructed with the same keyword arguments as the other estimators."""
    def __init__(self, **kwargs):
        super().__init__(degree_directed=True, **kwargs)


for l in range(n_levels + 1):
    labels = meta[f"lvl{l}_labels"].values
    left_adj = binarize(adj[np.ix_(lp_inds, lp_inds)])
    left_adj = remove_loops(left_adj)
    right_adj = binarize(adj[np.ix_(rp_inds, rp_inds)])
    right_adj = remove_loops(right_adj)
    for model, name in zip(
        [DDCSBMEstimator, DCSBMEstimator, SBMEstimator], ["DDCSBM", "DCSBM", "SBM"]
    ):
        # train on left
        estimator = model(directed=True, loops=False)
        uni_labels, inv = np.unique(labels, return_inverse=True)
        estimator.fit(left_adj, inv[lp_inds])
        train_left_p = estimator.p_mat_
        train_left_p[train_left_p == 0] = 1 / train_left_p.size

        n_params = estimator._n_parameters() + len(uni_labels)
    gmm_log_likelihood = np.sum(gclust.model_.score(X_hat[-temp_quad_labels]))

    #- Total likelihood
    likeli = surface_log_likelihood + gmm_log_likelihood + prop_log_likelihoods

    #- BIC
    bic_ = 2 * likeli - temp_n_params * np.log(n)

    #- ARI
    ari_ = ari(true_labels, temp_c_hat)

    return [combo, likeli, ari_, bic_]


# Fixed seed for reproducibility of the embedding + clustering below.
np.random.seed(16661)
A = binarize(right_adj)
# ASE with 3 components; the out/in latent positions are concatenated,
# presumably because the graph is directed — TODO confirm.
X_hat = np.concatenate(ASE(n_components=3).fit_transform(A), axis=1)
n, d = X_hat.shape

gclust = GCLUST(max_components=15)
est_labels = gclust.fit_predict(X_hat)

# Baseline entries (no merge combo applied): likelihood, ARI, BIC.
loglikelihoods = [np.sum(gclust.model_.score_samples(X_hat))]
combos = [None]
aris = [ari(right_labels, est_labels)]
bic = [gclust.model_.bic(X_hat)]

unique_labels = np.unique(est_labels)

# Row indices of each estimated cluster, in cluster-label order.
class_idx = np.array([np.where(est_labels == u)[0] for u in unique_labels])
Esempio n. 18
0
# Cluster-size distribution across hierarchy levels, on a log scale.
size_df = pd.concat(size_dfs)
fig, ax = plt.subplots(1, 1, figsize=(8, 4))
sns.stripplot(data=size_df, x="Level", y="Size", ax=ax, jitter=0.45, alpha=0.5)
ax.set_yscale("log")
ax.set_title(title)
stashfig("log-sizes" + basename)

# %% [markdown]
# ## Fit models and compare L/R

rows = []

for l in range(n_levels):
    labels = new_meta[f"lvl{l}_labels"].values
    left_adj = binarize(new_adj[np.ix_(new_lp_inds, new_lp_inds)])
    left_adj = remove_loops(left_adj)
    right_adj = binarize(new_adj[np.ix_(new_rp_inds, new_rp_inds)])
    right_adj = remove_loops(right_adj)
    for model, name in zip([DCSBMEstimator, SBMEstimator], ["DCSBM", "SBM"]):
        estimator = model(directed=True, loops=False)
        uni_labels, inv = np.unique(labels, return_inverse=True)
        estimator.fit(left_adj, inv[new_lp_inds])
        train_left_p = estimator.p_mat_
        train_left_p[train_left_p == 0] = 1 / train_left_p.size

        score = poisson.logpmf(left_adj, train_left_p).sum()
        rows.append(
            dict(
                train_side="left",
                test="same",
Esempio n. 19
0
# Report how much of the graph the thresholding / LCC step removed.
n_nodes_t = lcc_graph_t.shape[0]
print(f"Number of remaining nodes: {n_nodes_t}")
print(f"Removed {(n_nodes - n_nodes_t) / n_nodes} of nodes")


#%%

print("Embedding binarized graph")
from graspy.plot import screeplot

# NOTE(review): in file order, `embed_graph` is first used here but only
# assigned below — this relies on notebook-style cell execution order;
# confirm before running this file top-to-bottom.
screeplot(embed_graph, cumulative=False, show_first=20, n_elbows=3)
#%%
n_components = None
n_elbows = 1
# The first assignment is immediately overwritten by the binarized version.
embed_graph = lcc_graph_t
embed_graph = binarize(lcc_graph_t)

gridplot_kws["sizes"] = (10, 10)
gridplot(
    [embed_graph],
    inner_hier_labels=lcc_simple_classes,
    outer_hier_labels=lcc_hemisphere,
    **gridplot_kws,
)

# ASE on the binarized, thresholded LCC graph; elbow selection picks dims.
ase = AdjacencySpectralEmbed(n_components=n_components, n_elbows=n_elbows)
latent = ase.fit_transform(embed_graph)
latent = np.concatenate(latent, axis=-1)
pairplot(latent, title="ASE o binarized o thresholded")
Esempio n. 20
0
from graspy.models import EREstimator, RDPGEstimator, SBEstimator
from graspy.plot import heatmap, pairplot
import pandas as pd

#%% Set up some simulations
from graspy.simulations import p_from_latent, sample_edges
from graspy.utils import binarize, symmetrize

## Load data
sns.set_context("talk")
left_adj, cell_labels = load_drosophila_left(return_labels=True)
left_adj_uw = left_adj.copy()
# "uw" = unweighted: clip all positive edge weights to 1.
left_adj_uw[left_adj_uw > 0] = 1

# Symmetrize (average of A and A.T) then re-binarize the result.
left_adj_uw = symmetrize(left_adj_uw, method="avg")
left_adj_uw = binarize(left_adj_uw)


def _check_common_inputs(
    figsize=None,
    height=None,
    title=None,
    context=None,
    font_scale=None,
    legend_name=None,
):
    # Handle figsize
    if figsize is not None:
        if not isinstance(figsize, tuple):
            msg = "figsize must be a tuple, not {}.".format(type(figsize))
            raise TypeError(msg)
Esempio n. 21
0
Rs.append(R)
# Align the left-hemisphere embedding to the right via the matrix R,
# then predict cluster assignments per hemisphere with its own model.
left_embed = train_embed[left_inds]
left_embed = left_embed @ R
right_embed = train_embed[right_inds]

pred_left = models[0].model_.predict(left_embed)
pred_right = models[1].model_.predict(right_embed)
# Offset the left cluster ids so the two hemispheres' labels don't collide.
pred_left += len(np.unique(pred_right)) + 1

# Merge both hemispheres' predictions into one per-node vector.
pred = np.empty(len(embed[0]))
pred[left_inds] = pred_left
pred[right_inds] = pred_right
meta["joint_pred"] = pred

ax, _, tax, _ = matrixplot(
    binarize(adj),
    plot_type="scattermap",
    sizes=(0.25, 0.5),
    col_colors="merge_class",
    col_palette=CLASS_COLOR_DICT,
    col_meta=meta,
    col_sort_class=["hemisphere", "joint_pred"],
    col_ticks=False,
    # col_class_order="block_sf",
    col_item_order="adj_sf",
    row_ticks=False,
    row_colors="merge_class",
    row_palette=CLASS_COLOR_DICT,
    row_meta=meta,
    row_sort_class=["hemisphere", "joint_pred"],
    # row_class_order="block_sf",
Esempio n. 22
0
    transform="simple-all",
    hier_label_fontsize=10,
    sort_nodes=False,
    cbar=False,
    title="Right Brain (summed 4 channels)",
    title_pad=90,
    font_scale=1.7,
)
annotate_arrow(ax, (0.135, 0.88))

# Plot the adjacency matrix for the 4-color graphs
# (one binarized heatmap per edge-type channel, in a 2x2 grid).
fig, ax = plt.subplots(2, 2, figsize=(20, 20))
ax = ax.ravel()
for i, g in enumerate(color_adjs):
    heatmap(
        binarize(g),
        inner_hier_labels=simple_class_labels,
        # transform="si",
        hier_label_fontsize=10,
        sort_nodes=False,
        ax=ax[i],
        cbar=False,
        title=GRAPH_TYPE_LABELS[i],
        title_pad=70,
        font_scale=1.7,
    )
plt.suptitle("Right Brain (4 channels)", fontsize=45, x=0.525, y=1.02)
plt.tight_layout()
annotate_arrow(ax[0])
savefig("4color_brain",
        fmt="png",
from graspy.datasets import load_drosophila_right
from graspy.plot import heatmap
from graspy.utils import binarize, symmetrize
''' In this script, we will try to model a larval Drosophila
connectome using random graph models. Note that, in all of these
models, connectivity is sampled using a Bernoulli distribution
with a given probabilitiy. '''

# ---------------------------------------
# Load data to be modelled
# ---------------------------------------

# Load Drosophila melanogaster larva, right MB connectome (Eichler et al. 2017)
'''here we consider a binarized and directed version of the graph'''
adj, labels = load_drosophila_right(return_labels=True)
adj = binarize(adj)  # adjacency matrix


# Plot adjacency matrix
def plotHeatmap(data, title, params=None):
    """Plot an adjacency heatmap sorted by the module-level ``labels``.

    Parameters
    ----------
    data : array-like
        Adjacency matrix to plot.
    title : str
        Plot title.
    params : dict, optional
        Extra keyword arguments forwarded to ``heatmap``.

    FIX: the default was a mutable ``{}`` (shared across calls — a classic
    Python pitfall); ``None`` is the safe equivalent.
    """
    heatmap(X=data,
            inner_hier_labels=labels,
            hier_label_fontsize=8.0,
            font_scale=0.5,
            title=title,
            sort_nodes=True,
            **(params or {}))


plotHeatmap(adj, "Drosophila right MB")
Esempio n. 24
0
# Legend: class names annotated with their counts, colored per class.
count_map = dict(zip(uni_class, counts))
names = []
colors = []
# NOTE(review): `val` is unused; count_map[key] is looked up again instead.
for key, val in count_map.items():
    names.append(f"{key} ({count_map[key]})")
    colors.append(CLASS_COLOR_DICT[key])
colors = colors[::-1]  # reverse because of signal flow sorting
names = names[::-1]
palplot(len(colors), colors, ax=ax)
ax.yaxis.set_major_formatter(plt.FixedFormatter(names))

# plt.tight_layout()
# One adjacency panel per hierarchy level, sorted by that level's blocks.
model = DCSBMEstimator
for level in np.arange(lowest_level + 1):
    ax = fig.add_subplot(gs[:3, level + 4])
    adj = binarize(full_mg.adj)
    _, _, top, _ = adjplot(
        adj,
        ax=ax,
        plot_type="scattermap",
        sizes=(0.5, 0.5),
        sort_class=["hemisphere"] + level_names[:level + 1],
        item_order=["merge_class_sf_order", "merge_class", "sf"],
        class_order="sf",
        meta=full_mg.meta,
        palette=CLASS_COLOR_DICT,
        colors="merge_class",
        ticks=False,
        gridline_kws=dict(linewidth=0.2, color="grey", linestyle="--"),
    )
    top.set_title(f"Level {level} - Data")
Esempio n. 25
0
    print(f"Number of edges: {np.count_nonzero(g)}")
    print(f"Sparsity: {np.count_nonzero(g) / (n_verts**2)}")
    print(f"Number of synapses: {int(g.sum())}")
    median_in_degree = np.median(np.count_nonzero(g, axis=0))
    median_out_degree = np.median(np.count_nonzero(g, axis=1))
    print(f"Median node in degree: {median_in_degree}")
    print(f"Median node out degree: {median_out_degree}")
    print()


# Plot the adjacency matrix for the summed graph
# (all four edge-type channels collapsed into one binarized matrix).
sns.set_context("talk", font_scale=1)

plt.figure(figsize=(5, 5))
ax = heatmap(
    binarize(sum_adj),
    inner_hier_labels=class_labels,
    hier_label_fontsize=10,
    sort_nodes=False,
    cbar=False,
    title="Full Brain (summed 4 channels)",
    title_pad=90,
    font_scale=1.7,
)
stashfig("full-brain-summed")

# Plot the adjacency matrix for the 4-color graphs
fig, ax = plt.subplots(2, 2, figsize=(20, 20))
ax = ax.ravel()
for i, g in enumerate(color_adjs):
    heatmap(
Esempio n. 26
0
# %% [markdown]
# ##

from graspy.models import SBMEstimator

# NOTE(review): this `level = 2` is immediately shadowed by the loop below.
level = 2

n_row = 3
n_col = 7
scale = 10
# NOTE(review): figsize here is (n_row*scale, n_col*scale) while a sibling
# cell later uses (n_col*scale, n_row*scale) — confirm which is intended.
fig, axs = plt.subplots(n_row, n_col, figsize=(n_row * scale, n_col * scale))

# Fit a per-level SBM and plot its block-probability matrix in row 1.
# NOTE(review): range(8) with n_col = 7 will index axs[1, 7] out of bounds
# on the last iteration — confirm the intended loop bound.
for level in range(8):
    label_name = f"lvl{level}_labels_side"
    sbm = SBMEstimator(directed=True, loops=True)
    sbm.fit(binarize(full_adj), full_meta[label_name].values)
    ax = axs[1, level]
    _, _, top, _ = adjplot(
        sbm.p_mat_,
        ax=ax,
        plot_type="heatmap",
        sort_class=["hemisphere"] + level_names[: level + 1],
        item_order=["merge_class_sf_order", "merge_class", "sf"],
        class_order="sf",
        meta=full_mg.meta,
        palette=CLASS_COLOR_DICT,
        colors="merge_class",
        ticks=False,
        gridline_kws=dict(linewidth=0.05, color="grey", linestyle="--"),
        cbar_kws=dict(shrink=0.6),
    )
# Sort classes by frequency (descending) for display.
inds = np.argsort(class_counts)[::-1]
uni_class = uni_class[inds]
class_counts = class_counts[inds]

# Sweep spherical k-means over k, plotting each clustering's adjacency
# heatmap and class-composition survey side by side.
n_clusters = 12
for k in range(2, n_clusters):
    skmeans = SphericalKMeans(n_clusters=k, **skmeans_kws)
    pred_labels = skmeans.fit_predict(latent)
    pred_labels = relabel(pred_labels)
    models.append(skmeans)

    # gridplot(
    #     [adj], inner_hier_labels=pred_labels, hier_label_fontsize=18, sizes=(2, 10)
    # )
    fig, ax = plt.subplots(1, 2, figsize=(30, 18))
    heatmap(
        binarize(adj),
        inner_hier_labels=pred_labels,
        # outer_hier_labels=side_labels,
        hier_label_fontsize=18,
        ax=ax[0],
        cbar=False,
        sort_nodes=True,
    )
    uni_labels = np.unique(pred_labels)
    # survey(pred_labels, uni_class, ax=ax[1])
    survey(class_labels[:n_per_side], pred_labels, ax=ax[1])

#%%
# heatmap(adj, inner_hier_labels=pred_labels)
Esempio n. 28
0
# ##

from graspy.models import SBMEstimator

n_show = 7
n_row = 3
n_col = n_show
scale = 10
fig, axs = plt.subplots(n_row, n_col, figsize=(n_col * scale, n_row * scale))
meta = full_mg.meta
for level in range(n_show):
    # TODO show adjacency
    label_name = f"lvl{level}_labels_side"
    ax = axs[0, level]
    _, _, top, _ = adjplot(
        binarize(full_adj),
        sizes=(0.5, 0.5),
        ax=ax,
        plot_type="scattermap",
        sort_class=["hemisphere"] + level_names[:level + 1],
        item_order=["merge_class_sf_order", "merge_class", "sf"],
        class_order="sf",
        meta=meta,
        palette=CLASS_COLOR_DICT,
        colors="merge_class",
        ticks=False,
        gridline_kws=dict(linewidth=0.05, color="grey", linestyle="--"),
    )
    sbm = SBMEstimator(directed=True, loops=True)
    labels, inv = np.unique(full_meta[label_name].values, return_inverse=True)
    sbm.fit(binarize(full_adj), inv)
Esempio n. 29
0
    estimator.fit(adj, meta[lvl].values)
    for i in range(n_samples):
        sample = np.squeeze(estimator.sample())
        sample_meta = meta.copy()
        sf = signal_flow(sample)
        sample_meta["signal_flow"] = -sf
        sample_mg = MetaGraph(sample, sample_meta)
        sample_mg = sample_mg.sort_values("signal_flow", ascending=True)
        prop = upper_triu_prop(sample_mg.adj)
        print(prop)
        row = {"level": lvl.replace("_labels", ""), "prop": prop}
        rows.append(row)
    print()

# Compute the same feed-forwardness measure on the observed (binarized) data
# for comparison against the model samples collected above.
bin_meta = meta.copy()
bin_adj = binarize(adj)
sf = signal_flow(bin_adj)
bin_meta["signal_flow"] = -sf
bin_mg = MetaGraph(bin_adj, bin_meta)
# BUG FIX: the sorted graph was previously assigned to a typo'd name
# (`bin_mb`) and the UNSORTED adjacency was measured below — unlike the
# model samples above, which are sorted by signal flow before scoring.
bin_mg = bin_mg.sort_values("signal_flow", ascending=True)
prop = upper_triu_prop(bin_mg.adj)
print(prop)

rows.append({"level": "data", "prop": prop})
prop_df = pd.DataFrame(rows)

fig, ax = plt.subplots(1, 1, figsize=(10, 5))
sns.stripplot(data=prop_df, x="level", y="prop", ax=ax)
ax.set_ylabel("Prop. in upper triangle")
ax.set_xlabel("Model")
stashfig("ffwdness-by-model")
Esempio n. 30
0
def calc_model_liks(adj, meta, lp_inds, rp_inds, n_levels=10):
    """Score left/right hemisphere model fits against both hemispheres.

    For each hierarchy level 0..n_levels (inclusive) and each model class
    (DCSBM, SBM): fit on the left-hemisphere induced subgraph and on the
    right, then evaluate each fit's edge-probability matrix against both
    hemispheres with a Poisson log-likelihood.

    Parameters
    ----------
    adj : array-like
        Full adjacency matrix.
    meta : pd.DataFrame
        Node metadata containing ``lvl{l}_labels`` columns.
    lp_inds, rp_inds : array-like
        Node indices of the left / right hemisphere (paired ordering assumed,
        since labels from one side index into the other — TODO confirm).
    n_levels : int
        Highest hierarchy level to evaluate.

    Returns
    -------
    pd.DataFrame
        One row per (level, model, train_side, test_side) with columns
        train_side / test / test_side / score / level / model / n_params /
        norm_score.
    """
    rows = []
    for l in range(n_levels + 1):
        labels = meta[f"lvl{l}_labels"].values
        left_adj = remove_loops(binarize(adj[np.ix_(lp_inds, lp_inds)]))
        right_adj = remove_loops(binarize(adj[np.ix_(rp_inds, rp_inds)]))
        uni_labels, inv = np.unique(labels, return_inverse=True)
        for model, name in zip([DCSBMEstimator, SBMEstimator], ["DCSBM", "SBM"]):
            # Fit once per hemisphere, then score the fit on both hemispheres.
            for train_side, train_inds, train_adj in (
                ("Left", lp_inds, left_adj),
                ("Right", rp_inds, right_adj),
            ):
                estimator = model(directed=True, loops=False)
                estimator.fit(train_adj, inv[train_inds])
                p_mat = estimator.p_mat_
                # Avoid -inf log-likelihoods from structural zeros.
                p_mat[p_mat == 0] = 1 / p_mat.size
                n_params = estimator._n_parameters() + len(uni_labels)
                for test_side, test_adj in (("Left", left_adj), ("Right", right_adj)):
                    score = poisson.logpmf(test_adj, p_mat).sum()
                    rows.append(
                        dict(
                            train_side=train_side,
                            test="Same" if test_side == train_side else "Opposite",
                            test_side=test_side,
                            score=score,
                            level=l,
                            model=name,
                            n_params=n_params,
                            # Normalize by total edges of the tested graph.
                            norm_score=score / test_adj.sum(),
                        )
                    )
    return pd.DataFrame(rows)