Example 1
def pairs(embed):
    pg = pairplot(
        embed,
        labels=meta["merge_class"].values,
        palette=CLASS_COLOR_DICT,
        size=20,
        alpha=0.5,
    )
    pg._legend.remove()
Example 2
def one_iteration(start_labels, class_key="Merge Class"):
    # generate walks
    data, bins, classes = random_walk_classes(start_labels,
                                              seed=None,
                                              class_key=class_key)
    log_data = np.log10(data + 1)
    # plot the clustermap
    path_clustermap(log_data, classes, bins)
    # embed and plot by known class
    embedding = PCA(n_components=8).fit_transform(log_data)
    pairplot(embedding, labels=classes, palette=CLASS_COLOR_DICT)
    # cluster
    agm = AutoGMMCluster(min_components=2,
                         max_components=20,
                         n_jobs=-1,
                         verbose=10)
    pred_labels = agm.fit_predict(embedding)
    plt.figure()
    sns.scatterplot(data=agm.results_, x="n_components", y="bic/aic")
    # plot embedding by cluster
    pairplot(embedding, labels=pred_labels, palette=cc.glasbey_light)
    # plot predicted clusters by known class
    stacked_barplot(pred_labels, classes, color_dict=CLASS_COLOR_DICT)
    return pred_labels
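# Hypothetical usage (my assumption, not shown in the excerpt): the name
# `one_iteration` suggests feeding each round's predicted cluster labels back
# in as the next round's start labels, e.g.
labels = meta["Merge Class"].values  # assumed starting labels
for _ in range(3):
    labels = one_iteration(labels)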
Example 3
    plot_df.iloc[lp_inds, 1],
    plot_df.iloc[rp_inds, 1],
    ax=ax,
)

plt.tight_layout()

fig.suptitle(f"Method = {method}", y=1)
stashfig(f"procrustes-ase-{method}")

# %% [markdown]
# ## Try ranking pairs

pg = pairplot(
    norm_embed,
    labels=meta["merge_class"].values,
    palette=CLASS_COLOR_DICT,
    diag_kind="hist",
)
pg._legend.remove()

# %% [markdown]
# ##


def embedplot(embed):
    plot_df = pd.DataFrame(data=embed[:, [0, 1]])
    plot_df["merge_class"] = meta["merge_class"].values
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    sns.scatterplot(
        data=plot_df,
        x=0,
Example 4
    inds = np.random.choice(len(paths), size=subsample, replace=False)
    new_paths = []
    for i, p in enumerate(paths):
        if i in inds:
            new_paths.append(p)
    paths = new_paths

print(f"Number of paths after subsampling: {len(paths)}")

# %% [markdown]
# ##

embedder = AdjacencySpectralEmbed(n_components=None, n_elbows=2)
embed = embedder.fit_transform(pass_to_ranks(adj))
embed = np.concatenate(embed, axis=-1)
pairplot(embed, labels=labels)

# %% [markdown]
# ## Show 2 dimensions of pairwise cosine embedding

pdist = pairwise_distances(embed, metric="cosine")

# %% [markdown]
# ##
manifold = TSNE(metric="precomputed")
# manifold = ClassicalMDS(n_components=2, dissimilarity="precomputed")
cos_embed = manifold.fit_transform(pdist)

# %% [markdown]
# ##
plot_df = pd.DataFrame(data=cos_embed)
Example 5
#     "tPNs": "PN",
#     "vPNs": "PN",
#     "Unidentified": "Other",
#     "Other": "Other",
# }
# mb_labels = np.array(itemgetter(*class_labels)(name_map))

# known_inds = np.where(np.logical_or(mb_labels == "MB", mb_labels == "PN"))[0]

# %% [markdown]
# # Run clustering using LSE on the sum graph

n_verts = adj.shape[0]

latent = lse(adj, n_components, regularizer=None)
pairplot(latent, labels=simple_class_labels, title=embed)

k_list = list(range(MIN_CLUSTERS, MAX_CLUSTERS + 1))
n_runs = len(k_list)
out_dicts = []

bin_adj = binarize(adj)

last_pred_labels = np.zeros(n_verts)

if cluster == "GMM":
    ClusterModel = GaussianCluster
elif cluster == "AutoGMM":
    ClusterModel = AutoGMMCluster

for k in k_list:
Example 6
# plot degree sequence
d_sort = np.argsort(degrees)[::-1]
degrees = degrees[d_sort]
plt.figure(figsize=(10, 5))
sns.scatterplot(x=range(len(degrees)), y=degrees, s=30, linewidth=0)

known_inds = np.where(class_labels != "Unk")[0]

# %% [markdown]
# # Run clustering using LSE on the sum graph

n_verts = adj.shape[0]

latent = lse(adj, n_components, regularizer=None)
pairplot(latent, labels=class_labels, title=embed)

k_list = list(range(MIN_CLUSTERS, MAX_CLUSTERS + 1))
n_runs = len(k_list)
out_dicts = []

bin_adj = binarize(adj)

last_pred_labels = np.zeros(n_verts)

if cluster == "GMM":
    ClusterModel = GaussianCluster
elif cluster == "AutoGMM":
    ClusterModel = AutoGMMCluster

for k in k_list:
Example 7
for i in range(full_response_mat.shape[1]):
    plt.figure()
    plt.plot(full_response_mat[:, i])

# %% [markdown]
# #
from sklearn.decomposition import PCA

in_deg = adj.sum(axis=0)
in_deg[in_deg == 0] = 1  # avoid dividing by zero for nodes with no input
full_response_mat = full_response_mat / in_deg[:, np.newaxis]
full_response_mat[np.isinf(full_response_mat)] = 0
pca = PCA(n_components=4)
latent = pca.fit_transform(full_response_mat)
pairplot(latent, labels=class_labels)

# %% [markdown]
# #
is_sensory = np.vectorize(lambda s: s in sensory_classes)(class_labels)
inds = np.arange(len(class_labels))
sensory_inds = inds[is_sensory]

response_mats = []
n_timesteps = 1
for s in sensory_inds:
    response_mat = np.zeros((n_verts, n_timesteps))
    state_vec = np.zeros(len(sensory_labels))
    state_vec[s] = 1
    for t in range(n_timesteps):
        new_state_vec = trans_mat @ state_vec
Example 8
if plot_embed:
    screeplot(hist_mat.astype(float), title="Raw hist mat (full)")
    stashfig("scree-raw-mat" + basename)
    screeplot(log_mat, title="Log hist mat (full)")
    stashfig("scree-log-mat" + basename)

# %% [markdown]
# # Pairplots
if plot_embed:
    pca = PCA(n_components=6)
    embed = pca.fit_transform(log_mat)
    loadings = pca.components_.T
    pg = pairplot(
        embed,
        labels=to_class.values,
        palette=CLASS_COLOR_DICT,
        height=5,
        title="Node response embedding (log)",
    )
    pg._legend.remove()
    stashfig("node-pca-log" + basename)
    pg = pairplot(
        loadings,
        labels=from_class.values,
        height=5,
        title="Source class embedding (log)",
    )
    stashfig("source-pca-log" + basename)

    pca = PCA(n_components=6)
    embed = pca.fit_transform(hist_mat.astype(float))
Example 9

sns.set_context("talk", font_scale=1)
pn_types = ["ORN mPNs", "ORN uPNs", "tPNs", "vPNs"]
pn_input_props = {}
fig, ax = plt.subplots(2, 2, sharex=True, figsize=(15, 10))
ax = ax.ravel()
for i, t in enumerate(pn_types):
    pn_prop_input = calculate_from_class_input(t, class_ind_map, adj)
    pn_input_props[t] = pn_prop_input
    sns.distplot(pn_prop_input[pn_prop_input > 0], ax=ax[i], norm_hist=True)
    ax[i].set_title(t)

pn_prop_input_mat = np.array(list(pn_input_props.values())).T

pairplot(pn_prop_input_mat, col_names=pn_types)

#%% LHN - a neuron that receives >5% input from at least one projection neuron type

sns.set_context("talk", font_scale=1)

max_pn_prop_input = pn_prop_input_mat.max(axis=1)
thresh_range = np.linspace(0, 0.35, num=50)


def var_objective(input, class1_inds, class2_inds):
    class1_var = np.var(input[class1_inds])
    class2_var = np.var(input[class2_inds])
    objective = class1_var + class2_var
    return objective
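#%% A minimal sketch (my assumption, not the original code) of combining the >5%
# rule with the variance objective: sweep thresh_range, split neurons at each
# candidate threshold, and keep the split minimizing summed within-group variance.
objectives = []
for thresh in thresh_range:
    class1_inds = np.where(max_pn_prop_input <= thresh)[0]
    class2_inds = np.where(max_pn_prop_input > thresh)[0]
    if len(class1_inds) == 0 or len(class2_inds) == 0:
        objectives.append(np.inf)  # skip degenerate splits
        continue
    objectives.append(var_objective(max_pn_prop_input, class1_inds, class2_inds))
best_thresh = thresh_range[np.argmin(objectives)]
# the fixed definition above would instead be: is_lhn = max_pn_prop_input > 0.05
is_lhn = max_pn_prop_input > best_thresh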
Example 10
joint = np.concatenate((embed, full_hop_hist.T), axis=1)

from graspy.plot import pairplot
from sklearn.decomposition import PCA
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler
from src.visualization import screeplot

joint = StandardScaler(with_mean=False, with_std=True).fit_transform(joint)
screeplot(joint)
embedder = TruncatedSVD(n_components=4)
joint_embed = embedder.fit_transform(joint)


pg = pairplot(joint_embed, labels=meta["merge_class"].values, palette=CLASS_COLOR_DICT)
pg._legend.remove()


# %%

meta["inds"] = range(len(meta))
left_inds = meta[meta["left"]]["inds"]
right_inds = meta[meta["right"]]["inds"]
lp_inds, rp_inds = get_paired_inds(meta)
results = crossval_cluster(
    joint_embed,
    left_inds,
    right_inds,
    min_clusters=2,
    max_clusters=20,
Example 11
matrixplot(
    path_indicator_mat[:50, :50],
    plot_type="scattermap",
    sizes=(0.2, 0.2),
    hue="weight",
    palette=sns.color_palette("husl", n_colors=10),
    ax=ax,
)

# %% [markdown]
# ##

embedder = AdjacencySpectralEmbed(n_components=None, n_elbows=2)
embed = embedder.fit_transform(adj)
embed = np.concatenate(embed, axis=-1)
pairplot(embed, labels=labels, palette="tab20")

# %% [markdown]
# ## Run paths
print(f"Running {n_init} random walks from each source node...")

transition_probs = to_markov_matrix(adj)

out_inds = np.where(labels == n_blocks - 1)[0]
source_inds = np.where(labels == 0)[0]


def rw_from_node(s):
    paths = []
    rw = RandomWalk(transition_probs,
                    stop_nodes=out_inds,
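# %% [markdown]
# The excerpt above cuts off mid-call; below is a generic sketch (my own, not the
# repo's RandomWalk class) of one walk from a source node until a stop node or a
# maximum number of hops is reached.


def simple_random_walk(transition_probs, source, stop_nodes, max_hops=10, seed=None):
    rng = np.random.default_rng(seed)
    path = [source]
    node = source
    for _ in range(max_hops):
        probs = transition_probs[node]
        if probs.sum() == 0:  # dead end, no outgoing edges
            break
        node = rng.choice(len(probs), p=probs)
        path.append(node)
        if node in stop_nodes:
            break
    return path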
Example 12
figsize = (20, 20)
sns.clustermap(edgesum_df, figsize=figsize)
plt.title("Edgesum matrix, single linkage euclidean dendrograms", loc="center")


screeplot(edgesum_mat, cumulative=False, title="Edgesum matrix screeplot")
plt.ylim((0, 0.5))

pca = PCA(n_components=3)
edgesum_pcs = pca.fit_transform(edgesum_mat)

var_exp = np.sum(pca.explained_variance_ratio_)

pairplot(
    edgesum_pcs, height=5, alpha=0.3, title=f"Edgesum PCs, {var_exp:.2f} variance explained"
)
pairplot(
    edgesum_pcs,
    labels=class_labels,
    height=5,
    alpha=0.3,
    title="Edgesum PCs colored by known types",
    palette=palette,
)

pairplot(
    edgesum_mat[:, [0, 1, 4, 5]],
    labels=class_labels,
    height=5,
    alpha=0.3,
Example 13
    dc = np.random.beta(2, 5, n[i])
    dc /= dc.sum()
    dcs.append(dc)
dcs = np.concatenate(dcs)
adj, labels = sbm(n, p, directed=True, dc=dcs, return_labels=True)
heatmap(adj, cbar=False, sort_nodes=True, inner_hier_labels=labels)

#%%
from graspy.embed import AdjacencySpectralEmbed

ase = AdjacencySpectralEmbed(n_components=3)
embed = ase.fit_transform(adj)
embed = np.concatenate(embed, axis=-1)

#%%
pairplot(embed, labels=labels)

# %% [markdown]
# ##

norm_embed = embed / np.linalg.norm(embed, axis=1)[:, None]
pairplot(norm_embed, labels=labels)

# %% [markdown]
# ##
import matplotlib.pyplot as plt

n_dim = norm_embed.shape[1]
fig, axs = plt.subplots(n_dim, n_dim, figsize=(10, 10))
for i in range(n_dim):
    for j in range(n_dim):
Example 14
plt.tight_layout()
annotate_arrow(ax[0])
savefig("4color_mb", fmt="png", dpi=150, bbox_inches="tight", pad_inches=0.5)

#%% Embed the graphs for the mushroom body right
n_components = 4

ase_latent = ase(sum_adj, n_components)
omni_latent = omni(color_adjs, n_components)
ase_cat_latent = ase_concatenate(color_adjs, n_components)
degree_mat = degree(color_adjs)
# latents = [ase_latent, omni_latent, ase_cat_latent, degree_mat]
latents = [ase_latent, omni_latent, degree_mat]

for latent, name in zip(latents, EMBED_FUNC_NAMES):
    pairplot(latent, labels=simple_class_labels, title=name)

#%%

# degree_clusts = [SphericalKMeans, KMeans]
# for k in range(2, 12):
#     print(k)
#     est = SphericalKMeans(n_clusters=k)
#     pred_labels = est.fit_predict(deg_mat)
#     ari = adjusted_rand_score(simple_class_labels, pred_labels)
#     print(ari)

# print()

# for k in range(2, 12):
#     print(k)
Example 15
# nci_hc_complete_4_clusters = cut_tree(
#     nci_hc_complete, n_clusters=4
# )  # Printing transpose just for space

# pd.crosstab(
#     index=nci_data.index,
#     columns=nci_hc_complete_4_clusters.T[0],
#     rownames=["Cancer Type"],
#     colnames=["Cluster"],
# )
# %% [markdown]
# #
from sklearn.decomposition import PCA

embedding = PCA(n_components=8).fit_transform(raw_hist_data)
pairplot(embedding, labels=dfs[0]["Merge Class"].values, palette=CLASS_COLOR_DICT)
# %% [markdown] 
# # 
from sklearn.cluster import AgglomerativeClustering

agg = AgglomerativeClustering(n_clusters=10, affinity="euclidean", linkage="average")
labels = agg.fit_predict(raw_hist_data)
pairplot(embedding, labels=labels, palette=cc.glasbey_light)

# %% [markdown]
# #

from graspy.cluster import AutoGMMCluster

agm = AutoGMMCluster(min_components=2, max_components=20, n_jobs=-1)
agm.fit(embedding)
Example 16
def cluster_func(k, seed):
    np.random.seed(seed)
    run_name = f"k = {k}, {cluster}, {embed}, right hemisphere (A to D), PTR, raw"
    print(run_name)
    print()

    # Cluster
    gmm = GaussianCluster(min_components=k, max_components=k, **gmm_params)
    gmm.fit(latent)
    pred_labels = gmm.predict(latent)

    # ARI
    base_dict = {
        "K": k,
        "Cluster": cluster,
        "Embed": embed,
        "Method": f"{cluster} o {embed}",
        "Score": gmm.model_.score(latent),
    }
    mb_ari = sub_ari(known_inds, mb_labels, pred_labels)
    mb_ari_dict = base_dict.copy()
    mb_ari_dict["ARI"] = mb_ari
    mb_ari_dict["Metric"] = "MB ARI"
    out_dicts.append(mb_ari_dict)

    simple_ari = sub_ari(known_inds, simple_class_labels, pred_labels)
    simple_ari_dict = base_dict.copy()
    simple_ari_dict["ARI"] = simple_ari
    simple_ari_dict["Metric"] = "Simple ARI"
    out_dicts.append(simple_ari_dict)

    full_ari = adjusted_rand_score(class_labels, pred_labels)
    full_ari_dict = base_dict.copy()
    full_ari_dict["ARI"] = full_ari
    full_ari_dict["Metric"] = "Full ARI"
    out_dicts.append(full_ari_dict)

    save_name = f"k{k}-{cluster}-{embed}-right-ad-PTR-raw"

    # Plot embedding
    pairplot(latent, labels=pred_labels, title=run_name)
    # stashfig("latent-" + save_name)

    # Plot everything else
    prob_df = get_sbm_prob(adj, pred_labels)
    block_sum_df = get_block_edgesums(adj, pred_labels, prob_df.columns.values)

    clustergram(adj, latent, prob_df, block_sum_df, simple_class_labels,
                pred_labels)
    plt.suptitle(run_name, fontsize=40)
    stashfig("clustergram-" + save_name)

    # output skeletons
    _, colormap, pal = stashskel(save_name,
                                 skeleton_labels,
                                 pred_labels,
                                 palette="viridis",
                                 multiout=True)

    sns.set_context("talk")
    palplot(k, cmap="viridis")

    stashfig("palplot-" + save_name)

    # save dict colormapping
    filename = (Path("./maggot_models/notebooks/outs") / Path(FNAME) /
                str("colormap-" + save_name + ".json"))
    with open(filename, "w") as fout:
        json.dump(colormap, fout)

    stashskel(save_name,
              skeleton_labels,
              pred_labels,
              palette="viridis",
              multiout=False)
Example 17
# blocks 0, 1 differ only in their inputs, not their outputs
B = np.array([
    [0.1, 0.1, 0.2, 0.05],
    [0.1, 0.1, 0.2, 0.05],
    [0.35, 0.15, 0.1, 0.1],
    [0.1, 0.05, 0.3, 0.4],
])
sns.heatmap(B, square=True, annot=True)
sbm_sample, sbm_labels = sbm([100, 100, 100, 100],
                             B,
                             directed=True,
                             return_labels=True)
ase = AdjacencySpectralEmbed()
out_embed, in_embed = ase.fit_transform(sbm_sample)
pairplot(out_embed, sbm_labels)  # don't see separation between [0, 1]
pairplot(in_embed, sbm_labels)  # do see separation between [0, 1]
# from this we can conclude that the "right" embedding or right singular vectors are the
# ones corresponding to input
# (out, in)

# %% [markdown]
# ## Options for the embedding
# - ASE and procrustes (not shown here)
# - Bilateral OMNI on G, SVD (see the sketch after this cell)
# - Bilateral OMNI on each of the 4-colors, concatenated, SVD
# - Bilateral OMNI on each of the 4-colors, with regularization, concatenated, SVD
# - Bilateral OMNI jointly with all 4-colors

n_omni_components = 8  # this is used for all of the embeddings initially
n_svd_components = 16  # this is for the last step
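# %% [markdown]
# ## (Sketch) Bilateral OMNI on G, then SVD
# A minimal sketch under my own assumptions, not the original implementation:
# `left_adj` and `right_adj` are hypothetical ipsilateral subgraphs whose rows
# are the paired neurons in matching order.

from graspy.embed import OmnibusEmbed, selectSVD

omni = OmnibusEmbed(n_components=n_omni_components)
# for directed graphs, fit_transform returns (out, in) arrays of shape (2, n, d)
omni_embed = omni.fit_transform([left_adj, right_adj])
cat_embed = np.concatenate(omni_embed, axis=-1)  # join out/in: (2, n, 2d)
cat_embed = np.concatenate(cat_embed, axis=-1)  # join the two graphs: (n, 4d)
U, S, Vt = selectSVD(cat_embed, n_components=n_svd_components)  # final reduction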
Example 18
lp_inds, rp_inds = get_paired_inds(meta)
left_inds = meta[meta["left"]]["inds"]
right_inds = meta[meta["right"]]["inds"]
cat_embed = np.concatenate(joint_embed, axis=-1)  # join out/in embeddings per graph
# align the left hemisphere to the right via Procrustes on the known pairs
for e in cat_embed:
    e[left_inds] = e[left_inds] @ orthogonal_procrustes(
        e[lp_inds], e[rp_inds])[0]

cat_embed = np.concatenate(cat_embed, axis=-1)  # then join embeddings across graphs
print(select_dimension(cat_embed, n_elbows=3))

U, S, Vt = selectSVD(cat_embed, n_elbows=3)

pg = pairplot(U,
              labels=meta["merge_class"].values,
              palette=CLASS_COLOR_DICT,
              size=20,
              alpha=0.4)
pg._legend.remove()
stashfig("omni-reduced-dim")

# %% [markdown]
# ##

results = crossval_cluster(U,
                           left_inds,
                           right_inds,
                           left_pair_inds=lp_inds,
                           right_pair_inds=rp_inds)

plot_metrics(results)
Example 19
# # Embedding
n_verts = mg.n_verts
sym_adj = mg.adj
side_labels = mg["Hemisphere"]
class_labels = mg["Merge Class"]

latent, laplacian = lse(sym_adj, N_COMPONENTS, regularizer=None, ptr=PTR)
latent_dim = latent.shape[1] // 2
screeplot(
    laplacian,
    title=f"Laplacian scree plot, R-DAD (ZG2 = {latent_dim} + {latent_dim})")
print(f"ZG chose dimension {latent_dim} + {latent_dim}")

plot_latent = np.concatenate(
    (latent[:, :3], latent[:, latent_dim:latent_dim + 3]), axis=-1)
pairplot(plot_latent, labels=side_labels)

# take the mean for the paired cells, making sure to add back in the unpaired cells
sym_latent = (latent[:n_pairs] + latent[n_pairs:2 * n_pairs]) / 2
sym_latent = np.concatenate((sym_latent, latent[2 * n_pairs:]))
latent = sym_latent

# make new labels
side_labels = np.concatenate((n_pairs * ["P"], side_labels[2 * n_pairs:]))
# this assumes the class labels are perfectly matched left-right, which is probably not true
class_labels = np.concatenate(
    (class_labels[:n_pairs], class_labels[2 * n_pairs:]))
# skeleton labels are weird for now

plot_latent = np.concatenate(
    (latent[:, :3], latent[:, latent_dim:latent_dim + 3]), axis=-1)
Example 20
    sns.distplot([len(visits) for visits in from_visit_orders.values()], ax=ax)
    ax.set_title(p)

# %% [markdown]
# #
encoding_df = pd.DataFrame(node_encodings).T
encoding_df = encoding_df.fillna(0)

from sklearn.decomposition import PCA

from graspy.plot import pairplot

embedding = PCA(n_components=8).fit_transform(encoding_df.values)

pairplot(embedding,
         labels=meta["Merge Class"].values,
         palette=CLASS_COLOR_DICT)
stashfig("random-walk-embedding")

# %% [markdown]
# #
encoding_df = pd.DataFrame(node_encodings).T
encoding_df = encoding_df.fillna(0)
encoding_mat = encoding_df.values + np.random.normal(
    loc=0, scale=0.3, size=encoding_df.values.shape)

encoding_df["Merge Class"] = meta["Merge Class"].values

fig, ax = plt.subplots(1, 1, figsize=(10, 10))
sns.scatterplot(
    data=encoding_df,
Example 21
from graspy.embed import AdjacencySpectralEmbed, OmnibusEmbed
from graspy.utils import pass_to_ranks
from graspy.plot import pairplot


sum_adj = np.sum(np.array(mb_color_graphs), axis=0)

n_components = 4

#
ptr_adj = pass_to_ranks(sum_adj)
ase = AdjacencySpectralEmbed(n_components=n_components)
sum_latent = ase.fit_transform(ptr_adj)
sum_latent = np.concatenate(sum_latent, axis=-1)
pairplot(sum_latent, labels=mb_class_labels)

ptr_color_adjs = [pass_to_ranks(a) for a in mb_color_graphs]
# graph_sum = [np.sum(a) for a in mb_color_graphs]
# ptr_color_adjs = [ptr_color_adjs[i] + (1 / graph_sum[i]) for i in range(4)]
omni = OmnibusEmbed(n_components=n_components // 4)
color_latent = omni.fit_transform(ptr_color_adjs)
color_latent = np.concatenate(color_latent, axis=-1)  # join out/in embeddings
color_latent = np.concatenate(color_latent, axis=-1)  # then join the 4 colors
pairplot(color_latent, labels=mb_class_labels)

from graspy.embed import MultipleASE

mase = MultipleASE(n_components=n_components)
mase_latent = mase.fit_transform(ptr_color_adjs)
mase_latent = np.concatenate(mase_latent, axis=-1)
Example 22
adj = mg.adj
ase_latent = ase(adj, None, True)
print(f"ZG chose {ase_latent.shape[1]//2}")
n_unique = len(np.unique(mg["Class 1"]))
meta_vals = [
    "Class 1",
    "Merge Class",
    "Hemisphere",
    "is_pdiff",
    "is_usplit",
    "is_brain",
]

for meta_val in meta_vals:
    pairplot(
        ase_latent,
        labels=mg[meta_val],
        palette=cc.glasbey_light[:mg.meta[meta_val].nunique()],
        title=meta_val,
    )
    stashfig(meta_val + "-pairplot")

# %% [markdown]
# # Try saving some output
# out_path = Path("maggot_models/notebooks/outs/60.0-BDP-eda-jan-data/objs")
# save_latent = np.concatenate((ase_latent[:, :3], ase_latent[:, 4:-1]), axis=-1)
# print(save_latent.shape)
# save_latent_df = pd.DataFrame(data=save_latent)
# save_latent_df.to_csv(out_path / "save_latent.tsv", sep="\t", header=False)
# mg.meta.to_csv(out_path / "save_meta.tsv", sep="\t")
Example 24
#%%
from graspy.cluster import GaussianCluster
from graspy.plot import pairplot
import numpy as np

n = 100
d = 3

np.random.seed(3)

X1 = np.random.normal(0.5, 0.5, size=(n, d))
X2 = np.random.normal(-0.5, 0.5, size=(n, d))
X3 = np.random.normal(0.8, 0.6, size=(n, d))
X4 = np.random.uniform(0.2, 0.3, size=(n, d))
X = np.vstack((X1, X2, X3, X4))
pairplot(X)

np.random.seed(3)

gclust = GaussianCluster(min_components=2, max_components=2, n_init=1, max_iter=100)
gclust.fit(X)

bic1 = gclust.bic_

np.random.seed(3)

gclust = GaussianCluster(min_components=2, max_components=2, n_init=50, max_iter=100)
gclust.fit(X)

bic2 = gclust.bic_
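#%% a quick comparison, not in the original excerpt: with 50 restarts, EM should
# land in an equal-or-better optimum than a single initialization
print("BIC, n_init=1:")
print(bic1)
print("BIC, n_init=50:")
print(bic2)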
Example 25
mds = ClassicalMDS(dissimilarity="precomputed")
# mds = MDS(dissimilarity="precomputed", n_components=6, n_init=16, n_jobs=-2)
jaccard_embedding = mds.fit_transform(pdist_sparse)

# %% [markdown]
# #

print("Clustering embedding")
agmm = AutoGMMCluster(min_components=10,
                      max_components=40,
                      affinity="euclidean",
                      linkage="single")
labels = agmm.fit_predict(jaccard_embedding)

pairplot(jaccard_embedding,
         title="AGMM o CMDS o Jaccard o Sensorimotor Paths",
         labels=labels)
savefig("AGMM-CMDS-jaccard-sm-path")

print("Finding mean paths")
mean_paths = []
uni_labels = np.unique(labels)
for ul in uni_labels:
    inds = np.where(labels == ul)[0]
    paths = path_mat[inds, :]
    mean_path = np.array(np.mean(paths, axis=0))
    mean_paths.append(mean_path)
mean_paths = np.squeeze(np.array(mean_paths))

Example 26
# TODO remove sensory and motor indices from the matrix
ad_signal = scatter_df["Signal flow"].values

plt.figure(figsize=(10, 10))
sns.scatterplot(ad_signal, aa_signal)
plt.xlabel(r"A $\to$ D signal flow")
plt.ylabel(r"A $\to$ D + A $\to$ A signal flow")
plt.show()

plt.figure(figsize=(10, 5))
sns.distplot(aa_signal - ad_signal)
plt.xlabel(r"(A $\to$ D + A $\to$ A signal flow) - (A $\to$ D signal flow)")
plt.ylabel("Frequency")
plt.show()

# %% [markdown]
# # Look at the different 4-color combinations, compute signal flow on each separately
# Plot the distributions of signal flow for the 4-colors against each other
GRAPH_TYPES = ["Gad", "Gaa", "Gdd", "Gda"]

signal_flows = []
for g in GRAPH_TYPES:
    adj = load_everything(g, version=GRAPH_VERSION)
    adj = adj[np.ix_(inds, inds)]
    scatter_df = signal_flow(adj)
    sf = scatter_df["Signal flow"]
    signal_flows.append(sf)

signal_flows = np.array(signal_flows).T
pairplot(signal_flows, col_names=GRAPH_TYPES, height=4)
plt.show()
Example 27
# fig, ax = plt.subplots(1, 1, figsize=(10, 20))
# voltage = voltage_df.values
# log_voltage = np.log10(voltage)
# matrixplot(
#     rank_voltage_df.values,
#     ax=ax,
#     row_meta=meta,
#     row_sort_class=[class_key],
#     tick_rot=45,
# )

# %% [markdown]
# ##
sns.distplot(np.log10(curr_node + 1), kde=False)

# %% [markdown]
# #

pca = PCA(n_components=5)
embed = pca.fit_transform(rank_voltage_df.values)
pg = pairplot(embed, labels=meta[class_key].values, palette=CLASS_COLOR_DICT)
pg._legend.remove()

# %% [markdown]
# #
colors = np.vectorize(CLASS_COLOR_DICT.get)(meta["Merge Class"].values)
sns.clustermap(
    rank_voltage_df.values, row_cluster=True, col_cluster=False, row_colors=colors
)
Example 28
cmds = ClassicalMDS(
    n_components=int(np.ceil(np.log2(np.min(X.shape)))))
path_embed = cmds.fit_transform(X)
elbows, elbow_vals = select_dimension(cmds.singular_values_, n_elbows=3)
rng = np.arange(1, len(cmds.singular_values_) + 1)
elbows = np.array(elbows)
fig, ax = plt.subplots(1, 1, figsize=(8, 4))
pc = ax.scatter(elbows, elbow_vals, color="red", label="ZG")
pc.set_zorder(10)
ax.plot(rng, cmds.singular_values_, "o-")
ax.legend()
stashfig("cmds-screeplot" + basename)

# %% [markdown]
# ##

pairplot(path_embed, alpha=0.02)
stashfig("cmds-pairs-all" + basename)
# %% [markdown]
# ##
print("Running AGMM on CMDS embedding")
n_components = 4

agmm = AutoGMMCluster(max_components=40, n_jobs=-2)
pred = agmm.fit_predict(path_embed[:, :n_components])

print(f"Number of clusters: {agmm.n_components_}")

# %% [markdown]
# ##
pairplot(
    path_embed[:, :n_components],
Example 29
palette = dict(zip(np.unique(labels), sns.color_palette("deep", 10)))

for method, embedding in embeddings.items():
    plot_df = make_plot_df(embedding)
    plot_dfs[method] = plot_df

#%% [markdown]
# ## Looking at the spectral embeddings
# In the next few cells, I plot
# - Pairplots
# - Individual eigenvectors, sorted by block and then by node degree w/in block
# - Eigenvector values for each node plotted against node degree
#%% pairplots
for method, embedding in embeddings.items():
    pairplot(
        embedding, labels=labels, palette=palette, legend_name="Community", title=method
    )

#%% eigenvector plots
for method, plot_df in plot_dfs.items():
    fig, axs = plt.subplots(2, 2, figsize=(16, 8))
    for i, ax in enumerate(axs.ravel()):
        sns.scatterplot(
            data=plot_df,
            x="ind",
            y=i,
            hue="label",
            ax=ax,
            legend=False,
            palette=palette,
            s=10,
Example 30

# %% [markdown]
# # Run clustering using LSE on the sum graph

n_components = 4
gmm_params = {"n_init": N_INIT, "covariance_type": "all"}
out_dicts = []

embed = "LSE"
cluster = "GMM"

lse_latent = lse(adj, n_components, regularizer=None)

latent = lse_latent
pairplot(latent, labels=simple_class_labels, title=embed)

for k in range(MIN_CLUSTERS, MAX_CLUSTERS + 1):
    run_name = f"k = {k}, {cluster}, {embed}, right hemisphere (A to D), PTR, raw"
    print(run_name)
    print()

    # Cluster
    gmm = GaussianCluster(min_components=k, max_components=k, **gmm_params)
    gmm.fit(latent)
    pred_labels = gmm.predict(latent)

    # ARI
    base_dict = {
        "K": k,
        "Cluster": cluster,