def pairs(embed):
    """Draw a pairplot of `embed` colored by merge class, without a legend.

    Relies on the module-level `meta` DataFrame and `CLASS_COLOR_DICT` palette.
    """
    grid = pairplot(
        embed,
        labels=meta["merge_class"].values,
        palette=CLASS_COLOR_DICT,
        size=20,
        alpha=0.5,
    )
    # the legend is dropped because the class palette is shown elsewhere —
    # NOTE(review): presumably; confirm against sibling plots
    grid._legend.remove()
def one_iteration(start_labels, class_key="Merge Class"):
    """Run one round of the walk -> embed -> cluster pipeline with diagnostics.

    Steps: generate random-walk histograms from `start_labels`, log-transform,
    plot a clustermap, embed with 8-component PCA, cluster with AutoGMM, and
    plot the embedding by known class and by predicted cluster.

    Returns
    -------
    array of predicted cluster labels from AutoGMM.
    """
    # generate walks
    counts, hist_bins, known_classes = random_walk_classes(
        start_labels, seed=None, class_key=class_key
    )
    # log-transform to tame heavy-tailed counts (+1 avoids log of zero)
    log_counts = np.log10(counts + 1)
    # plot the clustermap
    path_clustermap(log_counts, known_classes, hist_bins)
    # embed and plot by known class
    pca_embed = PCA(n_components=8).fit_transform(log_counts)
    pairplot(pca_embed, labels=known_classes, palette=CLASS_COLOR_DICT)
    # cluster
    gmm = AutoGMMCluster(min_components=2, max_components=20, n_jobs=-1, verbose=10)
    cluster_labels = gmm.fit_predict(pca_embed)
    # model-selection diagnostic: BIC/AIC as a function of number of components
    plt.figure()
    sns.scatterplot(data=gmm.results_, x="n_components", y="bic/aic")
    # plot embedding by cluster
    pairplot(pca_embed, labels=cluster_labels, palette=cc.glasbey_light)
    # plot predicted clusters by known class
    stacked_barplot(cluster_labels, known_classes, color_dict=CLASS_COLOR_DICT)
    return cluster_labels
plot_df.iloc[lp_inds, 1], plot_df.iloc[rp_inds, 1], ax=ax, ) plt.tight_layout() fig.suptitle(f"Method = {method}", y=1) stashfig(f"procrustes-ase-{method}") # %% [markdown] # ## Try ranking pairs pg = pairplot( norm_embed, labels=meta["merge_class"].values, palette=CLASS_COLOR_DICT, diag_kind="hist", ) pg._legend.remove() # %% [markdown] # ## def embedplot(embed): plot_df = pd.DataFrame(data=embed[:, [0, 1]]) plot_df["merge_class"] = meta["merge_class"].values fig, ax = plt.subplots(1, 1, figsize=(10, 10)) sns.scatterplot( data=plot_df, x=0,
# Randomly subsample `subsample` paths, keeping original order.
inds = np.random.choice(len(paths), size=subsample, replace=False)
new_paths = []
for i, p in enumerate(paths):
    if i in inds:
        new_paths.append(p)
paths = new_paths
print(f"Number of paths after subsampling: {len(paths)}")
# %% [markdown]
# ##
# ASE on the pass-to-ranks adjacency; (out, in) latents concatenated columnwise.
embedder = AdjacencySpectralEmbed(n_components=None, n_elbows=2)
embed = embedder.fit_transform(pass_to_ranks(adj))
embed = np.concatenate(embed, axis=-1)
pairplot(embed, labels=labels)
# %% [markdown]
# ## Show 2 dimensions of pairwise cosine embedding
pdist = pairwise_distances(embed, metric="cosine")
# %% [markdown]
# ##
# Nonlinear 2-d layout of the precomputed cosine distances.
manifold = TSNE(metric="precomputed")
# manifold = ClassicalMDS(n_components=2, dissimilarity="precomputed")
cos_embed = manifold.fit_transform(pdist)
# %% [markdown]
# ##
plot_df = pd.DataFrame(data=cos_embed)
# "tPNs": "PN", # "vPNs": "PN", # "Unidentified": "Other", # "Other": "Other", # } # mb_labels = np.array(itemgetter(*class_labels)(name_map)) # known_inds = np.where(np.logical_or(mb_labels == "MB", mb_labels == "PN"))[0] # %% [markdown] # # Run clustering using LSE on the sum graph n_verts = adj.shape[0] latent = lse(adj, n_components, regularizer=None) pairplot(latent, labels=simple_class_labels, title=embed) k_list = list(range(MIN_CLUSTERS, MAX_CLUSTERS + 1)) n_runs = len(k_list) out_dicts = [] bin_adj = binarize(adj) last_pred_labels = np.zeros(n_verts) if cluster == "GMM": ClusterModel = GaussianCluster elif cluster == "AutoGMM": ClusterModel = AutoGMMCluster for k in k_list:
# plot degree sequence d_sort = np.argsort(degrees)[::-1] degrees = degrees[d_sort] plt.figure(figsize=(10, 5)) sns.scatterplot(x=range(len(degrees)), y=degrees, s=30, linewidth=0) known_inds = np.where(class_labels != "Unk")[0] # %% [markdown] # # Run clustering using LSE on the sum graph n_verts = adj.shape[0] latent = lse(adj, n_components, regularizer=None) pairplot(latent, labels=class_labels, title=embed) k_list = list(range(MIN_CLUSTERS, MAX_CLUSTERS + 1)) n_runs = len(k_list) out_dicts = [] bin_adj = binarize(adj) last_pred_labels = np.zeros(n_verts) if cluster == "GMM": ClusterModel = GaussianCluster elif cluster == "AutoGMM": ClusterModel = AutoGMMCluster for k in k_list:
for i in range(full_response_mat.shape[1]): plt.figure() plt.plot(full_response_mat[:, i]) # %% [markdown] # # from sklearn.decomposition import PCA in_deg = adj.sum(axis=0) in_deg[in_deg == 0] = 1 full_response_mat = full_response_mat / in_deg[:, np.newaxis] full_response_mat[np.isinf(full_response_mat)] = 0 pca = PCA(n_components=4) latent = pca.fit_transform(full_response_mat) pairplot(latent, labels=class_labels) # %% [markdown] # # is_sensory = np.vectorize(lambda s: s in sensory_classes)(class_labels) inds = np.arange(len(class_labels)) sensory_inds = inds[is_sensory] response_mats = [] n_timesteps = 1 for s in sensory_inds: response_mat = np.zeros((n_verts, n_timesteps)) state_vec = np.zeros(len(sensory_labels)) state_vec[s] = 1 for t in range(n_timesteps): new_state_vec = trans_mat @ state_vec
# Scree plots of the raw and log-transformed histogram matrices.
if plot_embed:
    screeplot(hist_mat.astype(float), title="Raw hist mat (full)")
    stashfig("scree-raw-mat" + basename)
    screeplot(log_mat, title="Log hist mat (full)")
    stashfig("scree-log-mat" + basename)
# %% [markdown]
# # Pairplots
if plot_embed:
    # PCA of the log histogram matrix: rows -> node embedding,
    # components -> source-class loadings.
    pca = PCA(n_components=6)
    embed = pca.fit_transform(log_mat)
    loadings = pca.components_.T
    pg = pairplot(
        embed,
        labels=to_class.values,
        palette=CLASS_COLOR_DICT,
        height=5,
        title="Node response embedding (log)",
    )
    pg._legend.remove()
    stashfig("node-pca-log" + basename)
    pg = pairplot(
        loadings,
        labels=from_class.values,
        height=5,
        title="Source class embedding (log)",
    )
    stashfig("source-pca-log" + basename)
    # Same embedding on the raw (un-logged) histogram matrix.
    pca = PCA(n_components=6)
    embed = pca.fit_transform(hist_mat.astype(float))
sns.set_context("talk", font_scale=1)
# Per-PN-type input proportions; one histogram per type on a 2x2 grid.
pn_types = ["ORN mPNs", "ORN uPNs", "tPNs", "vPNs"]
pn_input_props = {}
fig, ax = plt.subplots(2, 2, sharex=True, figsize=(15, 10))
ax = ax.ravel()
for i, t in enumerate(pn_types):
    pn_prop_input = calculate_from_class_input(t, class_ind_map, adj)
    pn_input_props[t] = pn_prop_input
    # only plot nodes that receive any input from this PN type
    sns.distplot(pn_prop_input[pn_prop_input > 0], ax=ax[i], norm_hist=True)
    ax[i].set_title(t)
# columns = PN types, rows = nodes (dict preserves insertion order)
pn_prop_input_mat = np.array(list(pn_input_props.values())).T
pairplot(pn_prop_input_mat, col_names=pn_types)
#%% LHN - someone who received >5% input from at least one projection neuron type
sns.set_context("talk", font_scale=1)
max_pn_prop_input = pn_prop_input_mat.max(axis=1)
thresh_range = np.linspace(0, 0.35, num=50)


def var_objective(input, class1_inds, class2_inds):
    """Sum of within-class variances of `input` over the two index sets.

    Lower is better when searching for a threshold that splits `input` into
    two tight groups. NOTE(review): parameter name `input` shadows the builtin.
    """
    class1_var = np.var(input[class1_inds])
    class2_var = np.var(input[class2_inds])
    objective = class1_var + class2_var
    return objective
joint = np.concatenate((embed, full_hop_hist.T), axis=1) from graspy.plot import pairplot from sklearn.decomposition import PCA from sklearn.decomposition import TruncatedSVD from sklearn.preprocessing import StandardScaler from src.visualization import screeplot joint = StandardScaler(with_mean=False, with_std=True).fit_transform(joint) screeplot(joint) embedder = TruncatedSVD(n_components=4) joint_embed = embedder.fit_transform(joint) pg = pairplot(joint_embed, labels=meta["merge_class"].values, palette=CLASS_COLOR_DICT) pg._legend.remove() # %% meta["inds"] = range(len(meta)) left_inds = meta[meta["left"]]["inds"] right_inds = meta[meta["right"]]["inds"] lp_inds, rp_inds = get_paired_inds(meta) results = crossval_cluster( joint_embed, left_inds, right_inds, min_clusters=2, max_clusters=20,
matrixplot( path_indicator_mat[:50, :50], plot_type="scattermap", sizes=(0.2, 0.2), hue="weight", palette=sns.color_palette("husl", n_colors=10), ax=ax, ) # %% [markdown] # ## embedder = AdjacencySpectralEmbed(n_components=None, n_elbows=2) embed = embedder.fit_transform(adj) embed = np.concatenate(embed, axis=-1) pairplot(embed, labels=labels, palette="tab20") # %% [markdown] # ## Run paths print(f"Running {n_init} random walks from each source node...") transition_probs = to_markov_matrix(adj) out_inds = np.where(labels == n_blocks - 1)[0] source_inds = np.where(labels == 0)[0] def rw_from_node(s): paths = [] rw = RandomWalk(transition_probs, stop_nodes=out_inds,
figsize = (20, 20) sns.clustermap(edgesum_df, figsize=figsize) plt.title("Edgesum matrix, single linkage euclidean dendrograms", loc="center") screeplot(edgesum_mat, cumulative=False, title="Edgesum matrix screeplot") plt.ylim((0, 0.5)) pca = PCA(n_components=3) edgesum_pcs = pca.fit_transform(edgesum_mat) var_exp = np.sum(pca.explained_variance_ratio_) pairplot( edgesum_pcs, height=5, alpha=0.3, title=f"Edgesum PCs, {var_exp} variance explained" ) pairplot( edgesum_pcs, labels=class_labels, height=5, alpha=0.3, title="Edgesum PCs colored by known types", palette=palette, ) pairplot( edgesum_mat[:, [0, 1, 4, 5]], labels=class_labels, height=5, alpha=0.3,
dc = np.random.beta(2, 5, n[i]) dc /= dc.sum() dcs.append(dc) dcs = np.concatenate(dcs) adj, labels = sbm(n, p, directed=True, dc=dcs, return_labels=True) heatmap(adj, cbar=False, sort_nodes=True, inner_hier_labels=labels) #%% from graspy.embed import AdjacencySpectralEmbed ase = AdjacencySpectralEmbed(n_components=3) embed = ase.fit_transform(adj) embed = np.concatenate(embed, axis=-1) #%% pairplot(embed, labels=labels) # %% [markdown] # ## norm_embed = embed / np.linalg.norm(embed, axis=1)[:, None] pairplot(norm_embed, labels=labels) # %% [markdown] # ## import matplotlib.pyplot as plt n_dim = norm_embed.shape[1] fig, axs = plt.subplots(n_dim, n_dim, figsize=(10, 10)) for i in range(n_dim): for j in range(n_dim):
plt.tight_layout()
annotate_arrow(ax[0])
savefig("4color_mb", fmt="png", dpi=150, bbox_inches="tight", pad_inches=0.5)
#%% Embed the graphs for the mushroom body right
n_components = 4
ase_latent = ase(sum_adj, n_components)
omni_latent = omni(color_adjs, n_components)
# NOTE(review): ase_cat_latent is computed but excluded from `latents` below
ase_cat_latent = ase_concatenate(color_adjs, n_components)
degree_mat = degree(color_adjs)
# latents = [ase_latent, omni_latent, ase_cat_latent, degree_mat]
latents = [ase_latent, omni_latent, degree_mat]
# one pairplot per embedding method, colored by simplified class
for latent, name in zip(latents, EMBED_FUNC_NAMES):
    pairplot(latent, labels=simple_class_labels, title=name)
#%%
# degree_clusts = [SphericalKMeans, KMeans]
# for k in range(2, 12):
#     print(k)
#     est = SphericalKMeans(n_clusters=k)
#     pred_labels = est.fit_predict(deg_mat)
#     ari = adjusted_rand_score(simple_class_labels, pred_labels)
#     print(ari)
#     print()
# for k in range(2, 12):
#     print(k)
# nci_hc_complete_4_clusters = cut_tree(
#     nci_hc_complete, n_clusters=4
# )  # Printing transpose just for space
# pd.crosstab(
#     index=nci_data.index,
#     columns=nci_hc_complete_4_clusters.T[0],
#     rownames=["Cancer Type"],
#     colnames=["Cluster"],
# )
# %% [markdown]
# #
from sklearn.decomposition import PCA

# PCA embedding of the raw path-histogram data, colored by known class
embedding = PCA(n_components=8).fit_transform(raw_hist_data)
pairplot(embedding, labels=dfs[0]["Merge Class"].values, palette=CLASS_COLOR_DICT)
# %% [markdown]
# #
from sklearn.cluster import AgglomerativeClustering

# agglomerative clustering directly on the raw data, visualized on the PCA embedding
agg = AgglomerativeClustering(n_clusters=10, affinity="euclidean", linkage="average")
labels = agg.fit_predict(raw_hist_data)
pairplot(embedding, labels=labels, palette=cc.glasbey_light)
# %% [markdown]
# #
from graspy.cluster import AutoGMMCluster

# FIX: the call was missing its closing parenthesis (syntax error)
agm = AutoGMMCluster(min_components=2, max_components=20, n_jobs=-1)
agm.fit(embedding)
def cluster_func(k, seed):
    """Fit a k-component GMM to the module-level `latent`, score it against the
    known labelings, and save all plots/skeleton outputs for this k.

    Reads module globals: `cluster`, `embed`, `latent`, `gmm_params`, `adj`,
    `known_inds`, `mb_labels`, `simple_class_labels`, `class_labels`,
    `skeleton_labels`, `FNAME`. Appends three ARI records to the module-level
    `out_dicts` list (side effect).
    """
    np.random.seed(seed)
    run_name = f"k = {k}, {cluster}, {embed}, right hemisphere (A to D), PTR, raw"
    print(run_name)
    print()
    # Cluster
    gmm = GaussianCluster(min_components=k, max_components=k, **gmm_params)
    gmm.fit(latent)
    pred_labels = gmm.predict(latent)
    # ARI: one record per labeling (MB subset, simplified classes, full classes)
    base_dict = {
        "K": k,
        "Cluster": cluster,
        "Embed": embed,
        "Method": f"{cluster} o {embed}",
        "Score": gmm.model_.score(latent),
    }
    mb_ari = sub_ari(known_inds, mb_labels, pred_labels)
    mb_ari_dict = base_dict.copy()
    mb_ari_dict["ARI"] = mb_ari
    mb_ari_dict["Metric"] = "MB ARI"
    out_dicts.append(mb_ari_dict)
    simple_ari = sub_ari(known_inds, simple_class_labels, pred_labels)
    simple_ari_dict = base_dict.copy()
    simple_ari_dict["ARI"] = simple_ari
    simple_ari_dict["Metric"] = "Simple ARI"
    out_dicts.append(simple_ari_dict)
    full_ari = adjusted_rand_score(class_labels, pred_labels)
    full_ari_dict = base_dict.copy()
    full_ari_dict["ARI"] = full_ari
    full_ari_dict["Metric"] = "Full ARI"
    out_dicts.append(full_ari_dict)
    save_name = f"k{k}-{cluster}-{embed}-right-ad-PTR-raw"
    # Plot embedding
    pairplot(latent, labels=pred_labels, title=run_name)
    # stashfig("latent-" + save_name)
    # Plot everything else
    prob_df = get_sbm_prob(adj, pred_labels)
    block_sum_df = get_block_edgesums(adj, pred_labels, prob_df.columns.values)
    clustergram(adj, latent, prob_df, block_sum_df, simple_class_labels, pred_labels)
    plt.suptitle(run_name, fontsize=40)
    stashfig("clustergram-" + save_name)
    # output skeletons (multiout=True writes one file per cluster)
    _, colormap, pal = stashskel(
        save_name, skeleton_labels, pred_labels, palette="viridis", multiout=True
    )
    sns.set_context("talk")
    palplot(k, cmap="viridis")
    stashfig("palplot-" + save_name)
    # save dict colormapping so skeleton colors can be reproduced later
    filename = (
        Path("./maggot_models/notebooks/outs")
        / Path(FNAME)
        / str("colormap-" + save_name + ".json")
    )
    with open(filename, "w") as fout:
        json.dump(colormap, fout)
    stashskel(
        save_name, skeleton_labels, pred_labels, palette="viridis", multiout=False
    )
# blocks 0, 1 differ only in their inputs, not their outputs B = np.array([ [0.1, 0.1, 0.2, 0.05], [0.1, 0.1, 0.2, 0.05], [0.35, 0.15, 0.1, 0.1], [0.1, 0.05, 0.3, 0.4], ]) sns.heatmap(B, square=True, annot=True) sbm_sample, sbm_labels = sbm([100, 100, 100, 100], B, directed=True, return_labels=True) ase = AdjacencySpectralEmbed() out_embed, in_embed = ase.fit_transform(sbm_sample) pairplot(out_embed, sbm_labels) # don't see separation between [0, 1] pairplot(in_embed, sbm_labels) # do see separation between [0, 1] # from this we can conclude that the "right" embedding or right singular vectors are the # ones corresponding to input # (out, in) # %% [markdown] # ## Options for the embedding # - ASE and procrustes (not shown here) # - Bilateral OMNI on G, SVD # - Bilateral OMNI on each of the 4-colors, concatenated, SVD # - Bilateral OMNI on each of the 4-colors, with regularization, concatenated, SVD # - Bilateral OMNI jointly with all 4-colors n_omni_components = 8 # this is used for all of the embedings initially n_svd_components = 16 # this is for the last step
lp_inds, rp_inds = get_paired_inds(meta)
left_inds = meta[meta["left"]]["inds"]
right_inds = meta[meta["right"]]["inds"]
# stack the joint embedding, then align the left hemisphere onto the right
# via Procrustes fit on the known left/right pairs, per sub-embedding
cat_embed = np.concatenate(joint_embed, axis=-1)
for e in cat_embed:
    e[left_inds] = e[left_inds] @ orthogonal_procrustes(e[lp_inds], e[rp_inds])[0]
cat_embed = np.concatenate(cat_embed, axis=-1)
# reduce dimension by SVD, keeping elbows chosen by Zhu-Godsie
print(select_dimension(cat_embed, n_elbows=3))
U, S, Vt = selectSVD(cat_embed, n_elbows=3)
pg = pairplot(
    U, labels=meta["merge_class"].values, palette=CLASS_COLOR_DICT, size=20, alpha=0.4
)
pg._legend.remove()
stashfig("omni-reduced-dim")
# %% [markdown]
# ##
# cross-validated clustering on the reduced embedding, using the pairs to score
results = crossval_cluster(
    U, left_inds, right_inds, left_pair_inds=lp_inds, right_pair_inds=rp_inds
)
plot_metrics(results)
# # Embedding n_verts = mg.n_verts sym_adj = mg.adj side_labels = mg["Hemisphere"] class_labels = mg["Merge Class"] latent, laplacian = lse(sym_adj, N_COMPONENTS, regularizer=None, ptr=PTR) latent_dim = latent.shape[1] // 2 screeplot( laplacian, title=f"Laplacian scree plot, R-DAD (ZG2 = {latent_dim} + {latent_dim})") print(f"ZG chose dimension {latent_dim} + {latent_dim}") plot_latent = np.concatenate( (latent[:, :3], latent[:, latent_dim:latent_dim + 3]), axis=-1) pairplot(plot_latent, labels=side_labels) # take the mean for the paired cells, making sure to add back in the unpaired cells sym_latent = (latent[:n_pairs] + latent[n_pairs:2 * n_pairs]) / 2 sym_latent = np.concatenate((sym_latent, latent[2 * n_pairs:])) latent = sym_latent # make new labels side_labels = np.concatenate((n_pairs * ["P"], side_labels[2 * n_pairs:])) # this is assuming that the class labels are perfectly matches left right, probs not class_labels = np.concatenate( (class_labels[:n_pairs], class_labels[2 * n_pairs:])) # skeleton labels are weird for now plot_latent = np.concatenate( (latent[:, :3], latent[:, latent_dim:latent_dim + 3]), axis=-1)
sns.distplot([len(visits) for visits in from_visit_orders.values()], ax=ax) ax.set_title(p) # %% [markdown] # # encoding_df = pd.DataFrame(node_encodings).T encoding_df = encoding_df.fillna(0) from sklearn.decomposition import PCA from graspy.plot import pairplot embedding = PCA(n_components=8).fit_transform(encoding_df.values) pairplot(embedding, labels=meta["Merge Class"].values, palette=CLASS_COLOR_DICT) stashfig("random-walk-embedding") # %% [markdown] # # encoding_df = pd.DataFrame(node_encodings).T encoding_df = encoding_df.fillna(0) encoding_mat = encoding_df.values + np.random.normal( loc=0, scale=0.3, size=encoding_df.values.shape) encoding_df["Merge Class"] = meta["Merge Class"].values fig, ax = plt.subplots(1, 1, figsize=(10, 10)) sns.scatterplot( data=encoding_df,
from graspy.embed import AdjacencySpectralEmbed, OmnibusEmbed
from graspy.utils import pass_to_ranks
from graspy.plot import pairplot

# sum graph across the four edge-color graphs
sum_adj = np.sum(np.array(mb_color_graphs), axis=0)
n_components = 4
# ptr_adj = pass_to_ranks(sum_adj)
ase = AdjacencySpectralEmbed(n_components=n_components)
# NOTE(review): `ptr_adj` is used below but its assignment above is commented
# out — presumably defined earlier in the file, otherwise this is a NameError;
# TODO confirm.
sum_latent = ase.fit_transform(ptr_adj)
sum_latent = np.concatenate(sum_latent, axis=-1)
pairplot(sum_latent, labels=mb_class_labels)
ptr_color_adjs = [pass_to_ranks(a) for a in mb_color_graphs]
# graph_sum = [np.sum(a) for a in mb_color_graphs]
# ptr_color_adjs = [ptr_color_adjs[i] + (1 / graph_sum[i]) for i in range(4)]
omni = OmnibusEmbed(n_components=n_components // 4)
color_latent = omni.fit_transform(ptr_color_adjs)
# two concatenations: presumably first over (out, in) halves, then across the
# per-graph latents — TODO confirm intended shapes
color_latent = np.concatenate(color_latent, axis=-1)
color_latent = np.concatenate(color_latent, axis=-1)
pairplot(color_latent, labels=mb_class_labels)
from graspy.embed import MultipleASE

mase = MultipleASE(n_components=n_components)
mase_latent = mase.fit_transform(ptr_color_adjs)
mase_latent = np.concatenate(mase_latent, axis=-1)
adj = mg.adj
ase_latent = ase(adj, None, True)
print(f"ZG chose {ase_latent.shape[1]//2}")
# NOTE(review): n_unique is computed but not used in this chunk
n_unique = len(np.unique(mg["Class 1"]))
# one pairplot of the same embedding per metadata column
meta_vals = [
    "Class 1",
    "Merge Class",
    "Hemisphere",
    "is_pdiff",
    "is_usplit",
    "is_brain",
]
for meta_val in meta_vals:
    pairplot(
        ase_latent,
        labels=mg[meta_val],
        palette=cc.glasbey_light[:mg.meta[meta_val].nunique()],
        title=meta_val,
    )
    stashfig(meta_val + "-pairplot")
# %% [markdown]
# # Try saving some output
# out_path = Path("maggot_models/notebooks/outs/60.0-BDP-eda-jan-data/objs")
# save_latent = np.concatenate((ase_latent[:, :3], ase_latent[:, 4:-1]), axis=-1)
# print(save_latent.shape)
# save_latent_df = pd.DataFrame(data=save_latent)
# save_latent_df.to_csv(out_path / "save_latent.tsv", sep="\t", header=False)
# mg.meta.to_csv(out_path / "save_meta.tsv", sep="\t")
#%% from graspy.cluster import GaussianCluster from graspy.plot import pairplot import numpy as np n = 100 d = 3 np.random.seed(3) X1 = np.random.normal(0.5, 0.5, size=(n, d)) X2 = np.random.normal(-0.5, 0.5, size=(n, d)) X3 = np.random.normal(0.8, 0.6, size=(n, d)) X4 = np.random.uniform(0.2, 0.3, size=(n, d)) X = np.vstack((X1, X2, X3, X4)) pairplot(X) np.random.seed(3) gclust = GaussianCluster(min_components=2, max_components=2, n_init=1, max_iter=100) gclust.fit(X) bic1 = gclust.bic_ np.random.seed(3) gclust = GaussianCluster(min_components=2, max_components=2, n_init=50, max_iter=100) gclust.fit(X) bic2 = gclust.bic_
mds = ClassicalMDS(dissimilarity="precomputed") # mds = MDS(dissimilarity="precomputed", n_components=6, n_init=16, n_jobs=-2) jaccard_embedding = mds.fit_transform(pdist_sparse) # %% [markdown] # # print("Clustering embedding") agmm = AutoGMMCluster(min_components=10, max_components=40, affinity="euclidean", linkage="single") labels = agmm.fit_predict(jaccard_embedding) pairplot(jaccard_embedding, title="AGMM o CMDS o Jaccard o Sensorimotor Paths", labels=labels) savefig("AGMM-CMDS-jaccard-sm-path") print("Finding mean paths") mean_paths = [] uni_labels = np.unique(labels) for ul in uni_labels: inds = np.where(labels == ul)[0] paths = path_mat[inds, :] mean_path = np.array(np.mean(paths, axis=0)) mean_paths.append(mean_path) mean_paths = np.squeeze(np.array(mean_paths)) # TODO remove sensory and motor indices from the matrix
ad_signal = scatter_df["Signal flow"].values
# scatter of A->D signal flow vs A->D + A->A signal flow
plt.figure(figsize=(10, 10))
sns.scatterplot(ad_signal, aa_signal)
plt.xlabel(r"A $\to$ D signal flow")
plt.ylabel(r"A $\to$ D + A $\to$ A signal flow")
plt.show()
# distribution of per-node differences between the two signal-flow variants
plt.figure(figsize=(10, 5))
sns.distplot(aa_signal - ad_signal)
plt.xlabel(r"(A $\to$ D + A $\to$ A signal flow) - (A $\to$ D signal flow)")
plt.ylabel("Frequency")
plt.show()
# %% [markdown]
# # Look at the different 4-color combinations, compute signal flow on each separately
# Plot the distributions of signal flow for the 4-colors against each other
GRAPH_TYPES = ["Gad", "Gaa", "Gdd", "Gda"]
signal_flows = []
for g in GRAPH_TYPES:
    adj = load_everything(g, version=GRAPH_VERSION)
    # restrict every graph to the same node subset
    adj = adj[np.ix_(inds, inds)]
    scatter_df = signal_flow(adj)
    sf = scatter_df["Signal flow"]
    signal_flows.append(sf)
# rows = nodes, columns = graph types
signal_flows = np.array(signal_flows).T
pairplot(signal_flows, col_names=GRAPH_TYPES, height=4)
plt.show()
# fig, ax = plt.subplots(1, 1, figsize=(10, 20)) # voltage = voltage_df.values # log_voltage = np.log10(voltage) # matrixplot( # rank_voltage_df.values, # ax=ax, # row_meta=meta, # row_sort_class=[class_key], # tick_rot=45, # ) # %% [markdown] # ## sns.distplot(np.log10(curr_node + 1), kde=False) # %% [markdown] # # pca = PCA(n_components=5) embed = pca.fit_transform(rank_voltage_df.values) pg = pairplot(embed, labels=meta[class_key].values, palette=CLASS_COLOR_DICT) pg._legend.remove() # %% [markdown] # # colors = np.vectorize(CLASS_COLOR_DICT.get)(meta["Merge Class"].values) sns.clustermap( rank_voltage_df.values, row_cluster=True, col_cluster=False, row_colors=colors )
n_components=int(np.ceil(np.log2(np.min(X.shape))))) path_embed = cmds.fit_transform(X) elbows, elbow_vals = select_dimension(cmds.singular_values_, n_elbows=3) rng = np.arange(1, len(cmds.singular_values_) + 1) elbows = np.array(elbows) fig, ax = plt.subplots(1, 1, figsize=(8, 4)) pc = ax.scatter(elbows, elbow_vals, color="red", label="ZG") pc.set_zorder(10) ax.plot(rng, cmds.singular_values_, "o-") ax.legend() stashfig("cmds-screeplot" + basename) # %% [markdown] # ## pairplot(path_embed, alpha=0.02) stashfig("cmds-pairs-all" + basename) # %% [markdown] # ## print("Running AGMM on CMDS embedding") n_components = 4 agmm = AutoGMMCluster(max_components=40, n_jobs=-2) pred = agmm.fit_predict(path_embed[:, :n_components]) print(f"Number of clusters: {agmm.n_components_}") # %% [markdown] # ## pairplot( path_embed[:, :n_components],
palette = dict(zip(np.unique(labels), sns.color_palette("deep", 10))) for method, embedding in embeddings.items(): plot_df = make_plot_df(embedding) plot_dfs[method] = plot_df #%% [markdown] # ## Looking at the spectral embeddings # In the next few cells, I plot # - Pairplots # - Individual eigenvectors, sorted by block and then by node degree w/in block # - Eigenvector values for each node plotted agains node degree #%% pairplots for method, embedding in embeddings.items(): pairplot( embedding, labels=labels, palette=palette, legend_name="Community", title=method ) #%% eigenvector plots for method, plot_df in plot_dfs.items(): fig, axs = plt.subplots(2, 2, figsize=(16, 8)) for i, ax in enumerate(axs.ravel()): sns.scatterplot( data=plot_df, x="ind", y=i, hue="label", ax=ax, legend=False, palette=palette, s=10,
# %% [markdown] # # Run clustering using LSE on the sum graph n_components = 4 gmm_params = {"n_init": N_INIT, "covariance_type": "all"} out_dicts = [] embed = "LSE" cluster = "GMM" lse_latent = lse(adj, 4, regularizer=None) latent = lse_latent pairplot(latent, labels=simple_class_labels, title=embed) for k in range(MIN_CLUSTERS, MAX_CLUSTERS + 1): run_name = f"k = {k}, {cluster}, {embed}, right hemisphere (A to D), PTR, raw" print(run_name) print() # Cluster gmm = GaussianCluster(min_components=k, max_components=k, **gmm_params) gmm.fit(latent) pred_labels = gmm.predict(latent) # ARI base_dict = { "K": k, "Cluster": cluster,