mg = MetaGraph(thresh_g, weight="max_norm_weight") meta = mg.meta adj = mg.adj.copy() # colsums = np.sum(adj, axis=0) # colsums[colsums == 0] = 1 # adj = adj / colsums[np.newaxis, :] adj = pass_to_ranks(adj) if use_spl: adj = graph_shortest_path(adj) if plus_c: adj += np.min(adj) if embed == "lse": latent = lse(adj, None, ptr=False) elif embed == "ase": latent = ase(adj, None, ptr=False) rot_latent, diff = procrustes_match(latent, meta) rot_latent = latent n_components = latent.shape[1] plot_df = pd.DataFrame(data=rot_latent) plot_df["Class"] = mg["Class 1"] fig, ax = plt.subplots(1, 1, figsize=(10, 10)) sns.scatterplot(x=0, y=1, data=plot_df, hue="Class", legend=False, ax=ax) ax.set_title(f"Residual F. norm = {diff}, threshold = {threshold}") left_paired_inds, right_paired_inds = get_paired_inds(meta) temp_neigh_probs = compute_neighbors_at_k(
# Restrict the metagraph to its largest connected component, then embed
# with a regularized LSE and symmetrize the paired-cell latent positions.
mg.make_lcc()
print(f"Removed {n_verts - mg.n_verts} when finding the LCC")
# old_n_verts = sym_adj.shape[0]
# sym_adj, class_labels, side_labels = preprocess_graph(
#     sym_adj, class_labels, side_labels
# )
# n_verts = sym_adj.shape[0]
# print(f"Removed {old_n_verts - n_verts} nodes")

# %% [markdown]
# # Embedding
n_verts = mg.n_verts
sym_adj = mg.adj
side_labels = mg["Hemisphere"]
class_labels = mg["Merge Class"]

latent, laplacian = lse(sym_adj, N_COMPONENTS, regularizer=None, ptr=PTR)
# lse returns a left|right concatenation, so each "side" is half the columns.
latent_dim = latent.shape[1] // 2
screeplot(
    laplacian,
    title=f"Laplacian scree plot, R-DAD (ZG2 = {latent_dim} + {latent_dim})",
)
print(f"ZG chose dimension {latent_dim} + {latent_dim}")
# Plot the first 3 "out" dimensions next to the first 3 "in" dimensions.
plot_latent = np.concatenate(
    (latent[:, :3], latent[:, latent_dim:latent_dim + 3]), axis=-1
)
pairplot(plot_latent, labels=side_labels)

# take the mean for the paired cells, making sure to add back in the unpaired cells
sym_latent = (latent[:n_pairs] + latent[n_pairs:2 * n_pairs]) / 2
sym_latent = np.concatenate((sym_latent, latent[2 * n_pairs:]))
latent = sym_latent
# Optionally subset to the right hemisphere, embed, and fit a divisive
# (recursive) Gaussian clustering model on the latent positions.
# select the right hemisphere
if ONLY_RIGHT:
    right_inds = np.where(side_labels == "R")[0]
    adj = adj[np.ix_(right_inds, right_inds)]
    class_labels = class_labels[right_inds]
    skeleton_labels = skeleton_labels[right_inds]

adj, class_labels, skeleton_labels = preprocess_graph(
    adj, class_labels, skeleton_labels
)
# indices of nodes with a known (non-"Unk") class annotation
known_inds = np.where(class_labels != "Unk")[0]

# %% [markdown]
# # Embedding
n_verts = adj.shape[0]
latent = lse(adj, N_COMPONENTS, regularizer=None, ptr=PTR)
# pairplot(latent, labels=class_labels, title=embed)
latent_dim = latent.shape[1] // 2
print(f"ZG chose dimension {latent_dim} + {latent_dim}")

# %% [markdown]
# # Fitting divisive cluster model
start = timer()
dc = DivisiveCluster(n_init=N_INIT, cluster_method=CLUSTER_METHOD)
dc.fit(latent)
# FIX: was `end = end = timer()` — a redundant chained assignment; the
# single assignment below is equivalent and removes the stutter.
end = timer()
print()
print(f"DivisiveCluster took {(end - start)/60.0} minutes to fit")
print()
dc.print_tree(print_val="bic_ratio")

pred_labels = dc.predict(latent)
sym_mg.adj, inner_hier_labels=sym_mg["Class 1"], outer_hier_labels=sym_mg["Hemisphere"], figsize=(30, 30), hier_label_fontsize=5, transform="binarize", cbar=False, ) stashfig("heatmap-after-mods") # %% [markdown] # # # ad_norm_mg, n_pairs = pair_augment(ad_norm_mg) # ad_norm_mg = max_symmetrize(ad_norm_mg, n_pairs) # ad_norm_mg.make_lcc() ad_norm_lse_latent = lse(sym_mg.adj, n_components=None) # plot_latent_sweep(ad_norm_lse_latent, n_pairs) # remove_inds = [2, 7, 10, 15] # ad_norm_lse_latent = remove_cols(ad_norm_lse_latent, remove_inds) # %% [markdown] # # from scipy.linalg import orthogonal_procrustes left_latent = latent[:n_pairs, :] right_latent = latent[n_pairs:2 * n_pairs, :] R, scalar = orthogonal_procrustes(left_latent, right_latent) # %% [markdown] # # ad_raw_mg = load_metagraph("Gad")
degrees = degrees[d_sort] plt.figure(figsize=(10, 5)) sns.scatterplot(x=range(len(degrees)), y=degrees, s=30, linewidth=0) known_inds = np.where(class_labels != "Unk")[0] # %% [markdown] # # from graspy.cluster import PartitionalGaussianCluster # %% [markdown] # # Run clustering using LSE on the sum graph n_verts = adj.shape[0] latent = lse(adj, n_components, regularizer=None, ptr=PTR) pairplot(latent, labels=class_labels, title=embed) # %% [markdown] # # class PartitionCluster: def __init__(self): self.min_split_samples = 5 def fit(self, X, y=None): n_samples = X.shape[0] if n_samples > self.min_split_samples: cluster = GaussianCluster(min_components=1,
print((left_pair_ids == right_pair_ids).all())

# Re-check pair alignment after restricting to the largest connected
# component, then learn a Procrustes rotation from left to right pairs
# and apply it to every left-hemisphere latent position.
sym_mg.make_lcc()
n_pairs = sym_mg.meta["Pair ID"].nunique() - 1
left_pair_ids = sym_mg["Pair ID"][:n_pairs]
right_pair_ids = sym_mg["Pair ID"][n_pairs:2 * n_pairs]
# sanity check: rows i and i + n_pairs should belong to the same pair
print((left_pair_ids == right_pair_ids).all())
uni_pair, counts = np.unique(sym_mg["Pair ID"], return_counts=True)
print(np.min(counts))

# %% [markdown]
# #
left_pair_ids = sym_mg["Pair ID"][:n_pairs]
right_pair_ids = sym_mg["Pair ID"][n_pairs:2 * n_pairs]
latent = lse(sym_mg.adj, n_components=None)
left_latent = latent[:n_pairs, :]
right_latent = latent[n_pairs:2 * n_pairs, :]
R, scalar = orthogonal_procrustes(left_latent, right_latent)
n_components = latent.shape[1]
class_labels = sym_mg["lineage"]
n_unique = len(np.unique(class_labels))

# Rotate all left-hemisphere rows (not only the paired ones) by R, in place.
sym_mg.meta["Original index"] = range(len(sym_mg.meta))
left_df = sym_mg.meta[sym_mg.meta["Hemisphere"] == "L"]
left_inds = left_df["Original index"].values
left_latent = latent[left_inds, :]
left_latent = left_latent @ R
latent[left_inds, :] = left_latent
latent_cols = [f"dim {i}" for i in range(latent.shape[1])]