def compute_ari(idx, param_df, classes, class_type="Class 1", remove_non_mb=False):
    preprocess_params = dict(param_df.loc[idx, ["binarize", "threshold"]])
    graph_type = param_df.loc[idx, "graph_type"]
    mg = load_metagraph(graph_type, version=BRAIN_VERSION)
    mg = preprocess(mg, sym_threshold=True, remove_pdiff=True, **preprocess_params)

    # true labels: left MB, right MB, or neither
    left_mb_indicator = mg.meta[class_type].isin(classes) & (
        mg.meta["Hemisphere"] == "L"
    )
    right_mb_indicator = mg.meta[class_type].isin(classes) & (
        mg.meta["Hemisphere"] == "R"
    )
    labels = np.zeros(len(mg.meta))
    labels[left_mb_indicator.values] = 1
    labels[right_mb_indicator.values] = 2

    pred_labels = best_block_df[idx]
    pred_labels = pred_labels[pred_labels.index.isin(mg.meta.index)]
    assert np.array_equal(pred_labels.index, mg.meta.index), f"Index mismatch: {idx}"

    if remove_non_mb:
        # only consider ARI for clusters with some MB mass
        uni_pred = np.unique(pred_labels)
        keep_mask = np.ones(len(labels), dtype=bool)
        for p in uni_pred:
            if np.sum(labels[pred_labels == p]) == 0:
                keep_mask[pred_labels == p] = False
        labels = labels[keep_mask]
        pred_labels = pred_labels[keep_mask]

    ari = adjusted_rand_score(labels, pred_labels)
    return ari
def compute_ari(idx):
    preprocess_params = dict(best_param_df.loc[idx, ["binarize", "threshold"]])
    graph_type = best_param_df.loc[idx, "graph_type"]
    mg = load_metagraph(graph_type, version=BRAIN_VERSION)
    mg = preprocess(mg, sym_threshold=True, remove_pdiff=True, **preprocess_params)

    left_mb_indicator = mg.meta["Class 1"].isin(mb_classes) & (
        mg.meta["Hemisphere"] == "L"
    )
    right_mb_indicator = mg.meta["Class 1"].isin(mb_classes) & (
        mg.meta["Hemisphere"] == "R"
    )
    labels = np.zeros(len(mg.meta))
    labels[left_mb_indicator.values] = 1
    labels[right_mb_indicator.values] = 2

    pred_labels = best_block_df[idx]
    pred_labels = pred_labels[pred_labels.index.isin(mg.meta.index)]
    assert np.array_equal(pred_labels.index, mg.meta.index)

    ari = adjusted_rand_score(labels, pred_labels)
    return ari
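# %% [markdown]
# As a quick sanity check on the metric itself (a standalone toy example, not
# pipeline output): `adjusted_rand_score` is symmetric, invariant to how the
# cluster labels are named, and chance-corrected, so relabeled-but-identical
# partitions score 1 and unrelated partitions score near or below 0.

# %%
from sklearn.metrics import adjusted_rand_score

true_labels = [0, 0, 1, 1, 2, 2]
pred_perm = [2, 2, 0, 0, 1, 1]  # same partition, permuted label names
pred_bad = [0, 1, 0, 1, 0, 1]  # partition unrelated to the true one

print(adjusted_rand_score(true_labels, pred_perm))  # 1.0: label names are ignored
print(adjusted_rand_score(true_labels, pred_bad))  # negative: worse than chance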
def run_experiment(
    graph_type=None,
    threshold=None,
    binarize=None,
    seed=None,
    param_key=None,
    objective_function=None,
    implementation="leidenalg",
    **kws,
):
    np.random.seed(seed)

    # load and preprocess the data
    mg = load_metagraph(graph_type, version=BRAIN_VERSION)
    mg = preprocess(
        mg,
        threshold=threshold,
        sym_threshold=True,
        remove_pdiff=True,
        binarize=binarize,
    )
    if implementation == "leidenalg":
        if objective_function == "CPM":
            partition_type = la.CPMVertexPartition
        elif objective_function == "modularity":
            partition_type = la.ModularityVertexPartition
        else:
            raise ValueError(f"Unknown objective_function: {objective_function}")
        partition, modularity = run_leiden(
            mg,
            temp_loc=seed,
            implementation=implementation,
            partition_type=partition_type,
            **kws,
        )
    elif implementation == "igraph":
        partition, modularity = run_leiden(
            mg, temp_loc=seed, implementation=implementation, **kws
        )
    partition.name = param_key
    return partition, modularity
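# %% [markdown]
# `run_leiden` is a project helper not shown in this section. For reference, a
# minimal sketch of the underlying `leidenalg` calls on a toy igraph graph; the
# graph and resolution value here are illustrative only. Modularity takes no
# resolution parameter, while CPM requires one.

# %%
import igraph as ig
import leidenalg as la

toy_g = ig.Graph.Erdos_Renyi(n=100, p=0.05)  # toy undirected graph
mod_partition = la.find_partition(toy_g, la.ModularityVertexPartition, seed=0)
cpm_partition = la.find_partition(
    toy_g, la.CPMVertexPartition, resolution_parameter=0.05, seed=0
)
print(len(mod_partition), mod_partition.quality())  # n communities, objective value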
    return last


# %% [markdown]
# ## Load data
# In this case we are working with `G`, the directed graph formed by summing the edge
# weights of the 4 different graph types. Preprocessing here includes removing
# partially differentiated cells and cutting out the lowest 5th percentile of nodes in
# terms of their number of incident synapses (5th percentile ~= 12 synapses). After
# this, the largest connected component is used.

mg = load_metagraph("G", version="2020-04-01")
mg = preprocess(
    mg,
    threshold=0,
    sym_threshold=False,
    remove_pdiff=True,
    binarize=False,
    weight="weight",
)
meta = mg.meta

# plot where we are cutting out nodes based on degree
degrees = mg.calculate_degrees()
fig, ax = plt.subplots(1, 1, figsize=(5, 2.5))
sns.distplot(np.log10(degrees["Total edgesum"]), ax=ax)
q = np.quantile(degrees["Total edgesum"], 0.05)
ax.axvline(np.log10(q), linestyle="--", color="r")
ax.set_xlabel("log10(total synapses)")

# remove low degree neurons
idx = meta[degrees["Total edgesum"] > q].index
def stashcsv(df, name, **kws):
    savecsv(df, name, foldername=FNAME, save_on=True, **kws)


VERSION = "2020-01-29"
print(f"Using version {VERSION}")

graph_type = "Gad"
threshold = 1
weight = "weight"
mg = load_metagraph(graph_type, VERSION)
mg = preprocess(
    mg,
    threshold=threshold,
    sym_threshold=True,
    remove_pdiff=False,
    binarize=False,
    weight=weight,
)
print(f"Preprocessed graph {graph_type} with threshold={threshold}, weight={weight}")

out_classes = ["O_dVNC"]
sens_classes = ["sens"]
cutoff = 8
print(f"Finding paths from {sens_classes} to {out_classes} of max length {cutoff}")

# set up the random walk transition matrix from the adjacency matrix
adj = nx.to_numpy_array(mg.g, weight=weight, nodelist=mg.meta.index.values)
prob_mat = adj.copy()
row_sums = prob_mat.sum(axis=1)
row_sums[row_sums == 0] = 1
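# %% [markdown]
# The block above is cut off just before the normalization step. A
# self-contained sketch of the intended row normalization, assuming the goal is
# a row-stochastic transition matrix: divide each row by its sum, leaving
# all-zero rows (nodes with no outgoing edges) as zeros rather than dividing
# by zero.

# %%
import numpy as np

toy_adj = np.array([[0.0, 2.0, 2.0], [1.0, 0.0, 3.0], [0.0, 0.0, 0.0]])
toy_row_sums = toy_adj.sum(axis=1)
toy_row_sums[toy_row_sums == 0] = 1  # avoid division by zero for sink nodes
toy_prob_mat = toy_adj / toy_row_sums[:, None]
print(toy_prob_mat)  # rows with edges sum to 1; the sink row stays all zeros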
)


def stashobj(obj, name, **kws):
    saveobj(obj, name, foldername=FNAME, save_on=SAVEOBJS, **kws)


graph_type = "G"
threshold = 3
binarize = True

# load and preprocess the data
mg = load_metagraph(graph_type, version=BRAIN_VERSION)
mg = preprocess(
    mg, threshold=threshold, sym_threshold=True, remove_pdiff=True, binarize=binarize
)

#%%
import leidenalg as la
import igraph as ig


def _process_metagraph(mg, temp_loc):
    # symmetrize the adjacency, then write to graphml for igraph/leidenalg to read
    adj = mg.adj
    adj = symmetrize(adj, method="avg")
    mg = MetaGraph(adj, mg.meta)
    nx.write_graphml(mg.g, temp_loc)
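# %% [markdown]
# For reference, a minimal numpy sketch of what the "avg" symmetrization above
# amounts to (not the project's `symmetrize` implementation): average each
# directed edge weight with its reciprocal.

# %%
import numpy as np

directed_adj = np.array([[0.0, 4.0], [2.0, 0.0]])  # directed weights
sym_adj = (directed_adj + directed_adj.T) / 2  # average reciprocal edge weights
print(sym_adj)  # [[0., 3.], [3., 0.]]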
param_df.loc[rank_df.index, "rank_AL-roARI"] = rank_df["AL-roARI"]
param_df.loc[rank_df.index, "rank_pairedness"] = rank_df["pairedness"]
param_df.loc[rank_df.index, "rank_adj_pairedness"] = rank_df["adj_pairedness"]

#%%
param_df.sort_values("pairedness", ascending=False)

# %% [markdown]
# # Plot a candidate

# idx = sort_index[2]
idx = "LorenBerglund"
preprocess_params = dict(param_df.loc[idx, ["binarize", "threshold"]])
graph_type = param_df.loc[idx, "graph_type"]
mg = load_metagraph(graph_type, version=BRAIN_VERSION)
mg = preprocess(mg, sym_threshold=True, remove_pdiff=True, **preprocess_params)
labels = np.zeros(len(mg.meta))

pred_labels = best_block_df[idx]
pred_labels = pred_labels[pred_labels.index.isin(mg.meta.index)]
partition = pred_labels.astype(int)
title = idx
class_labels = mg["Merge Class"]
lineage_labels = mg["lineage"]
basename = idx


def augment_classes(class_labels, lineage_labels, fill_unk=True):
    if fill_unk:
        classlin_labels = class_labels.copy()
def run_experiment(
    graph_type=None, threshold=None, res=None, binarize=None, seed=None, param_key=None
):
    # common names
    if BLIND:
        basename = f"{param_key}-"
        title = param_key
    else:
        basename = f"louvain-res{res}-t{threshold}-{graph_type}-"
        title = f"Louvain, {graph_type}, res = {res}, threshold = {threshold}"

    np.random.seed(seed)

    # load and preprocess the data
    mg = load_metagraph(graph_type, version=BRAIN_VERSION)
    mg = preprocess(
        mg,
        threshold=threshold,
        sym_threshold=True,
        remove_pdiff=True,
        binarize=binarize,
    )
    g_sym = nx.to_undirected(mg.g)
    skeleton_labels = np.array(list(g_sym.nodes()))
    partition, modularity = run_louvain(g_sym, res, skeleton_labels)

    partition_series = pd.Series(partition, index=skeleton_labels)
    partition_series.name = param_key

    if SAVEFIGS:
        # get out some metadata
        class_label_dict = nx.get_node_attributes(g_sym, "Merge Class")
        class_labels = np.array(itemgetter(*skeleton_labels)(class_label_dict))
        lineage_label_dict = nx.get_node_attributes(g_sym, "lineage")
        lineage_labels = np.array(itemgetter(*skeleton_labels)(lineage_label_dict))
        lineage_labels = np.vectorize(lambda x: "~" + x)(lineage_labels)
        classlin_labels, color_dict, hatch_dict = augment_classes(
            class_labels, lineage_labels
        )

        # TODO then sort all of them by proportion of sensory/motor
        # barplot by merge class and lineage
        _, _, order = barplot_text(
            partition,
            classlin_labels,
            color_dict=color_dict,
            plot_proportions=False,
            norm_bar_width=True,
            figsize=(24, 18),
            title=title,
            hatch_dict=hatch_dict,
            return_order=True,
        )
        stashfig(basename + "barplot-mergeclasslin-props")
        category_order = np.unique(partition)[order]

        fig, axs = barplot_text(
            partition,
            class_labels,
            color_dict=color_dict,
            plot_proportions=False,
            norm_bar_width=True,
            figsize=(24, 18),
            title=title,
            hatch_dict=None,
            category_order=category_order,
        )
        stashfig(basename + "barplot-mergeclass-props")
        fig, axs = barplot_text(
            partition,
            class_labels,
            color_dict=color_dict,
            plot_proportions=False,
            norm_bar_width=False,
            figsize=(24, 18),
            title=title,
            hatch_dict=None,
            category_order=category_order,
        )
        stashfig(basename + "barplot-mergeclass-counts")
        # TODO add gridmap

        counts = False
        weights = False
        prob_df = get_blockmodel_df(
            mg.adj, partition, return_counts=counts, use_weights=weights
        )
        prob_df = prob_df.reindex(category_order, axis=0)
        prob_df = prob_df.reindex(category_order, axis=1)
        probplot(
            100 * prob_df, fmt="2.0f", figsize=(20, 20), title=title, font_scale=0.7
        )
        stashfig(basename + f"probplot-counts{counts}-weights{weights}")

    return partition_series, modularity
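# %% [markdown]
# `run_louvain` is a project utility not shown in this section. A minimal
# sketch of an equivalent Louvain call using the `python-louvain` package on a
# built-in toy graph, assuming the same resolution-parameterized modularity
# objective:

# %%
import networkx as nx
import community as community_louvain  # the python-louvain package

toy_g = nx.karate_club_graph()
partition_dict = community_louvain.best_partition(toy_g, resolution=1.0, random_state=0)
toy_modularity = community_louvain.modularity(partition_dict, toy_g)
n_communities = max(partition_dict.values()) + 1
print(f"{n_communities} communities, modularity = {toy_modularity:.3f}")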
full_meta[f"{sc}_{co}_order"] = full_meta[sc].map(class_value) total_sort_by.append(f"{sc}_{co}_order") total_sort_by.append(sc) full_mg = full_mg.sort_values(total_sort_by, ascending=False) full_meta = full_mg.meta n_leaf = full_meta[f"lvl{lowest_level}_labels"].nunique() n_pairs = len(full_meta) // 2 # %% [markdown] # ## Random walk stuff ad_mg = load_metagraph("Gad") ad_mg = preprocess(ad_mg, sym_threshold=False, remove_pdiff=True, binarize=False) ad_mg.meta["inds"] = range(len(ad_mg)) ad_adj = ad_mg.adj meta = ad_mg.meta source_groups = [ ("sens-ORN", ), ("sens-MN", ), ("sens-photoRh5", "sens-photoRh6"), ("sens-thermo", ), ("sens-vtd", ), ("sens-AN", ), ("dVNC", "dVNC;CN", "dVNC;RG", "dSEZ;dVNC"), ("dSEZ", "dSEZ;CN", "dSEZ;LHN", "dSEZ;dVNC"), ("motor-PaN", "motor-MN", "motor-VAN", "motor-AN"),