Example #1
def compute_ari(idx, param_df, classes, class_type="Class 1", remove_non_mb=False):
    preprocess_params = dict(param_df.loc[idx, ["binarize", "threshold"]])
    graph_type = param_df.loc[idx, "graph_type"]
    mg = load_metagraph(graph_type, version=BRAIN_VERSION)
    mg = preprocess(mg, sym_threshold=True, remove_pdiff=True, **preprocess_params)
    left_mb_indicator = mg.meta[class_type].isin(classes) & (
        mg.meta["Hemisphere"] == "L"
    )
    right_mb_indicator = mg.meta[class_type].isin(classes) & (
        mg.meta["Hemisphere"] == "R"
    )
    labels = np.zeros(len(mg.meta))  # 0 = non-MB, 1 = left MB, 2 = right MB
    labels[left_mb_indicator.values] = 1
    labels[right_mb_indicator.values] = 2
    pred_labels = best_block_df[idx]
    pred_labels = pred_labels[pred_labels.index.isin(mg.meta.index)]
    assert np.array_equal(pred_labels.index, mg.meta.index), f"index mismatch at {idx}"

    if remove_non_mb:  # only consider ARI for clusters with some MB mass
        uni_pred = np.unique(pred_labels)
        keep_mask = np.ones(len(labels), dtype=bool)
        for p in uni_pred:
            if np.sum(labels[pred_labels == p]) == 0:
                keep_mask[pred_labels == p] = False
        labels = labels[keep_mask]
        pred_labels = pred_labels[keep_mask]

    ari = adjusted_rand_score(labels, pred_labels)
    return ari
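
# Hypothetical usage sketch (not from the original notebook): `param_df` and
# `mb_classes` are assumed from the surrounding code. Score every parameter
# setting and rank by ARI.
import pandas as pd

aris = {i: compute_ari(i, param_df, mb_classes, remove_non_mb=True)
        for i in param_df.index}
ari_series = pd.Series(aris, name="ARI").sort_values(ascending=False)
print(ari_series.head())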
Example #2
def compute_ari(idx):
    preprocess_params = dict(best_param_df.loc[idx, ["binarize", "threshold"]])
    graph_type = best_param_df.loc[idx, "graph_type"]
    mg = load_metagraph(graph_type, version=BRAIN_VERSION)
    mg = preprocess(mg,
                    sym_threshold=True,
                    remove_pdiff=True,
                    **preprocess_params)
    left_mb_indicator = mg.meta["Class 1"].isin(mb_classes) & (
        mg.meta["Hemisphere"] == "L")
    right_mb_indicator = mg.meta["Class 1"].isin(mb_classes) & (
        mg.meta["Hemisphere"] == "R")
    labels = np.zeros(len(mg.meta))
    labels[left_mb_indicator.values] = 1
    labels[right_mb_indicator.values] = 2
    pred_labels = best_block_df[idx]
    pred_labels = pred_labels[pred_labels.index.isin(mg.meta.index)]
    assert np.array_equal(pred_labels.index, mg.meta.index)
    ari = adjusted_rand_score(labels, pred_labels)
    return ari
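
# For reference, `adjusted_rand_score` is symmetric and invariant to how the
# cluster labels are named, so the arbitrary 0/1/2 coding above is safe.
# A self-contained check:
from sklearn.metrics import adjusted_rand_score

true = [0, 0, 1, 1, 2, 2]
print(adjusted_rand_score(true, [2, 2, 0, 0, 1, 1]))  # 1.0, same partition
print(adjusted_rand_score(true, [0, 1, 2, 0, 1, 2]))  # negative, worse than chance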
Example #3
def run_experiment(
    graph_type=None,
    threshold=None,
    binarize=None,
    seed=None,
    param_key=None,
    objective_function=None,
    implementation="leidenalg",
    **kws,
):
    np.random.seed(seed)

    # load and preprocess the data
    mg = load_metagraph(graph_type, version=BRAIN_VERSION)
    mg = preprocess(
        mg,
        threshold=threshold,
        sym_threshold=True,
        remove_pdiff=True,
        binarize=binarize,
    )
    if implementation == "leidenalg":
        if objective_function == "CPM":
            partition_type = la.CPMVertexPartition
        elif objective_function == "modularity":
            partition_type = la.ModularityVertexPartition
        else:
            raise ValueError(f"unknown objective_function: {objective_function}")
        partition, modularity = run_leiden(
            mg,
            temp_loc=seed,
            implementation=implementation,
            partition_type=partition_type,
            **kws,
        )
    elif implementation == "igraph":
        partition, modularity = run_leiden(
            mg, temp_loc=seed, implementation=implementation, **kws
        )
    else:
        raise ValueError(f"unknown implementation: {implementation}")
    partition.name = param_key
    return partition, modularity
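
# Hypothetical call (assumes `run_leiden` forwards extra keywords such as
# leidenalg's `resolution_parameter` through **kws):
partition, modularity = run_experiment(
    graph_type="G",
    threshold=1,
    binarize=True,
    seed=8888,
    param_key="G-t1-CPM",
    objective_function="CPM",
    resolution_parameter=0.05,
)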
Example #4
    return last


# %% [markdown]
# ## Load data
# Here we work with `G`, the directed graph formed by summing the edge weights
# of the 4 different graph types. Preprocessing removes partially differentiated
# cells and cuts out the lowest 5th percentile of nodes by number of incident
# synapses (the 5th percentile is roughly 12 synapses). After this, the largest
# connected component is used (see the sketch at the end of this example).

mg = load_metagraph("G", version="2020-04-01")
mg = preprocess(
    mg,
    threshold=0,
    sym_threshold=False,
    remove_pdiff=True,
    binarize=False,
    weight="weight",
)
meta = mg.meta

# plot where we are cutting out nodes based on degree
degrees = mg.calculate_degrees()
fig, ax = plt.subplots(1, 1, figsize=(5, 2.5))
sns.distplot(np.log10(degrees["Total edgesum"]), ax=ax)
q = np.quantile(degrees["Total edgesum"], 0.05)
ax.axvline(np.log10(q), linestyle="--", color="r")
ax.set_xlabel("log10(total synapses)")

# remove low degree neurons
idx = meta[degrees["Total edgesum"] > q].index
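
# The markdown above also calls for keeping the largest connected component
# after the degree cut. A sketch of that step with plain networkx, assuming
# `mg.g` is the underlying directed graph (the MetaGraph reindexing API is
# not shown in this excerpt):
import networkx as nx

g_cut = mg.g.subgraph(idx)
largest_cc = max(nx.weakly_connected_components(g_cut), key=len)
g_lcc = g_cut.subgraph(largest_cc).copy()
print(f"{len(g_lcc)} nodes in the largest connected component")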
Example #5
def stashcsv(df, name, **kws):
    savecsv(df, name, foldername=FNAME, save_on=True, **kws)


VERSION = "2020-01-29"
print(f"Using version {VERSION}")

graph_type = "Gad"
threshold = 1
weight = "weight"
mg = load_metagraph(graph_type, VERSION)
mg = preprocess(
    mg,
    threshold=threshold,
    sym_threshold=True,
    remove_pdiff=False,
    binarize=False,
    weight=weight,
)
print(f"Preprocessed graph {graph_type} with threshold={threshold}, weight={weight}")

out_classes = ["O_dVNC"]
sens_classes = ["sens"]
cutoff = 8

print(f"Finding paths from {sens_classes} to {out_classes} of max length {cutoff}")

adj = nx.to_numpy_array(mg.g, weight=weight, nodelist=mg.meta.index.values)
prob_mat = adj.copy()
row_sums = prob_mat.sum(axis=1)
row_sums[row_sums == 0] = 1
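
# The zero-row guard above implies the next (truncated) step: row-normalize
# `prob_mat` into a Markov transition matrix for the path search. A sketch:
prob_mat = prob_mat / row_sums[:, None]  # all-zero rows stay all-zero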
Example #6
    )


def stashobj(obj, name, **kws):
    saveobj(obj, name, foldername=FNAME, save_on=SAVEOBJS, **kws)


graph_type = "G"
threshold = 3
binarize = True

# load and preprocess the data
mg = load_metagraph(graph_type, version=BRAIN_VERSION)
mg = preprocess(mg,
                threshold=threshold,
                sym_threshold=True,
                remove_pdiff=True,
                binarize=binarize)

#%%

import leidenalg as la
import igraph as ig


def _process_metagraph(mg, temp_loc):
    adj = mg.adj
    adj = symmetrize(adj, method="avg")
    mg = MetaGraph(adj, mg.meta)
    nx.write_graphml(mg.g, temp_loc)
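
# For orientation, a sketch of the read-back side that `run_leiden` presumably
# performs: load the symmetrized graphml into igraph and partition it with
# leidenalg. The wiring here is an assumption; only `Graph.Read_GraphML` and
# `la.find_partition` are known APIs.
def _leiden_from_graphml(temp_loc, partition_type=la.ModularityVertexPartition):
    g = ig.Graph.Read_GraphML(temp_loc)  # graph written by _process_metagraph
    partition = la.find_partition(g, partition_type, weights="weight")
    return partition, partition.quality()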
Example #7
param_df.loc[rank_df.index, "rank_AL-roARI"] = rank_df["AL-roARI"]
param_df.loc[rank_df.index, "rank_pairedness"] = rank_df["pairedness"]
param_df.loc[rank_df.index, "rank_adj_pairedness"] = rank_df["adj_pairedness"]

#%%
param_df.sort_values("pairedness", ascending=False)

# %% [markdown]
# # Plot a candidate

# idx = sort_index[2]
idx = "LorenBerglund"
preprocess_params = dict(param_df.loc[idx, ["binarize", "threshold"]])
graph_type = param_df.loc[idx, "graph_type"]
mg = load_metagraph(graph_type, version=BRAIN_VERSION)
mg = preprocess(mg, sym_threshold=True, remove_pdiff=True, **preprocess_params)

labels = np.zeros(len(mg.meta))

pred_labels = best_block_df[idx]
pred_labels = pred_labels[pred_labels.index.isin(mg.meta.index)]
partition = pred_labels.astype(int)
title = idx
class_labels = mg["Merge Class"]
lineage_labels = mg["lineage"]
basename = idx


def augment_classes(class_labels, lineage_labels, fill_unk=True):
    if fill_unk:
        classlin_labels = class_labels.copy()
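
# The excerpt cuts off here. Judging from Example #8 below, where lineage
# labels (prefixed with "~") stand in for unknown classes, the fill step
# might look roughly like this hypothetical sketch:
#     unk_mask = classlin_labels == "unk"
#     classlin_labels[unk_mask] = lineage_labels[unk_mask]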
Example #8
def run_experiment(graph_type=None,
                   threshold=None,
                   res=None,
                   binarize=None,
                   seed=None,
                   param_key=None):
    # common names
    if BLIND:
        basename = f"{param_key}-"
        title = param_key
    else:
        basename = f"louvain-res{res}-t{threshold}-{graph_type}-"
        title = f"Louvain, {graph_type}, res = {res}, threshold = {threshold}"

    np.random.seed(seed)

    # load and preprocess the data
    mg = load_metagraph(graph_type, version=BRAIN_VERSION)
    mg = preprocess(
        mg,
        threshold=threshold,
        sym_threshold=True,
        remove_pdiff=True,
        binarize=binarize,
    )
    g_sym = nx.to_undirected(mg.g)
    skeleton_labels = np.array(list(g_sym.nodes()))
    partition, modularity = run_louvain(g_sym, res, skeleton_labels)

    partition_series = pd.Series(partition, index=skeleton_labels)
    partition_series.name = param_key

    if SAVEFIGS:
        # get out some metadata
        class_label_dict = nx.get_node_attributes(g_sym, "Merge Class")
        class_labels = np.array(itemgetter(*skeleton_labels)(class_label_dict))
        lineage_label_dict = nx.get_node_attributes(g_sym, "lineage")
        lineage_labels = np.array(
            itemgetter(*skeleton_labels)(lineage_label_dict))
        lineage_labels = np.vectorize(lambda x: "~" + x)(lineage_labels)
        classlin_labels, color_dict, hatch_dict = augment_classes(
            class_labels, lineage_labels)

        # TODO then sort all of them by proportion of sensory/motor
        # barplot by merge class and lineage
        _, _, order = barplot_text(
            partition,
            classlin_labels,
            color_dict=color_dict,
            plot_proportions=False,
            norm_bar_width=True,
            figsize=(24, 18),
            title=title,
            hatch_dict=hatch_dict,
            return_order=True,
        )
        stashfig(basename + "barplot-mergeclasslin-props")
        category_order = np.unique(partition)[order]

        fig, axs = barplot_text(
            partition,
            class_labels,
            color_dict=color_dict,
            plot_proportions=False,
            norm_bar_width=True,
            figsize=(24, 18),
            title=title,
            hatch_dict=None,
            category_order=category_order,
        )
        stashfig(basename + "barplot-mergeclass-props")
        fig, axs = barplot_text(
            partition,
            class_labels,
            color_dict=color_dict,
            plot_proportions=False,
            norm_bar_width=False,
            figsize=(24, 18),
            title=title,
            hatch_dict=None,
            category_order=category_order,
        )
        stashfig(basename + "barplot-mergeclass-counts")

        # TODO add gridmap

        counts = False
        weights = False
        prob_df = get_blockmodel_df(mg.adj,
                                    partition,
                                    return_counts=counts,
                                    use_weights=weights)
        prob_df = prob_df.reindex(category_order, axis=0)
        prob_df = prob_df.reindex(category_order, axis=1)
        probplot(100 * prob_df,
                 fmt="2.0f",
                 figsize=(20, 20),
                 title=title,
                 font_scale=0.7)
        stashfig(basename + f"probplot-counts{counts}-weights{weights}")

    return partition_series, modularity
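
# `run_louvain` is defined elsewhere in this repo. For orientation, a minimal
# equivalent sketch using the python-louvain package (an assumption, not
# necessarily what `run_louvain` actually does):
import numpy as np
import community as community_louvain  # python-louvain

def run_louvain_sketch(g_sym, res, skeleton_labels):
    # best_partition returns a {node: community_id} dict
    partition_map = community_louvain.best_partition(g_sym, resolution=res)
    partition = np.array([partition_map[n] for n in skeleton_labels])
    modularity = community_louvain.modularity(partition_map, g_sym)
    return partition, modularity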
Example #9
        full_meta[f"{sc}_{co}_order"] = full_meta[sc].map(class_value)
        total_sort_by.append(f"{sc}_{co}_order")
    total_sort_by.append(sc)

full_mg = full_mg.sort_values(total_sort_by, ascending=False)
full_meta = full_mg.meta

n_leaf = full_meta[f"lvl{lowest_level}_labels"].nunique()
n_pairs = len(full_meta) // 2

# %% [markdown]
# ## Random walk stuff

ad_mg = load_metagraph("Gad")
ad_mg = preprocess(ad_mg,
                   sym_threshold=False,
                   remove_pdiff=True,
                   binarize=False)
ad_mg.meta["inds"] = range(len(ad_mg))
ad_adj = ad_mg.adj
meta = ad_mg.meta

source_groups = [
    ("sens-ORN", ),
    ("sens-MN", ),
    ("sens-photoRh5", "sens-photoRh6"),
    ("sens-thermo", ),
    ("sens-vtd", ),
    ("sens-AN", ),
    ("dVNC", "dVNC;CN", "dVNC;RG", "dSEZ;dVNC"),
    ("dSEZ", "dSEZ;CN", "dSEZ;LHN", "dSEZ;dVNC"),
    ("motor-PaN", "motor-MN", "motor-VAN", "motor-AN"),