예제 #1
0
def _omni_embed(pop_array, atlas, graph_path, ID, subgraph_name='whole_brain'):
    """
    Embed a population of graphs with OMNI + classical MDS and save the result.

    Graphs whose upper-triangular entries have a variance that does not exceed
    1e-5 are dropped before embedding. If embedding that reduced population
    fails, the full population is embedded instead.

    Parameters
    ----------
    pop_array : sequence of ndarray
        Adjacency matrices, one per graph. They are stacked into a single
        array, so they must all share the same 2-D shape.
    atlas : str
        Atlas label; used in the log message and in the output file name.
    graph_path : str
        Path to a graph file. The output is written to an ``embeddings``
        directory created under the parent of the directory holding this file.
    ID : nested iterable
        Passed through ``flatten``; the first flattened element prefixes the
        output file name.
    subgraph_name : str
        Sub-network label; used in the log message and in the file name.

    Returns
    -------
    out_path : str
        Path of the ``.npy`` file containing the MDS-transformed embedding.
    """
    from graspy.embed import OmnibusEmbed, ClassicalMDS

    # Stack the upper triangle (diagonal included) of every graph.
    upper_tris = np.array([np.triu(graph) for graph in pop_array])
    flat_tris = upper_tris.reshape(
        upper_tris.shape[0], upper_tris.shape[1] * upper_tris.shape[2])

    # Transposed so that each graph is a column ("feature"): the selector then
    # reports the indices of the graphs whose variance clears the threshold.
    selector = VarianceThreshold(threshold=0.00001).fit(flat_tris.T)
    graphs_ix_keep = selector.get_support(indices=True)
    pop_array_red = [pop_array[i] for i in graphs_ix_keep]

    # Omnibus embedding -- random dot product graph (rdpg)
    print(f"Embedding ensemble for atlas: {atlas} and {subgraph_name}...")
    omni = OmnibusEmbed(check_lcc=False)
    mds = ClassicalMDS()
    try:
        omni_fit = omni.fit_transform(pop_array_red)
    except Exception as err:
        # Best-effort fallback, but never swallow KeyboardInterrupt/SystemExit
        # and always say why the reduced population was abandoned.
        print(f"Embedding the variance-filtered graphs failed ({err!r}); "
              f"retrying with all graphs.")
        omni_fit = omni.fit_transform(pop_array)

    # Transform omnibus tensor into dissimilarity feature
    mds_fit = mds.fit_transform(omni_fit)

    dir_path = str(Path(os.path.dirname(graph_path)).parent)
    namer_dir = dir_path + '/embeddings'
    os.makedirs(namer_dir, exist_ok=True)

    out_path = (f"{namer_dir}/{list(flatten(ID))[0]}_omnetome_{atlas}_"
                f"{subgraph_name}.npy")
    print('Saving...')
    np.save(out_path, mds_fit)
    return out_path
예제 #2
0
파일: utils.py 프로젝트: Mozihua/PyNets
    def omni_embed(pop_array):
        """
        Embed a population of graphs with OMNI + classical MDS and save it.

        Graphs whose upper-triangular entries have a variance that does not
        exceed 0.05 are dropped first. If embedding the reduced population
        fails, the full population is embedded instead. ``atlas``,
        ``graph_path`` and ``ID`` are read from the enclosing scope.

        Parameters
        ----------
        pop_array : sequence of ndarray
            Adjacency matrices, one per graph, all with the same 2-D shape.

        Returns
        -------
        None
            The embedding is written next to ``graph_path`` as
            ``<ID>_omnetome_<atlas>.npy``.
        """
        # Stack the upper triangle (diagonal included) of every graph.
        upper_tris = np.array([np.triu(graph) for graph in pop_array])
        flat_tris = upper_tris.reshape(
            upper_tris.shape[0], upper_tris.shape[1] * upper_tris.shape[2])

        # Transposed so that each graph is a column ("feature"): the selector
        # then reports which graphs clear the variance threshold.
        selector = VarianceThreshold(threshold=0.05).fit(flat_tris.T)
        graphs_ix_keep = selector.get_support(indices=True)
        pop_array_red = [pop_array[i] for i in graphs_ix_keep]

        # Omnibus embedding -- random dot product graph (rdpg)
        print(f"Embedding ensemble for atlas: {atlas}...")
        omni = OmnibusEmbed(check_lcc=False)
        try:
            mds_fit = ClassicalMDS().fit_transform(
                omni.fit_transform(pop_array_red))
        except Exception as err:
            # Best-effort fallback, but never swallow KeyboardInterrupt or
            # SystemExit, and always report why the fallback was taken.
            print(f"Embedding the variance-filtered graphs failed ({err!r}); "
                  f"retrying with all graphs.")
            mds_fit = ClassicalMDS().fit_transform(
                omni.fit_transform(pop_array))

        dir_path = os.path.dirname(graph_path)
        out_path = f"{dir_path}/{list(flatten(ID))[0]}_omnetome_{atlas}.npy"
        print('Saving...')
        np.save(out_path, mds_fit)
        return
예제 #3
0
 def __init__(self,
              learning_method,
              memory=None,
              verbose=False,
              plot_method=None,
              kfold=KFold(n_splits=4),
              flat_method=Flat):
     """
     Build the pipeline, making sure it starts with an MDS + flatten stage.

     Parameters
     ----------
     learning_method : list
         Forwarded to the parent constructor as ``steps``. Indexed as
         ``steps[0][1]``, so presumably a list of ``(name, estimator)``
         pairs -- confirm against the parent class.
     memory, verbose :
         Forwarded unchanged to the parent constructor.
     plot_method : optional
         Forwarded to the parent; also stored on the instance when not None.
     kfold : optional
         Forwarded to the parent; also stored on the instance when not None.
     flat_method : callable
         Wrapped in a ``FunctionTransformer`` for the 'Flat' step.
     """
     super(MDSPipeline, self).__init__(steps=learning_method,
                                       memory=memory,
                                       verbose=verbose,
                                       plot_method=plot_method,
                                       kfold=kfold)
     self.flat_method = flat_method

     # Prepend the MDS + flatten stage unless the caller already supplied a
     # ClassicalMDS as the first estimator.
     leading_estimator = self.steps[0][1]
     if not isinstance(leading_estimator, ClassicalMDS):
         mds_step = ('MDS', ClassicalMDS())
         flat_step = ('Flat',
                      FunctionTransformer(self.flat_method, validate=False))
         self.steps = [mds_step, flat_step] + self.steps

     # Explicit values win over whatever the parent constructor stored.
     if plot_method is not None:
         self.plot_method = plot_method
     if kfold is not None:
         self.kfold = kfold
예제 #4
0
# indicator = np.full(len(gm.positions_), i)
# all_positions += gm.positions_
# init_indicator.append(indicator)

# Add labels for the two reference points appended to `all_positions` below,
# then flatten the per-group label arrays into one 1-D label vector.
init_indicator.append(["Barycenter"])
init_indicator.append(["Truth"])
init_indicator = np.concatenate(init_indicator)
# init_indicator = np.array(init_indicator)
# Reference point 1: a constant matrix with every entry equal to 1 / A1.size
# (labelled "Barycenter" by position in the list).
all_positions.append(np.full(A1.shape, 1 / A1.size))
# Reference point 2: the transpose of P (labelled "Truth" by position).
# NOTE(review): presumably P is the ground-truth permutation -- confirm.
all_positions.append(P.T)
all_positions = np.array(all_positions)
# One row per position matrix, each matrix flattened into a vector.
all_positions = all_positions.reshape((len(all_positions), -1))

# Not used again within this excerpt.
position_pdist = pairwise_distances(all_positions, metric="euclidean")

# 2-D classical MDS on the flattened positions; with dissimilarity="euclidean"
# the estimator receives the raw vectors rather than a distance matrix.
cmds = ClassicalMDS(n_components=2, dissimilarity="euclidean")
all_X = cmds.fit_transform(all_positions)
# Translate so the last row (the P.T reference point) sits at the origin.
all_X -= all_X[-1]

# remove_rand = False
# if remove_rand:
#     X = all_X[n_rand:]
#     init_indicator = init_indicator[n_rand:]
# else:
X = all_X

# Embedding coordinates land in columns 0 and 1; "init" holds the point label.
plot_df = pd.DataFrame(data=X)
plot_df["init"] = init_indicator
sns.set_context("talk")
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
# sns.scatterplot(data=plot_df[plot_df["init"] == "Random"], x=0, y=1, ax=ax)
예제 #5
0
# metaheatmap(path_mat, meta, sortby_classes=["class_rank"], sortby_nodes=["mean_rank"])

# %% [markdown]
# #

from sklearn.manifold import MDS

# Convert to CSR before computing pairwise distances.
path_mat = path_mat.tocsr()  # for fast mult

print("Finding pairwise jaccard distances")
# NOTE(review): despite the name, the result is fed straight to ClassicalMDS
# as a precomputed dissimilarity matrix -- confirm the helper returns a dense
# square array.
pdist_sparse = pairwise_sparse_jaccard_distance(path_mat)

print(pdist_sparse.shape)

print("Embedding with MDS")
# Classical MDS on the precomputed Jaccard distances (n_components left at
# the estimator's default).
mds = ClassicalMDS(dissimilarity="precomputed")
# mds = MDS(dissimilarity="precomputed", n_components=6, n_init=16, n_jobs=-2)
jaccard_embedding = mds.fit_transform(pdist_sparse)

# %% [markdown]
# #

print("Clustering embedding")
# Cluster the embedded points, searching between 10 and 40 mixture components.
agmm = AutoGMMCluster(min_components=10,
                      max_components=40,
                      affinity="euclidean",
                      linkage="single")
labels = agmm.fit_predict(jaccard_embedding)

pairplot(jaccard_embedding,
         title="AGMM o CMDS o Jaccard o Sensorimotor Paths",
예제 #6
0
def _plot_embedding_panel(coords, labels, lp_inds, rp_inds, ax, title):
    """Draw one panel: a labelled 2-D scatter plus optional pair connections."""
    plot_df = pd.DataFrame(data=coords[:, :2])
    plot_df["labels"] = labels
    sns.scatterplot(
        data=plot_df,
        ax=ax,
        x=0,
        y=1,
        hue="labels",
        palette=CLASS_COLOR_DICT,
        legend=False,
        s=20,
        linewidth=0.5,
        alpha=0.7,
    )
    ax.axis("off")
    # Connections need both index sets; without them only the scatter is
    # drawn instead of failing on ``.iloc[None, ...]``.
    if lp_inds is not None and rp_inds is not None:
        add_connections(
            plot_df.iloc[lp_inds, 0],
            plot_df.iloc[rp_inds, 0],
            plot_df.iloc[lp_inds, 1],
            plot_df.iloc[rp_inds, 1],
            ax=ax,
        )
    ax.set_title(title)


def quick_embed_viewer(embed,
                       labels=None,
                       lp_inds=None,
                       rp_inds=None,
                       left_right_indexing=False):
    """
    Show six 2-D views of an embedding on a 3x2 grid of axes.

    The rows are classical MDS, t-SNE and UMAP; the columns are euclidean and
    cosine dissimilarity. Every panel is a scatter plot coloured by ``labels``
    with lines joining the paired points.

    Parameters
    ----------
    embed : ndarray
        Points to visualise, one row per point.
    labels : array-like, optional
        Per-point labels used for colouring via ``CLASS_COLOR_DICT``.
    lp_inds, rp_inds : array-like of int, optional
        Row positions of matched pairs; ``lp_inds[i]`` is connected to
        ``rp_inds[i]``. If either is None no connections are drawn.
    left_right_indexing : bool
        If True, ignore ``lp_inds``/``rp_inds`` and pair row ``i`` of the
        first half of ``embed`` with row ``i`` of the second half.

    Returns
    -------
    None
        The figure is created as a side effect.
    """
    if left_right_indexing:
        n_per_side = len(embed) // 2
        lp_inds = np.arange(n_per_side)
        rp_inds = np.arange(n_per_side) + n_per_side

    fig, axs = plt.subplots(3, 2, figsize=(20, 30))

    def draw(coords, ax, title):
        _plot_embedding_panel(coords, labels, lp_inds, rp_inds, ax, title)

    # Row 0: classical MDS.
    cmds_euc = ClassicalMDS(n_components=2).fit_transform(embed)
    draw(cmds_euc, axs[0, 0], "CMDS o euclidean")

    # The cosine distance matrix is shared by the precomputed CMDS and t-SNE.
    pdist = symmetrize(pairwise_distances(embed, metric="cosine"))
    cmds_cos = ClassicalMDS(
        n_components=2, dissimilarity="precomputed").fit_transform(pdist)
    draw(cmds_cos, axs[0, 1], "CMDS o cosine")

    # Row 1: t-SNE.
    tsne_euc = TSNE(metric="euclidean").fit_transform(embed)
    draw(tsne_euc, axs[1, 0], "TSNE o euclidean")

    tsne_cos = TSNE(metric="precomputed").fit_transform(pdist)
    draw(tsne_cos, axs[1, 1], "TSNE o cosine")

    # Row 2: UMAP.
    umap_euc = UMAP(
        metric="euclidean", n_neighbors=30, min_dist=1).fit_transform(embed)
    draw(umap_euc, axs[2, 0], "UMAP o euclidean")

    umap_cos = UMAP(
        metric="cosine", n_neighbors=30, min_dist=1).fit_transform(embed)
    draw(umap_cos, axs[2, 1], "UMAP o cosine")
예제 #7
0
def _omni_embed(pop_array, atlas, graph_path, ID, subgraph_name="whole_brain"):
    r"""
    Omnibus embedding of arbitrary number of input graphs with matched vertex
    sets.

    Given :math:`A_1, A_2, ..., A_m` a collection of (possibly weighted) adjacency
    matrices of a collection :math:`m` undirected graphs with matched vertices.
    Then the :math:`(mn \times mn)` omnibus matrix, :math:`M`, has the subgraph where
    :math:`M_{ij} = \frac{1}{2}(A_i + A_j)`. The omnibus matrix is then embedded
    using adjacency spectral embedding, and the result is passed through
    classical MDS before being saved.


    Parameters
    ----------
    pop_array : list of nx.Graph or ndarray, or ndarray
        If list of nx.Graph, each Graph must contain same number of nodes.
        If list of ndarray, each array must have shape (n_vertices, n_vertices).
        If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices).
    atlas : str
        Atlas label; used in the log message and the output file names.
    graph_path : str
        Path to a graph file. Outputs go to an ``embeddings`` directory created
        under the parent of the directory holding this file.
    ID : nested iterable
        Passed through ``flatten``; the first flattened element prefixes the
        output file names.
    subgraph_name : str
        Sub-network label; used in the log message and the output file names.

    Returns
    -------
    out_path : str
        File path to .npy file containing omni embedding tensor.

    References
    ----------
    .. [1] Levin, K., Athreya, A., Tang, M., Lyzinski, V., & Priebe, C. E. (2017,
      November). A central limit theorem for an omnibus embedding of multiple random
      dot product graphs. In Data Mining Workshops (ICDMW), 2017 IEEE International
      Conference on (pp. 964-967). IEEE.
    .. [2] Chung, J., Pedigo, B. D., Bridgeford, E. W., Varjavand, B. K., Helm, H. S.,
      & Vogelstein, J. T. (2019). Graspy: Graph statistics in python.
      Journal of Machine Learning Research.

    """
    import numpy as np
    from pynets.core.utils import flatten
    from graspy.embed import OmnibusEmbed, ClassicalMDS
    from joblib import dump

    # Omnibus embedding
    print(f"Embedding unimodal omnetome for atlas: {atlas} and {subgraph_name}...")
    omni = OmnibusEmbed(check_lcc=False)
    mds = ClassicalMDS()
    omni_fit = omni.fit_transform(pop_array)

    # Transform omnibus tensor into dissimilarity feature
    mds_fit = mds.fit_transform(omni_fit)

    dir_path = str(Path(os.path.dirname(graph_path)).parent)

    namer_dir = f"{dir_path}/embeddings"
    os.makedirs(namer_dir, exist_ok=True)

    # Shared file-name stem. The "masetome" infix in the estimator names is
    # kept as-is so existing consumers of these files keep working.
    stem = f"{namer_dir}/{list(flatten(ID))[0]}_{atlas}_{subgraph_name}"
    out_path = f"{stem}_omnetome.npy"
    out_path_est_omni = f"{stem}_masetome_estimator_omni.joblib"
    out_path_est_mds = f"{stem}_masetome_estimator_mds.joblib"

    # Persist each fitted estimator under its own name (the MDS file used to
    # receive a second copy of the OMNI estimator).
    dump(omni, out_path_est_omni)
    dump(mds, out_path_est_mds)

    print("Saving...")
    np.save(out_path, mds_fit)
    return out_path
예제 #8
0
    figsize=(20, 20),
    row_linkage=Z,
    col_linkage=Z,
    xticklabels=False,
    yticklabels=False,
)
stashfig("agglomerative-path-dist-mat" + basename)

# %% [markdown]
# ##

from graspy.embed import select_dimension

print("Running CMDS on path dissimilarity...")
X = path_dist_mat
# Classical MDS on the precomputed dissimilarity matrix, keeping
# ceil(log2(smallest matrix dimension)) components.
cmds = ClassicalMDS(dissimilarity="precomputed",
                    n_components=int(np.ceil(np.log2(np.min(X.shape)))))
path_embed = cmds.fit_transform(X)
# Pick up to three elbows in the singular-value profile.
# NOTE(review): the "ZG" legend label suggests the Zhu-Ghodsi method --
# confirm against the graspy `select_dimension` documentation.
elbows, elbow_vals = select_dimension(cmds.singular_values_, n_elbows=3)
# 1-based component numbers for the x axis, matching the elbow positions.
rng = np.arange(1, len(cmds.singular_values_) + 1)
elbows = np.array(elbows)
fig, ax = plt.subplots(1, 1, figsize=(8, 4))
pc = ax.scatter(elbows, elbow_vals, color="red", label="ZG")
# Keep the elbow markers on top of the scree line drawn next.
pc.set_zorder(10)
ax.plot(rng, cmds.singular_values_, "o-")
ax.legend()
stashfig("cmds-screeplot" + basename)

# %% [markdown]
# ##

# Low alpha so dense regions of the embedding remain readable.
pairplot(path_embed, alpha=0.02)
예제 #9
0
def _omni_embed(pop_array,
                atlas,
                graph_path_list,
                ID,
                subgraph_name="all_nodes",
                n_components=None,
                norm=1):
    r"""
    Omnibus embedding of arbitrary number of input graphs with matched vertex
    sets.

    Given :math:`A_1, A_2, ..., A_m` a collection of (possibly weighted) adjacency
    matrices of a collection :math:`m` undirected graphs with matched vertices.
    Then the :math:`(mn \times mn)` omnibus matrix, :math:`M`, has the subgraph where
    :math:`M_{ij} = \frac{1}{2}(A_i + A_j)`. The omnibus matrix is then embedded
    using adjacency spectral embedding.


    Parameters
    ----------
    pop_array : list of nx.Graph or ndarray, or ndarray
        If list of nx.Graph, each Graph must contain same number of nodes.
        If list of ndarray, each array must have shape (n_vertices, n_vertices).
        If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices).
        Entry ``i`` is used as-is for graph ``i`` when ``norm`` is below 1.
    atlas : str
        The name of an atlas (indicating the node definition).
    graph_path_list : list
        List of file paths to graphs in pop_array. Must be non-empty: the
        first path determines the output location and file name.
    ID : str
        An arbitrary subject identifier. Not used by this function.
    subgraph_name : str
        Sub-network label; used in the log message and the output file name.
    n_components : int or None
        Forwarded to both ``OmnibusEmbed`` and ``ClassicalMDS``.
    norm : int or float
        Forwarded to ``CleanGraphs``. When ``float(norm) >= 1`` each graph is
        reloaded from its path and normalized with
        ``CleanGraphs.normalize_graph()`` before embedding.

    Returns
    -------
    out_path : str
        File path to .npy file containing omni embedding tensor.

    References
    ----------
    .. [1] Levin, K., Athreya, A., Tang, M., Lyzinski, V., & Priebe, C. E. (2017,
      November). A central limit theorem for an omnibus embedding of multiple random
      dot product graphs. In Data Mining Workshops (ICDMW), 2017 IEEE International
      Conference on (pp. 964-967). IEEE.
    .. [2] Chung, J., Pedigo, B. D., Bridgeford, E. W., Varjavand, B. K., Helm, H. S.,
      & Vogelstein, J. T. (2019). Graspy: Graph statistics in python.
      Journal of Machine Learning Research.

    """
    import networkx as nx
    import numpy as np
    from graspy.embed import OmnibusEmbed, ClassicalMDS
    from pynets.stats.netstats import CleanGraphs

    normalize = float(norm) >= 1
    clean_mats = []
    for i, graph_path in enumerate(graph_path_list):
        cg = CleanGraphs(None, None, graph_path, 0, norm)

        if normalize:
            mat_clean = nx.to_numpy_array(cg.normalize_graph())
        else:
            mat_clean = pop_array[i]

        clean_mats.append(mat_clean)

    # Omnibus embedding
    print(f"Embedding unimodal omnetome for atlas: {atlas} and "
          f"{subgraph_name}...")
    omni = OmnibusEmbed(n_components=n_components, check_lcc=False)
    mds = ClassicalMDS(n_components=n_components)
    # Embed the cleaned matrices; fitting on the raw `pop_array` discarded the
    # normalization performed above.
    omni_fit = omni.fit_transform(clean_mats)

    # Transform omnibus tensor into dissimilarity feature
    # NOTE(review): `reshape` reinterprets the buffer with the axes sizes
    # rotated; it does not move axes the way `np.transpose` would. Kept as-is
    # because the intended layout cannot be confirmed from here.
    mds_fit = mds.fit_transform(
        omni_fit.reshape(omni_fit.shape[1], omni_fit.shape[2],
                         omni_fit.shape[0]))

    dir_path = str(Path(os.path.dirname(graph_path_list[0])).parent)

    namer_dir = f"{dir_path}/embeddings"
    os.makedirs(namer_dir, exist_ok=True)

    out_path = (
        f"{namer_dir}/gradient-OMNI_{atlas}_{subgraph_name}_"
        f"{os.path.basename(graph_path_list[0]).split('_thrtype')[0]}.npy")

    print("Saving...")
    np.save(out_path, mds_fit)
    return out_path
예제 #10
0
# %% [markdown]
# ## Decide on an embedding method for distance matrix
# Which reducer to use below: "cmds", "iso" or "tsne".
dim_reduce = "cmds"

# Tag output names with the chosen reducer.
basename += f"-dim_red={dim_reduce}"

# %% [markdown]
# ##
print("Running dimensionality reduction on path dissimilarity...")

X = path_dist_mat
# Embedding dimension: ceil(log2(smallest matrix dimension)).
max_dim = int(np.ceil(np.log2(np.min(X.shape))))

# Every branch treats X as a precomputed dissimilarity matrix and produces
# `path_embed`; the first two also expose a spectrum in `sing_vals`.
if dim_reduce == "cmds":
    cmds = ClassicalMDS(dissimilarity="precomputed", n_components=max_dim)
    path_embed = cmds.fit_transform(X)
    sing_vals = cmds.singular_values_
elif dim_reduce == "iso":
    iso = Isomap(n_components=max_dim, metric="precomputed")
    path_embed = iso.fit_transform(X)
    # NOTE(review): these are kernel-PCA eigenvalues, not singular values;
    # newer scikit-learn releases expose them as `eigenvalues_` -- confirm the
    # pinned version still provides `lambdas_`.
    sing_vals = iso.kernel_pca_.lambdas_
elif dim_reduce == "tsne":
    best_embed = None
    best_kl = np.inf
    n_tsne = 10
    for i in range(n_tsne):
        manifold = TSNE(metric="precomputed")
        path_embed = manifold.fit_transform(X)
        kl = manifold.kl_divergence_
        print(kl)
예제 #11
0
    x_sum = X.sum(axis=1).A1
    y_sum = Y.sum(axis=1).A1
    xx, yy = np.meshgrid(x_sum, y_sum)
    union = (xx + yy).T - intersect

    return (1 - intersect / union).A


# Run configuration; `base` is the file-name suffix built from these values.
run_name = "86.1-BDP-prob-path-cluster"
threshold = 1
weight = "weight"
graph_type = "Gad"
cutoff = 8
base = f"-c{cutoff}-t{threshold}-{graph_type}"


# Load the metadata and path matrix CSVs written by the run named above;
# the first CSV column becomes the index in both.
base_path = Path(f"./maggot_models/notebooks/outs/{run_name}/csvs")
meta = pd.read_csv(base_path / str("meta" + base + ".csv"), index_col=0)
path_mat = pd.read_csv(base_path / str("prob-path-mat" + base + ".csv"), index_col=0)

sparse_path = csr_matrix(path_mat.values)

# Row-by-row euclidean distances of the path matrix.
euclid_dists = pairwise_distances(sparse_path, metric="euclidean")

# Classical MDS on the precomputed distances (default n_components).
mds = ClassicalMDS(dissimilarity="precomputed")
mds_embed = mds.fit_transform(euclid_dists)
embed_df = pd.DataFrame(data=mds_embed)

stashcsv(embed_df, "euclid-mds-embed")
예제 #12
0
#%%
import pandas as pd
import numpy as np
from graspy.embed import ClassicalMDS
import seaborn as sns
from sklearn.metrics import pairwise_distances

data_loc = "maggot_models/data/external/17-08-26L6-allC-cl.csv"
ts_df = pd.read_csv(data_loc, index_col=None)
# Transpose so every CSV column becomes one row (one sample for the pairwise
# distances below).
# NOTE(review): presumably one column per recorded time series -- confirm.
ts_mat = ts_df.values.T
# %% [markdown]
# #

# Despite the name this is a correlation *distance* matrix, not a matrix of
# correlation coefficients.
corr_mat = pairwise_distances(ts_mat, metric="correlation")

# %% [markdown]
# #
sns.clustermap(corr_mat)

# %% [markdown]
# #
from graspy.plot import pairplot

# Classical MDS on the precomputed correlation distances, then pairwise
# scatter plots of the embedding dimensions.
mds = ClassicalMDS(dissimilarity="precomputed")
embed = mds.fit_transform(corr_mat)
pairplot(embed)
예제 #13
0
# %% [markdown]
# ##
# Pair plot of the original embedding U, coloured by merge class and with the
# left/right index pairs passed through.
plot_pairs(
    U,
    labels=meta["merge_class"].values,
    left_pair_inds=lp_inds,
    right_pair_inds=rp_inds,
)

# %% [markdown]
# ##
from graspy.utils import symmetrize

# manifold = TSNE(metric="cosine")
# tsne_embed = tsne.fit_transform(U)
# Classical MDS on cosine distances, keeping one dimension fewer than U has
# columns.
manifold = ClassicalMDS(n_components=U.shape[1] - 1,
                        dissimilarity="precomputed")
# manifold = MDS(n_components=2, dissimilarity="precomputed")
# manifold = Isomap(n_components=2, metric="precomputed")
# NOTE(review): symmetrize presumably removes floating-point asymmetry in the
# distance matrix before MDS -- confirm.
pdist = symmetrize(pairwise_distances(U, metric="cosine"))
manifold_embed = manifold.fit_transform(pdist)

# Same plot as above, now for the cosine-MDS embedding.
plot_pairs(
    manifold_embed,
    labels=meta["merge_class"].values,
    left_pair_inds=lp_inds,
    right_pair_inds=rp_inds,
)

# %% [markdown]
# ##
예제 #14
0
# Heatmap of the path distance matrix with rows and columns ordered by the
# same precomputed linkage Z; tick labels are suppressed.
sns.clustermap(
    path_dist_mat,
    figsize=(20, 20),
    row_linkage=Z,
    col_linkage=Z,
    xticklabels=False,
    yticklabels=False,
)
stashfig("clustermap")
# %% [markdown]
# ##
from graspy.embed import ClassicalMDS
from src.visualization import screeplot

# 10-dimensional classical MDS on the precomputed path distances.
cmds = ClassicalMDS(dissimilarity="precomputed", n_components=10)

path_embed = cmds.fit_transform(path_dist_mat)

# Quick scree plot of the singular values.
plt.plot(cmds.singular_values_, "o")

# %% [markdown]
# ##
from graspy.plot import pairplot

# Show only the first 5 of the 10 embedding dimensions.
n_components = 5
pairplot(path_embed[:, :n_components], alpha=0.1)

# %% [markdown]
# ##
from graspy.cluster import AutoGMMCluster
예제 #15
0
#%%
# Spectral embedding of the graph into 8 dimensions.
n_components = 8
ase = AdjacencySpectralEmbed(n_components=n_components)
latent = ase.fit_transform(graph)
pairplot(latent, labels=block_labels)

# Scale every latent position to unit length, then plot again.
# NOTE(review): a zero-norm row would produce NaNs here -- confirm it cannot
# occur for this graph.
latent /= np.linalg.norm(latent, axis=1)[:, np.newaxis]
pairplot(latent, labels=block_labels)


# def compute_cosine_similarity(latent):
#     for i in range(latent.shape[0])
# With unit-length rows the Gram matrix is the cosine similarity, so
# 1 - similarity is the cosine dissimilarity.
similarity = latent @ latent.T
dissimilarity = 1 - similarity
# Sanity check: a point's dissimilarity to itself should be ~0.
print(dissimilarity[0, 0])
cmds = ClassicalMDS(n_components=n_components - 1, dissimilarity="precomputed")
cmds_latent = cmds.fit_transform(dissimilarity)
pairplot(cmds_latent, labels=block_labels)
#%%
# Fit the hierarchical SBM estimator on the same graph.
hsbm = HSBMEstimator(
    n_subgraphs=8, n_subgroups=3, n_components_lvl1=8, n_components_lvl2=3
)
hsbm.fit(graph)
#%%
# NOTE(review): newer matplotlib releases renamed this style to
# "seaborn-v0_8-white" -- confirm against the pinned version.
plt.style.use("seaborn-white")
model = hsbm.agglomerative_model_
dists = hsbm.subgraph_dissimilarities_
# Shift so the smallest dissimilarity is 0.
dists = dists - dists.min()

c = Colormap()
cmap = c.get_cmap_heat_r()