Code Example #1
def bilateral_ase(adj):
    ase = AdjacencySpectralEmbed(n_components=None,
                                 n_elbows=2,
                                 check_lcc=False)
    ipsi_adj = adj.copy()
    ipsi_adj[np.ix_(left_inds, right_inds)] = 0
    ipsi_adj[np.ix_(right_inds, left_inds)] = 0
    ipsi_embed = ase.fit_transform(ipsi_adj)

    procrust = Procrustes()
    align_ipsi_embed = []
    for e in ipsi_embed:
        procrust.fit(e, x_seeds=lp_inds, y_seeds=rp_inds)
        align_e = procrust.transform(e, map_inds=left_inds)
        align_ipsi_embed.append(align_e)
    align_ipsi_embed = np.concatenate(align_ipsi_embed, axis=1)

    contra_adj = adj.copy()
    contra_adj[np.ix_(left_inds, left_inds)] = 0
    contra_adj[np.ix_(right_inds, right_inds)] = 0
    contra_embed = ase.fit_transform(contra_adj)

    procrust = Procrustes()
    align_contra_embed = []
    for e in contra_embed:
        procrust.fit(e, x_seeds=lp_inds, y_seeds=rp_inds)
        align_e = procrust.transform(e, map_inds=left_inds)
        align_contra_embed.append(align_e)
    align_contra_embed = np.concatenate(align_contra_embed, axis=1)
    return align_ipsi_embed, align_contra_embed
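This helper leans on module-level objects (`left_inds`, `right_inds`, `lp_inds`, `rp_inds`, and a project-specific `Procrustes` class). Below is a self-contained sketch of the same ipsilateral split-and-align idea, with toy indices and scipy's `orthogonal_procrustes` swapped in for the project helper; all names here are illustrative.

import numpy as np
from scipy.linalg import orthogonal_procrustes
from graspy.embed import AdjacencySpectralEmbed
from graspy.simulations import er_np

n = 100
adj = er_np(n, 0.3, directed=True)
# toy hemispheres: first half "left", second half "right", all nodes paired
left_inds = np.arange(n // 2)
right_inds = np.arange(n // 2, n)
lp_inds, rp_inds = left_inds, right_inds

# zero the contralateral blocks, then embed the ipsilateral graph
ipsi_adj = adj.copy()
ipsi_adj[np.ix_(left_inds, right_inds)] = 0
ipsi_adj[np.ix_(right_inds, left_inds)] = 0
ase = AdjacencySpectralEmbed(n_elbows=2, check_lcc=False)
out_embed, in_embed = ase.fit_transform(ipsi_adj)
X = np.concatenate((out_embed, in_embed), axis=1)

# align the left-hemisphere embedding onto the right using the known pairs
R, _ = orthogonal_procrustes(X[lp_inds], X[rp_inds])
X[left_inds] = X[left_inds] @ R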
Code Example #2
class ASEEmbedding(Embedding):
    """Implements an interface for adjacency spectral embedding; inherits from the Embedding class.

    """
    def __init__(self):
        self.model = AdjacencySpectralEmbed()

    def fit(self, X, S=None):
        Xh = np.hstack(self.model.fit_transform(X))
        if S is not None:
            Xh = np.hstack((Xh, S))
        clusterer = GaussianMixture(n_components=Xh.shape[1] // 2)
        clusterer.fit(Xh)
        predict_labels = clusterer.predict(Xh)
        self.y = predict_labels
        self.H = Xh

    def learn_embedding(self, G, S=None, **kwargs):
        # Same pipeline as `fit`, starting from a networkx graph.
        X = nx.adjacency_matrix(G).todense()
        self.fit(X, S=S)

    def get_reconstructed_adj(self, *a, **b):
        # P-hat = X @ diag(singular values) @ Y.T; `latent_right_` is only
        # set for directed graphs.
        return self.model.latent_left_.dot(np.diag(
            self.model.singular_values_)).dot(self.model.latent_right_.T)
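A usage sketch, assuming the class above is defined against a minimal `Embedding` stand-in (the real base class is project code):

import networkx as nx
import numpy as np


class Embedding:  # stand-in for the project's abstract base class
    pass


# ... define ASEEmbedding as above, then:
G = nx.fast_gnp_random_graph(60, 0.3, directed=True, seed=0)
emb = ASEEmbedding()
emb.learn_embedding(G)
print(emb.H.shape)         # hstacked out/in latent positions (plus S, if given)
print(np.bincount(emb.y))  # GMM cluster sizes
P_hat = emb.get_reconstructed_adj()  # rank-d reconstruction of the adjacency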
Code Example #3
def ase(adj, n_components, ptr=True):
    if ptr:
        adj = pass_to_ranks(adj)
    ase = AdjacencySpectralEmbed(n_components=n_components)
    latent = ase.fit_transform(adj)
    latent = np.concatenate(latent, axis=-1)
    return latent
Code Example #4
File: berlin_fig.py Project: bstadt/graspy
def evaluate_models(graph,
                    labels=None,
                    title=None,
                    plot_graphs=False,
                    min_comp=0,
                    max_comp=1,
                    n_comp=5):

    if plot_graphs:
        heatmap(graph, inner_hier_labels=cell_labels)

    ## Set up models to test
    non_rdpg_models = [
        EREstimator(fit_degrees=False),
        SBEstimator(fit_degrees=False),
        SBEstimator(fit_degrees=True),
    ]

    d = [6]
    rdpg_models = [RDPGEstimator(n_components=i) for i in d]
    models = non_rdpg_models + rdpg_models

    names_nonRDPG = ["ER", "SBM", "DCSBM"]
    names_RDPG = ["RDPGrank{}".format(i) for i in d]
    names = names_nonRDPG + names_RDPG

    bics = []
    log_likelihoods = []

    ## Test models
    for model, name in zip(models, names):
        m = model.fit(graph, y=labels)
        if plot_graphs:
            heatmap(m.p_mat_,
                    inner_hier_labels=labels,
                    title=(name + " P matrix"))
            heatmap(m.sample(),
                    inner_hier_labels=labels,
                    title=(name + " sample"))
        bic = m.bic(graph)
        log_likelihoods.append(m.score(graph))
        bics.append(bic)
        plt.show()
        ase = AdjacencySpectralEmbed(n_components=2)
        latent = ase.fit_transform(m.p_mat_)
        if isinstance(latent, tuple):  # directed model: concatenate out/in positions
            latent = np.concatenate(latent, axis=1)
        ax = scatterplot(latent,
                         labels=cell_labels,
                         height=4,
                         alpha=0.6,
                         font_scale=1.25)
        plt.savefig(name + "latent.png", format="png", dpi=1000)
        plt.close()
Code Example #5
    def test_passing_embeddings(self):
        np.random.seed(123)
        A1 = er_np(20, 0.8)
        A2 = er_np(20, 0.8)
        ase_1 = AdjacencySpectralEmbed(n_components=2)
        X1 = ase_1.fit_transform(A1)
        ase_2 = AdjacencySpectralEmbed(n_components=2)
        X2 = ase_2.fit_transform(A2)
        ase_3 = AdjacencySpectralEmbed(n_components=1)
        X3 = ase_3.fit_transform(A2)
        # check embeddings having weird ndim
        with self.assertRaises(ValueError):
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict(X1, X2.reshape(-1, 1, 1))
        with self.assertRaises(ValueError):
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict(X1.reshape(-1, 1, 1), X2)
        # check embeddings having mismatching number of components
        with self.assertRaises(ValueError):
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict(X1, X3)
        with self.assertRaises(ValueError):
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict(X3, X1)
        # check passing weird stuff as input (caught by us)
        with self.assertRaises(TypeError):
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict("hello there", X1)
        with self.assertRaises(TypeError):
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict(X1, "hello there")
        with self.assertRaises(TypeError):
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict({"hello": "there"}, X1)
        with self.assertRaises(TypeError):
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict(X1, {"hello": "there"})
        # check passing infinite in input (caught by check_array)
        with self.assertRaises(ValueError):
            X1_w_inf = X1.copy()
            X1_w_inf[1, 1] = np.inf
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict(X1_w_inf, X2)
        # check that the appropriate input works
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict(X1, X2)
Code Example #6
def lse(adj, n_components, regularizer=None):
    if PTR:  # PTR is a module-level flag enabling pass-to-ranks preprocessing
        adj = pass_to_ranks(adj)
    lap = to_laplace(adj, form="R-DAD", regularizer=regularizer)
    ase = AdjacencySpectralEmbed(n_components=n_components)
    latent = ase.fit_transform(lap)
    latent = np.concatenate(latent, axis=-1)
    return latent
Code Example #7
def mc_iter(n, m, p, q, tilde, i=1):
    X_graph = er_np(n, p*p)
    ase = AdjacencySpectralEmbed(n_components=1)
    X = ase.fit_transform(X_graph)

    Y_graph = er_np(m, q*q)
    ase = AdjacencySpectralEmbed(n_components=1)
    Y = ase.fit_transform(Y_graph)

    if tilde:
        X_new, Y_new = sample_noisy_points(X, Y)
    else:
        X_new, Y_new = X, Y

    ldt = LatentDistributionTest()
    pval = ldt.fit(X_new, Y_new, pass_graph=False)
    return pval
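A sketch of a Monte Carlo driver for this function; it assumes `mc_iter` returns a scalar p-value, as the (older graspy) API above suggests. Under the null (p == q), the p-values should be roughly uniform, so the rejection rate should sit near the 0.05 level.

import numpy as np

pvals = [mc_iter(n=100, m=100, p=0.7, q=0.7, tilde=False) for _ in range(100)]
print("rejection rate:", np.mean(np.asarray(pvals, dtype=float) < 0.05))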
Code Example #8
def level(adj, meta, pred, reembed=False, X=None, R=None, plot_all=True):
    uni_labels, inv = np.unique(pred, return_inverse=True)
    all_sub_results = []
    sub_data = []

    for label in uni_labels:
        print(label)
        print()
        label_mask = pred == label
        sub_meta = meta[label_mask].copy()
        sub_meta["inds"] = range(len(sub_meta))
        sub_left_inds = sub_meta[sub_meta["left"]]["inds"].values
        sub_right_inds = sub_meta[sub_meta["right"]]["inds"].values
        sub_lp_inds, sub_rp_inds = get_paired_inds(sub_meta)
        sub_adj = adj[np.ix_(label_mask, label_mask)]

        if reembed:
            ase = AdjacencySpectralEmbed()
            # TODO look into PTR at this level as well
            sub_embed = ase.fit_transform(sub_adj)
            sub_X = np.concatenate(sub_embed, axis=1)
            sub_R, _ = orthogonal_procrustes(sub_X[sub_lp_inds],
                                             sub_X[sub_rp_inds])
            sub_X[sub_left_inds] = sub_X[sub_left_inds] @ sub_R
        else:
            sub_X = X[label_mask].copy()
            sub_R = R

        var_dict = {
            "meta": sub_meta,
            "left_inds": sub_left_inds,
            "right_inds": sub_right_inds,
            "left_pair_inds": sub_lp_inds,
            "right_pair_inds": sub_rp_inds,
            "X": sub_X,
            "adj": sub_adj,
            "reembed": reembed,
        }

        sub_data.append(var_dict)

        sub_results = crossval_cluster(
            sub_X,
            sub_left_inds,
            sub_right_inds,
            left_pair_inds=sub_lp_inds,
            right_pair_inds=sub_rp_inds,
            max_clusters=8,
            min_clusters=1,
            n_init=50,
        )

        fig, axs = plot_metrics(sub_results, plot_all=plot_all)
        fig.suptitle(f"Clustering for cluster {label}, reembed={reembed}")
        stashfig(f"cluster-profile-label={label}-reembed={reembed}")
        plt.close()
        all_sub_results.append(sub_results)

    return all_sub_results, sub_data
Code Example #9
def normalized_ase(graph, n_components=None, embed_kws=None):
    embed_kws = {} if embed_kws is None else embed_kws  # avoid a mutable default
    ase = AdjacencySpectralEmbed(n_components=n_components, **embed_kws)
    latent = ase.fit_transform(graph)
    if isinstance(latent, tuple):
        latent = np.concatenate(latent, axis=-1)
    norm_vec = np.linalg.norm(latent, axis=1)
    norm_vec[norm_vec == 0] = 1
    norm_latent = latent / norm_vec[:, np.newaxis]
    return norm_latent
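A quick sanity check of the normalization (toy graph via graspy's `er_np`): every non-isolated vertex is projected onto the unit sphere, while any all-zero row is left at zero by the guard above.

import numpy as np
from graspy.simulations import er_np

X = normalized_ase(er_np(100, 0.3), n_components=2)
print(np.allclose(np.linalg.norm(X, axis=1), 1))  # True barring isolated vertices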
Code Example #10
def lse(adj, n_components, regularizer=None, ptr=True):
    if ptr:
        adj = pass_to_ranks(adj)
    lap = to_laplace(adj, form="R-DAD", regularizer=regularizer)
    ase = AdjacencySpectralEmbed(n_components=n_components, diag_aug=False)
    latent = ase.fit_transform(lap)
    latent = np.concatenate(latent, axis=-1)
    return latent
Code Example #11
def ase_concatenate(adjs, n_components, ptr=True):
    if ptr:
        adjs = [pass_to_ranks(a) for a in adjs]
    ase = AdjacencySpectralEmbed(n_components=n_components // len(adjs))
    graph_latents = []
    for a in adjs:
        latent = ase.fit_transform(a)
        latent = np.concatenate(latent, axis=-1)
        graph_latents.append(latent)
    latent = np.concatenate(graph_latents, axis=-1)
    return latent
Code Example #12
    def _embed(self, adj=None):
        if adj is None:
            adj = self.adj
        # TODO look into PTR at this level as well
        # lp_inds, rp_inds = get_paired_inds(self.meta)
        lp_inds = self.left_pair_inds
        rp_inds = self.right_pair_inds

        embed_adj = pass_to_ranks(adj)
        if self.embed == "ase":
            embedder = AdjacencySpectralEmbed(
                n_components=self.n_components, n_elbows=self.n_elbows
            )
            embed = embedder.fit_transform(embed_adj)
        elif self.embed == "lse":
            embedder = LaplacianSpectralEmbed(
                n_components=self.n_components,
                n_elbows=self.n_elbows,
                regularizer=self.regularizer,
            )
            embed = embedder.fit_transform(embed_adj)
        elif self.embed == "unscaled_ase":
            embed_adj = pass_to_ranks(adj)
            embed_adj = augment_diagonal(embed_adj)
            embed = selectSVD(
                embed_adj, n_components=self.n_components, n_elbows=self.n_elbows
            )
            embed = (embed[0], embed[2].T)

        X = np.concatenate(embed, axis=1)

        fraction_paired = (len(lp_inds) + len(rp_inds)) / len(self.root_inds)
        print(f"Learning transformation with {fraction_paired} neurons paired")
        R, _ = orthogonal_procrustes(X[lp_inds], X[rp_inds])
        X[self.left_inds] = X[self.left_inds] @ R

        if self.normalize:
            row_sums = np.sum(X, axis=1)
            X /= row_sums[:, None]

        return X
Code Example #13
    def _embed(self, adj=None):
        if adj is None:
            adj = self.adj

        lp_inds = self.left_pair_inds
        rp_inds = self.right_pair_inds

        embed_adj = pass_to_ranks(adj)  # TODO PTR here?
        if self.plus_c:
            embed_adj += 1 / adj.size
        if self.embed == "ase":
            embedder = AdjacencySpectralEmbed(n_components=self.n_components,
                                              n_elbows=self.n_elbows)
            embed = embedder.fit_transform(embed_adj)
        elif self.embed == "lse":
            embedder = LaplacianSpectralEmbed(
                n_components=self.n_components,
                n_elbows=self.n_elbows,
                regularizer=self.regularizer,
            )
            embed = embedder.fit_transform(embed_adj)
        elif self.embed == "unscaled_ase":
            embed_adj = augment_diagonal(embed_adj)
            embed = selectSVD(embed_adj,
                              n_components=self.n_components,
                              n_elbows=self.n_elbows)
            embed = (embed[0], embed[2].T)

        X = np.concatenate(embed, axis=1)

        fraction_paired = (len(lp_inds) + len(rp_inds)) / len(self.root_inds)
        print(f"Learning transformation with {fraction_paired} neurons paired")

        X = self._procrustes(X)

        if self.normalize:
            row_norms = np.linalg.norm(X, axis=1)
            X /= row_norms[:, None]

        return X
Code Example #14
def ase_procrust_svd(embed_adjs):
    ase = AdjacencySpectralEmbed(n_components=None)
    all_embeds = []
    for a in embed_adjs:
        embed = ase.fit_transform(a)
        embed = np.concatenate(embed, axis=1)
        embed[left_inds] = (embed[left_inds] @ orthogonal_procrustes(
            embed[lp_inds], embed[rp_inds])[0])
        print(embed.shape)
        all_embeds.append(embed)
    cat_embed = np.concatenate(all_embeds, axis=1)
    print(cat_embed.shape)
    U, S, Vt = selectSVD(cat_embed, n_elbows=3)
    return U
Code Example #15
    def _embed(self, A1, A2):
        if self.n_components is None:
            num_dims1 = select_dimension(A1)[0][-1]
            num_dims2 = select_dimension(A2)[0][-1]
            self.n_components = max(num_dims1, num_dims2)

        ase = AdjacencySpectralEmbed(n_components=self.n_components)
        X1_hat = ase.fit_transform(A1)
        X2_hat = ase.fit_transform(A2)

        if isinstance(X1_hat, tuple) and isinstance(X2_hat, tuple):
            X1_hat = np.concatenate(X1_hat, axis=-1)
            X2_hat = np.concatenate(X2_hat, axis=-1)
        elif isinstance(X1_hat, tuple) ^ isinstance(X2_hat, tuple):
            msg = ("input graphs do not have same directedness. "
                   "consider symmetrizing the directed graph.")
            raise ValueError(msg)

        return X1_hat, X2_hat
Code Example #16
    pairs(joint_embed)
    stashfig(f"joint-embed-{name}")

# %% [markdown]
# ##
U = omni_procrust_svd(embed_adjs)

# %% [markdown]
# ##
from sklearn.metrics import pairwise_distances

ase = AdjacencySpectralEmbed(n_components=None)
all_embeds = []
all_pdists = []
for a in embed_adjs:
    both_embed = ase.fit_transform(a)
    # embed = np.concatenate(embed, axis=1)
    for embed in both_embed:
        embed[left_inds] = (embed[left_inds] @ orthogonal_procrustes(
            embed[lp_inds], embed[rp_inds])[0])
        print(embed.shape)
        all_embeds.append(embed)
        pdist = pairwise_distances(embed, metric="cosine")
        all_pdists.append(pdist)

from mvlearn.embed import MVMDS

mvmds = MVMDS(n_components=6)
mv_embed = mvmds.fit_transform(all_pdists)

pairs(mv_embed)
Code Example #17
#%%
%matplotlib inline
from graspy.plot import *
from graspy.simulations import sbm
from graspy.embed import AdjacencySpectralEmbed
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
g = sbm([100, 100], [[0.8, 0.2], [0.2, 0.8]])
ase = AdjacencySpectralEmbed()
X = ase.fit_transform(g)
labels = 25 * [0] + 25 * [1] + 25 * [2] + 24 * [-1] + [-2]
# pairplot(X, size=50, alpha=0.6)

plt.show()


Code Example #18
adj = mg.adj
adj = pass_to_ranks(adj)
meta["inds"] = range(len(meta))

left_inds = meta[meta["left"]]["inds"]
right_inds = meta[meta["right"]]["inds"]
lp_inds, rp_inds = get_paired_inds(meta)

# %% [markdown]
# ## Embed
# Here the embedding is ASE, with PTR and DiagAug, the number of embedding dimensions
# is for now set to ZG2 (4 + 4). Using the known pairs as "seeds", the left embedding
# is matched to the right using procrustes.
ase = AdjacencySpectralEmbed(n_components=None, n_elbows=2)
embed = ase.fit_transform(adj)
n_components = embed[0].shape[1]  # use all of ZG2
X = np.concatenate((embed[0][:, :n_components], embed[1][:, :n_components]),
                   axis=-1)
R, _ = orthogonal_procrustes(X[lp_inds], X[rp_inds])

if CLUSTER_SPLIT == "best":
    X[left_inds] = X[left_inds] @ R

# %% [markdown]
# ## Clustering
# Clustering is performed using Gaussian mixture modeling. At each candidate value of k,
# 50 models are trained on the left embedding, 50 models are trained on the right
# embedding (choosing the best covariance structure based on BIC on the train set).
results = crossval_cluster(
    X,
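`crossval_cluster` is project code and its call above is cut off. A minimal sketch of the procedure the comment describes, with illustrative names: for each candidate number of clusters, fit a GMM on one hemisphere's embedding and score it on the other.

from sklearn.mixture import GaussianMixture


def crossval_cluster_sketch(X, left_inds, right_inds, min_clusters=1,
                            max_clusters=8, n_init=50):
    rows = []
    for k in range(min_clusters, max_clusters + 1):
        # fit on the left embedding, evaluate held-out fit on the right;
        # the real procedure also does the reverse and compares covariance
        # structures by BIC
        gmm = GaussianMixture(n_components=k, n_init=n_init)
        gmm.fit(X[left_inds])
        rows.append({"n_components": k,
                     "train_bic": gmm.bic(X[left_inds]),
                     "test_log_lik": gmm.score(X[right_inds])})
    return rows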
Code Example #19
#%%

from graspy.embed import AdjacencySpectralEmbed, OmnibusEmbed
from graspy.utils import pass_to_ranks
from graspy.plot import pairplot


sum_adj = np.sum(np.array(mb_color_graphs), axis=0)

n_components = 4

#
ptr_adj = pass_to_ranks(sum_adj)
ase = AdjacencySpectralEmbed(n_components=n_components)
sum_latent = ase.fit_transform(ptr_adj)
sum_latent = np.concatenate(sum_latent, axis=-1)
pairplot(sum_latent, labels=mb_class_labels)

ptr_color_adjs = [pass_to_ranks(a) for a in mb_color_graphs]
# graph_sum = [np.sum(a) for a in mb_color_graphs]
# ptr_color_adjs = [ptr_color_adjs[i] + (1 / graph_sum[i]) for i in range(4)]
omni = OmnibusEmbed(n_components=n_components // 4)
color_latent = omni.fit_transform(ptr_color_adjs)
# the first concatenate merges the out/in latent positions of each graph; the
# second stacks the per-graph embeddings side by side into one feature matrix
color_latent = np.concatenate(color_latent, axis=-1)
color_latent = np.concatenate(color_latent, axis=-1)
pairplot(color_latent, labels=mb_class_labels)

from graspy.embed import MultipleASE

mase = MultipleASE(n_components=n_components)
Code Example #20
conf_mat = confusion_matrix(right_int_labels, pred_labels)
conf_mat = conf_mat[:, ]
sns.heatmap(conf_mat, annot=True)

#%%

from graspy.embed import OmnibusEmbed, AdjacencySpectralEmbed
from scipy.linalg import orthogonal_procrustes

sns.set_palette("deep")
# omni = OmnibusEmbed(n_components=2)
# latent = omni.fit_transform([right_graph, gs.model_.p_mat_])
# latent = np.concatenate(latent, axis=-1)
n_components = 3
ase = AdjacencySpectralEmbed(n_components=n_components)
latent = ase.fit_transform(right_graph)
latent = np.concatenate(latent, axis=-1)

p_latent = ase.fit_transform(gs.model_.p_mat_)
p_latent = np.concatenate(p_latent, axis=-1)

R, scale = orthogonal_procrustes(p_latent, latent)
p_latent = p_latent @ R

n_components *= 2

scatter_kws = dict(legend=False, linewidth=0, s=30)
cmap1 = sns.color_palette("Set1", n_colors=4)
cmap2 = np.array(sns.color_palette("Set1", n_colors=4, desat=0.4))
cmap2 = cmap2[[3, 0, 1, 2]]
cmap2 = list(cmap2)
Code Example #21
    simultaneous=simultaneous,
    stop_nodes=source_inds,
    max_hops=max_hops,
    allow_loops=False,
)
back_hop_hist = td.multistart(out_inds)
back_hop_hist = back_hop_hist.T

full_hop_hist = np.concatenate((fwd_hop_hist, back_hop_hist), axis=0)


# %% [markdown]
# ##

embedder = AdjacencySpectralEmbed(n_components=None, n_elbows=2)
embed = embedder.fit_transform(pass_to_ranks(adj))
embed = np.concatenate(embed, axis=-1)

lp_inds, rp_inds = get_paired_inds(meta)
R, _ = orthogonal_procrustes(embed[lp_inds], embed[rp_inds])

left_inds = meta[meta["left"]]["inds"]
right_inds = meta[meta["right"]]["inds"]
embed[left_inds] = embed[left_inds] @ R


# %% [markdown]
# ##

joint = np.concatenate((embed, full_hop_hist.T), axis=1)
Code Example #22
right_inds = meta[meta["right"]]["inds"]


def remove_axis(ax):
    remove_spines(ax)
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_xticks([])
    ax.set_yticks([])


method = "ortho"

print("Embedding graph...")
embedder = AdjacencySpectralEmbed(n_components=None, n_elbows=2)
in_embed, out_embed = embedder.fit_transform(pass_to_ranks(adj))
procrust = Procrustes(method=method)
# procrust.fit(in_embed, x_seeds=lp_inds, y_seeds=rp_inds)
embed = np.concatenate((in_embed, out_embed), axis=-1)

dim1 = 0
dim2 = 4

fig, axs = plt.subplots(2, 2, figsize=(20, 20))
plot_df = pd.DataFrame(data=embed[:, [0, 1]])
plot_df["merge_class"] = meta["merge_class"].values
ax = axs[0, 0]
sns.scatterplot(
    data=plot_df,
    x=0,
    y=1,
Code Example #23
import numpy as np
import seaborn as sns
from sklearn.mixture import GaussianMixture
from graspy.embed import AdjacencySpectralEmbed
from graspy.simulations import sbm

sns.set_context("talk")

n_per_comm = [1000, 1000, 1000]
n_verts = np.sum(n_per_comm)
block_probs = np.array([[0.5, 0.1, 0.1], [0.1, 0.5, 0.1], [0.1, 0.1, 0.5]])

adj, labels = sbm(n_per_comm, block_probs, return_labels=True)

# %%

ase = AdjacencySpectralEmbed(n_components=3)
Xhat = ase.fit_transform(adj)

# %%


# REF: Anton
def _fit_plug_in_variance_estimator(X):
    """
    Takes in ASE of a graph and returns a function that estimates
    the variance-covariance matrix at a given point using the
    plug-in estimator from the RDPG Central Limit Theorem.

    Parameters
    ----------
    X : np.ndarray, shape (n, d)
        adjacency spectral embedding of a graph
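The listing is truncated above. For reference, a sketch of how the estimator described in this docstring can be implemented (modeled on the construction used in graspologic's latent distribution test; illustrative, not the original body):

import numpy as np


def fit_plug_in_variance_estimator(X):
    # X: (n, d) adjacency spectral embedding
    n = len(X)
    delta = (X.T @ X) / n
    delta_inverse = np.linalg.inv(delta)

    def plug_in_variance_estimator(x):
        # x: (m, d) points at which to estimate the variance-covariance
        if x.ndim < 2:
            x = x.reshape(1, -1)
        # sample-mean estimate of E[(x'X_i - (x'X_i)^2) X_i X_i']
        middle_term_scalar = x @ X.T - (x @ X.T) ** 2           # (m, n)
        middle_term_matrix = np.einsum("ab,ac->abc", X, X)      # (n, d, d)
        middle_term = np.tensordot(middle_term_scalar,
                                   middle_term_matrix, axes=1)  # (m, d, d)
        return delta_inverse @ (middle_term / n) @ delta_inverse

    return plug_in_variance_estimator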
Code Example #24
def simulation(n,
               pi,
               normal_params,
               beta_params,
               cond_ind=True,
               errors=None,
               smooth=False,
               acorn=None):
    #- Type checks
    if isinstance(normal_params, list):
        sbm_check = False
        # there are other checks to do..
    elif isinstance(normal_params, np.ndarray):
        if normal_params.ndim == 2:
            if np.array_equal(normal_params, normal_params.T):
                sbm_check = True
            else:
                msg = 'if normal_params is a 2 dimensional array it must be symmetric'
                raise ValueError(msg)
        else:
            msg = 'if normal_params is an array, it must be a 2 dimensional array'
            raise TypeError(msg)
    else:
        msg = 'normal_params must be either a list or a 2 dimensional array'
        raise TypeError(msg)

    if acorn is None:
        acorn = np.random.randint(10**6)
    np.random.seed(acorn)

    #- Multinomial trial
    counts = np.random.multinomial(n, [pi, 1 - pi])

    #- Hard code the number of blocks
    K = 2

    #- Set labels
    labels = np.concatenate((np.zeros(counts[0]), np.ones(counts[1])))

    #- number of seeds = n_{i}/10
    n_seeds = np.round(0.1 * counts).astype(int)

    #- Set training and test data
    class_train_idx = [
        range(np.sum(counts[:k]),
              np.sum(counts[:k]) + n_seeds[k]) for k in range(K)
    ]
    train_idx = np.concatenate((class_train_idx)).astype(int)

    test_idx = [k for k in range(n) if k not in train_idx]

    #- Total number of seeds
    m = np.sum(n_seeds)

    #- estimate class probabilities
    pi_hats = n_seeds / m

    #- Sample from beta distributions
    beta_samples = beta_sampler(counts, beta_params)
    Z = beta_samples

    #- Sample from multivariate normal or SBM either independently of Zs or otherwise
    if cond_ind:
        if sbm_check:
            A = sbm(counts, normal_params)
            ase_obj = ASE(n_elbows=1)
            X = ase_obj.fit_transform(A)
        else:
            X = MVN_sampler(counts, normal_params)
            if len(normal_params[0][0]) == 1:
                X = X[:, np.newaxis]
    else:
        if sbm_check:
            P = blowup(
                normal_params, counts
            )  # a big version of B, allowing per-node connectivity probabilities
            scales = np.prod(Z, axis=1)**(
                1 / Z.shape[1]
            )  # geometric mean of each row of Z; tiny Zs risk disconnecting the graph
            new_P = P * np.outer(scales, scales)  # new probability matrix
            A = sbm(np.ones(n).astype(int), new_P)
            ase_obj = ASE(n_elbows=1)
            X = ase_obj.fit_transform(A)
        else:
            X = conditional_MVN_sampler(Z=Z,
                                        rho=1,
                                        counts=counts,
                                        params=normal_params,
                                        seed=None)
            if len(normal_params[0][0]) == 1:
                X = X[:, np.newaxis]

    XZ = np.concatenate((X, Z), axis=1)

    #- Estimate normal parameters using seeds
    params = []
    for i in range(K):
        temp_mu, temp_cov = estimate_normal_parameters(X[class_train_idx[i]])
        params.append([temp_mu, temp_cov])

    #- Using conditional independence assumption (RF, KNN used for posterior estimates)
    if errors is None:
        errors = [[] for i in range(6)]  # errors[5] holds the GCN error below

    rf1 = RF(n_estimators=100,
             max_depth=int(np.round(np.log(Z[train_idx].shape[0]))))
    rf1.fit(Z[train_idx], labels[train_idx])

    knn1 = KNN(n_neighbors=int(np.round(np.log(Z[train_idx].shape[0]))))
    knn1.fit(Z[train_idx], labels[train_idx])

    if smooth:
        temp_pred = classify(X[test_idx], Z[test_idx], params, rf1, m=m)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
        errors[0].append(temp_error)

        temp_pred = classify(X[test_idx], Z[test_idx], params, knn1, m=m)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
        errors[1].append(temp_error)
    else:
        temp_pred = classify(X[test_idx], Z[test_idx], params, rf1)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
        errors[0].append(temp_error)

        knn1 = KNN(n_neighbors=int(np.round(np.log(m))))
        knn1.fit(Z[train_idx], labels[train_idx])

        temp_pred = classify(X[test_idx], Z[test_idx], params, knn1)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
        errors[1].append(temp_error)

    temp_pred = QDA(X[test_idx], pi_hats, params)
    temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
    errors[2].append(temp_error)

    #- Not using conditional independence assumption (RF, KNN used for classification)
    XZseeds = np.concatenate((X[train_idx], Z[train_idx]), axis=1)

    rf2 = RF(n_estimators=100, max_depth=int(np.round(np.log(m))))
    rf2.fit(XZ[train_idx], labels[train_idx])
    temp_pred = rf2.predict(XZ[test_idx])
    temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
    errors[3].append(temp_error)

    knn2 = KNN(n_neighbors=int(np.round(np.log(m))))
    knn2.fit(XZ[train_idx], labels[train_idx])

    temp_pred = knn2.predict(XZ[test_idx])
    temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
    errors[4].append(temp_error)

    temp_accuracy = GCN(adj, features, train_idx, labels)
    temp_error = 1 - temp_accuracy
    errors[5].append(temp_error)

    return errors
Code Example #25
def _ase_embed(mat,
               atlas,
               graph_path,
               ID,
               subgraph_name="all_nodes",
               n_components=None,
               prune=0,
               norm=1):
    """

    Class for computing the adjacency spectral embedding of a graph.

    The adjacency spectral embedding (ASE) is a k-dimensional Euclidean
    representation of the graph based on its adjacency matrix. It relies on an
    SVD to reduce the dimensionality to the specified k, or if k is
    unspecified, can find a number of dimensions automatically

    Parameters
    ----------
    mat : ndarray or nx.Graph
        An nxn adjacency matrix or graph object.
    atlas : str
        The name of an atlas (indicating the node definition).
    graph_path : str
    ID : str
    subgraph_name : str

    Returns
    -------
    out_path : str
        File path to .npy file containing ASE embedding tensor.

    Notes
    -----
    The singular value decomposition:

    .. math:: A = U \Sigma V^T

    is used to find an orthonormal basis for a matrix, which in our case is the
    adjacency matrix of the graph. These basis vectors (in the matrices U or V) are
    ordered according to the amount of variance they explain in the original matrix.
    By selecting a subset of these basis vectors (through our choice of dimensionality
    reduction) we can find a lower dimensional space in which to represent the graph.

    References
    ----------
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E.  "A
      Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs,"
      Journal of the American Statistical Association, Vol. 107(499), 2012

    """
    import os
    from pathlib import Path
    import networkx as nx
    import numpy as np
    from pynets.core.utils import flatten
    from graspy.embed import AdjacencySpectralEmbed
    from joblib import dump
    from pynets.stats.netstats import CleanGraphs
    #from graspy.utils import get_lcc

    # Adjacency Spectral embedding
    print(f"{'Embedding unimodal asetome for atlas: '}{atlas} and "
          f"{subgraph_name}{'...'}")
    ase = AdjacencySpectralEmbed(n_components=n_components)
    cg = CleanGraphs(None, None, graph_path, prune, norm)

    if float(norm) >= 1:
        G = cg.normalize_graph()
        mat_clean = nx.to_numpy_array(G)
    else:
        mat_clean = mat

    # pruning, when requested, takes precedence over the matrix chosen above
    if float(prune) >= 1:
        graph_path_tmp = cg.prune_graph()[1]
        mat_clean = np.load(graph_path_tmp)

    ase_fit = ase.fit_transform(mat_clean)

    dir_path = str(Path(os.path.dirname(graph_path)).parent)

    namer_dir = f"{dir_path}/embeddings"
    if not os.path.isdir(namer_dir):
        os.makedirs(namer_dir, exist_ok=True)

    out_path = f"{namer_dir}/gradient-ASE" \
               f"_{atlas}_{subgraph_name}_{os.path.basename(graph_path)}"
    # out_path_est = f"{namer_dir}/gradient-ASE_{atlas}" \
    #                f"_{subgraph_name}" \
    #                f"_{os.path.basename(graph_path).split('.npy')[0]}.joblib"

    #dump(ase, out_path_est)

    print("Saving...")
    np.save(out_path, ase_fit)
    del ase, ase_fit

    return out_path
Code Example #26
# %% [markdown]
# ##
matrixplot(
    path_indicator_mat[:50, :50],
    plot_type="scattermap",
    sizes=(0.2, 0.2),
    hue="weight",
    palette=sns.color_palette("husl", n_colors=10),
    ax=ax,
)

# %% [markdown]
# ##

embedder = AdjacencySpectralEmbed(n_components=None, n_elbows=2)
embed = embedder.fit_transform(adj)
embed = np.concatenate(embed, axis=-1)
pairplot(embed, labels=labels, palette="tab20")

# %% [markdown]
# ## Run paths
print(f"Running {n_init} random walks from each source node...")

transition_probs = to_markov_matrix(adj)

out_inds = np.where(labels == n_blocks - 1)[0]
source_inds = np.where(labels == 0)[0]


def rw_from_node(s):
    paths = []
Code Example #27
File: embeddings.py Project: ryanhammonds/PyNets
def _ase_embed(mat, atlas, graph_path, ID, subgraph_name="whole_brain"):
    """

    Class for computing the adjacency spectral embedding of a graph.

    The adjacency spectral embedding (ASE) is a k-dimensional Euclidean representation
    of the graph based on its adjacency matrix. It relies on an SVD to reduce
    the dimensionality to the specified k, or if k is unspecified, can find a number of
    dimensions automatically

    Parameters
    ----------
    mat : ndarray or nx.Graph
        An nxn adjacency matrix or graph object.
    atlas : str
    graph_path : str
    ID : str
    subgraph_name : str

    Returns
    -------
    out_path : str
        File path to .npy file containing ASE embedding tensor.

    Notes
    -----
    The singular value decomposition:

    .. math:: A = U \Sigma V^T

    is used to find an orthonormal basis for a matrix, which in our case is the
    adjacency matrix of the graph. These basis vectors (in the matrices U or V) are
    ordered according to the amount of variance they explain in the original matrix.
    By selecting a subset of these basis vectors (through our choice of dimensionality
    reduction) we can find a lower dimensional space in which to represent the graph.

    References
    ----------
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E.  "A
      Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs,"
      Journal of the American Statistical Association, Vol. 107(499), 2012

    """
    import os
    from pathlib import Path
    import numpy as np
    from pynets.core.utils import flatten
    from graspy.embed import AdjacencySpectralEmbed
    from joblib import dump
    from graspy.utils import get_lcc

    # Adjacency Spectral embedding
    print(f"Embedding unimodal asetome for atlas: {atlas} and {subgraph_name}...")
    ase = AdjacencySpectralEmbed()
    ase_fit = ase.fit_transform(get_lcc(mat))

    dir_path = str(Path(os.path.dirname(graph_path)).parent)

    namer_dir = f"{dir_path}/embeddings"
    if not os.path.isdir(namer_dir):
        os.makedirs(namer_dir, exist_ok=True)

    out_path = f"{namer_dir}/{list(flatten(ID))[0]}_{atlas}_{subgraph_name}_asetome.npy"
    out_path_est = f"{namer_dir}/{list(flatten(ID))[0]}_{atlas}_{subgraph_name}_asetome_estimator.joblib"

    dump(ase, out_path_est)

    print("Saving...")
    np.save(out_path, ase_fit)
    del ase, ase_fit

    return out_path
Code Example #28
    "APL": "MBIN",
    "sens": "ORN",
}

rows = []

for side in ["left", "right"]:
    print(side)
    side_mb_mg = side_mgs[side]
    labels = side_mb_mg.meta["class1"].values
    labels = np.vectorize(label_map.get)(labels)
    plot_labels = side_mb_mg.meta["merge_class"].values

    # embed
    ase = AdjacencySpectralEmbed(n_components=None, algorithm="randomized")
    embed = ase.fit_transform(pass_to_ranks(side_mb_mg.adj))
    embed = np.concatenate(embed, axis=1)

    # cluster using AutoGMM
    method = "AutoGMM"
    agmm = AutoGMMCluster(
        min_components=2,
        max_components=10,
        affinity=["euclidean", "manhattan", "cosine"],
        covariance_type=["full"],
        n_jobs=-1,
    )
    agmm.fit(embed, labels)
    agmm_results = agmm.results_.copy()
    agmm_results.sort_values("bic/aic", inplace=True)
    agmm_model = agmm.model_
Code Example #29
block_p_hat = sbme.block_p_
block_heatmap(block_p_hat, title=r"Observed $\hat{B}$")
block_p_hat_unscaled = block_p_hat * 1 / scaling_factor
block_heatmap(block_p_hat_unscaled, title=r"Observed $\hat{B}$ (unscaled)")

# %% [markdown]
# ## Spectral embedding
# Here I use graspy to do ASE, LSE, and regularized LSE. Note that we're just using
# the SVDs here. There is an option below for whether to throw out the first eigenvector.
#%% embeddings
embed_kws = dict(n_components=k + 1, algorithm="full", check_lcc=False)
ase = AdjacencySpectralEmbed(**embed_kws)
lse = LaplacianSpectralEmbed(form="DAD", **embed_kws)
rlse = LaplacianSpectralEmbed(form="R-DAD", **embed_kws)

ase_embed = ase.fit_transform(adj)
lse_embed = lse.fit_transform(adj)
rlse_embed = rlse.fit_transform(adj)
embeddings_list = [ase_embed, lse_embed, rlse_embed]

remove_first = True
for i, embedding in enumerate(embeddings_list):
    if remove_first:
        embeddings_list[i] = embedding[:, 1:]
    else:
        embeddings_list[i] = embedding[:, :k]

#%% setting up for plotting
degrees = adj.sum(axis=1)
methods = ["ase", "lse", "rlse"]
embeddings = dict(zip(methods, embeddings_list))
Code Example #30
n_verts = 200
p = 0.5
n_components = 1
n_sims = 1000

# Run experiment

estimated_latents = np.zeros((n_sims, 2))
for i in range(n_sims):
    graph = er_np(n_verts, p, directed=False, loops=False)

    ase_diag = AdjacencySpectralEmbed(n_components=n_components, diag_aug=True)

    ase = AdjacencySpectralEmbed(n_components=n_components, diag_aug=False)

    diag_latent = ase_diag.fit_transform(graph)
    ase_latent = ase.fit_transform(graph)

    mean_diag_latent = np.mean(diag_latent)
    mean_latent = np.mean(ase_latent)
    estimated_latents[i, 0] = mean_diag_latent
    estimated_latents[i, 1] = mean_latent

diffs = estimated_latents - np.sqrt(p)  # the true latent position is sqrt(p)

# Plot results

plt.style.use("seaborn-white")
sns.set_palette("deep")
sns.set_context("talk", font_scale=1)
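The plotting code is truncated here; a minimal sketch of one way to finish it with the variables already defined, comparing the bias of the two estimators:

# Compare the bias of the mean estimated latent position with and
# without diagonal augmentation.
fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(diffs[:, 0], bins=50, alpha=0.6, label="diag_aug=True")
ax.hist(diffs[:, 1], bins=50, alpha=0.6, label="diag_aug=False")
ax.axvline(0, color="black", linestyle="--")
ax.set_xlabel(r"mean estimated latent position $-\ \sqrt{p}$")
ax.legend()
plt.show()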