Beispiel #1
0
def test_ase_three_blocks():
    """
    A 3-block SBM embedded with ASE should be clustered into exactly
    3 components, with a perfect adjusted Rand index.
    """
    np.random.seed(1)

    # Simulate a graph with three equally sized communities
    block_size = 50
    block_sizes = [block_size, block_size, block_size]
    block_probs = np.array([[0.8, 0.3, 0.2], [0.3, 0.8, 0.3], [0.2, 0.3, 0.8]])
    true_labels = np.repeat([1, 2, 3], repeats=block_size)
    adjacency = sbm(n=block_sizes, p=block_probs)

    # Estimate latent positions from the adjacency matrix
    latent_positions = AdjacencySpectralEmbed(n_components=5).fit_transform(
        adjacency
    )

    # Cluster the embedding; the true labels are passed so `ari_` is available
    clusterer = AutoGMMCluster(max_components=10)
    clusterer.fit(latent_positions, true_labels)

    # The selected model must have three components ...
    assert_equal(clusterer.n_components_, 3)

    # ... and must recover the communities perfectly
    assert_allclose(clusterer.ari_, 1)
Beispiel #2
0
    def test_ase_three_blocks(self):
        """
        Over repeated simulations, a 3-block SBM embedded with ASE should be
        clustered into exactly 3 components with a perfect adjusted Rand index.
        """
        np.random.seed(3)
        n_repeats = 10

        # Simulation parameters: three equally sized communities
        block_size = 50
        block_sizes = [block_size, block_size, block_size]
        block_probs = np.array(
            [[0.8, 0.3, 0.2], [0.3, 0.8, 0.3], [0.2, 0.3, 0.8]]
        )
        true_labels = np.repeat([1, 2, 3], repeats=block_size)

        for _ in range(n_repeats):
            adjacency = sbm(n=block_sizes, p=block_probs)

            # Estimate latent positions from the adjacency matrix
            embedder = AdjacencySpectralEmbed(n_components=5)
            latent_positions = embedder.fit_transform(adjacency)

            # Cluster the embedding against the known labels.
            # NOTE(review): only `min_components` is given here; presumably it
            # acts as the upper bound of the search when no `max_components`
            # is passed -- confirm against GaussianCluster.
            clusterer = GaussianCluster(min_components=10)
            clusterer.fit(latent_positions, true_labels)

            best_k = clusterer.n_components_

            # The selected model must have three components ...
            assert_equal(best_k, 3)

            # ... and the ARI recorded for that model must be perfect
            assert_allclose(clusterer.ari_.loc[best_k], 1)
Beispiel #3
0
 def test_transform_correct_types(self):
     """
     Check the container type returned by out-of-sample ``transform``.

     For every fixture graph, vertex 1 is held out with ``remove_vertices``,
     the embedder is fitted on the remaining graph and the held-out data is
     transformed.  When the fitted model has a right latent position
     (``latent_right_`` is not ``None``) the result must be a tuple of two
     arrays; otherwise it must be a single array with two columns.
     """
     ase = AdjacencySpectralEmbed(n_components=2)
     for graph in self.testgraphs.values():
         A, a = remove_vertices(graph, 1, return_removed=True)
         ase.fit(A)
         # The fitted model only carries a right embedding in the two-sided case
         directed = ase.latent_right_ is not None
         w = ase.transform(a)
         if directed:
             self.assertIsInstance(w, tuple)
             self.assertIsInstance(w[0], np.ndarray)
             self.assertIsInstance(w[1], np.ndarray)
         else:
             self.assertIsInstance(w, np.ndarray)
             # atleast_2d: a single held-out vertex may come back as 1-D
             self.assertEqual(np.atleast_2d(w).shape[1], 2)
Beispiel #4
0
    def test_directed_correct_latent_positions(self):
        """
        Out-of-sample vertices of a directed SBM should land near the mean of
        their own community on the matching side of the embedding.

        For the left (resp. right) in-sample embedding, the distance from each
        community mean to the matching left (resp. right) out-of-sample
        position must be smaller than the distance to the opposite-side
        out-of-sample position.
        """
        # Fixture: directed 3-block SBM and a 3-dimensional embedder
        ase = AdjacencySpectralEmbed(n_components=3)
        P = np.array([[0.9, 0.1, 0.1], [0.3, 0.6, 0.1], [0.1, 0.5, 0.6]])
        M, labels = sbm([200, 200, 200], P, directed=True, return_labels=True)

        # Held-out indices: position 0 plus every position where the label
        # changes, i.e. one vertex per community.
        oos_idx = np.nonzero(np.r_[1, np.diff(labels)[:-1]])[0]

        # Drop the matching labels.  The pops run sequentially, so each later
        # index is applied to the already-shortened list.
        labels = list(labels)
        for idx in oos_idx:
            labels.pop(idx)

        # Split off the out-of-sample vertices, fit on the rest, transform them
        A, a = remove_vertices(M, indices=oos_idx, return_removed=True)
        latent_left, latent_right = ase.fit_transform(A)
        oos_left, oos_right = ase.transform(a)

        # (in-sample embedding, same-side oos positions, other-side oos positions)
        sides = (
            (latent_left, oos_left, oos_right),
            (latent_right, oos_right, oos_left),
        )
        for latent, oos_same, oos_other in sides:
            df = pd.DataFrame(
                {
                    "Type": labels,
                    "Dimension 1": latent[:, 0],
                    "Dimension 2": latent[:, 1],
                    "Dimension 3": latent[:, 2],
                }
            )
            # Per-community mean position of the in-sample vertices
            means = df.groupby("Type").mean()

            # The diagonal pairs community i's mean with the i-th held-out vertex
            avg_dist_within = np.diag(pairwise_distances(means, oos_same))
            avg_dist_between = np.diag(pairwise_distances(means, oos_other))
            self.assertTrue(all(avg_dist_within < avg_dist_between))
Beispiel #5
0
 def setUp(self):
     """
     Build the shared fixtures: four small 2-block SBM graphs (with and
     without edge weights, undirected and directed) and a seeded embedder.
     """
     block_sizes = [10, 10]
     block_probs = np.array([[0.9, 0.1], [0.1, 0.9]])

     # Weight samplers per block pair, with their keyword arguments
     weight_fns = [[normal, poisson], [poisson, normal]]
     weight_kwargs = [
         [{"loc": 3, "scale": 1}, {"lam": 5}],
         [{"lam": 5}, {"loc": 3, "scale": 1}],
     ]

     # Keys: G + (uw: no weights | w: weighted) + optional d: directed
     self.testgraphs = {
         "Guw": sbm(n=block_sizes, p=block_probs),
         "Gw": sbm(n=block_sizes, p=block_probs, wt=weight_fns, wtargs=weight_kwargs),
         "Guwd": sbm(n=block_sizes, p=block_probs, directed=True),
         "Gwd": sbm(
             n=block_sizes,
             p=block_probs,
             wt=weight_fns,
             wtargs=weight_kwargs,
             directed=True,
         ),
     }
     self.ase = AdjacencySpectralEmbed(n_components=2, svd_seed=9001)
Beispiel #6
0
 def test_transform_closeto_fit_transform(self):
     """
     ``transform`` applied to the training graph should roughly reproduce
     the output of ``fit_transform`` (absolute tolerance 0.15), with and
     without diagonal augmentation.

     When ``fit_transform`` returns a tuple, passing the bare matrix to
     ``transform`` must raise ``TypeError``; the pair ``(A.T, A)`` is
     used instead and both halves are compared.
     """
     tolerance = 0.15
     for diag_aug in (True, False):
         for A in self.testgraphs.values():
             ase = AdjacencySpectralEmbed(n_components=2, diag_aug=diag_aug)
             ase.fit(A)
             Y = ase.fit_transform(A)

             # Single-array result: compare directly
             if isinstance(Y, np.ndarray):
                 X = ase.transform(A)
                 self.assertTrue(np.allclose(X, Y, atol=tolerance))
                 continue

             # Anything other than an array or a tuple is unexpected
             if not isinstance(Y, tuple):
                 raise TypeError

             # Tuple result: a bare matrix is rejected, a pair is accepted
             with self.assertRaises(TypeError):
                 X = ase.transform(A)
             X = ase.transform((A.T, A))
             self.assertTrue(np.allclose(X[0], Y[0], atol=tolerance))
             self.assertTrue(np.allclose(X[1], Y[1], atol=tolerance))
Beispiel #7
0
 def test_unconnected_warning(self):
     """
     Fitting on a sparse-matrix ``er_nm(100, 10)`` graph must emit a
     ``UserWarning`` (presumably because the graph is not connected).
     """
     sparse_adjacency = csr_matrix(er_nm(100, 10))
     with self.assertWarns(UserWarning):
         embedder = AdjacencySpectralEmbed()
         embedder.fit(sparse_adjacency)
Beispiel #8
0
 def test_transform_networkx(self):
     """
     Smoke test: ``fit`` and ``transform`` accept a networkx graph
     (a 5x5 grid) without raising.
     """
     grid_graph = nx.grid_2d_graph(5, 5)
     embedder = AdjacencySpectralEmbed(n_components=2)
     embedder.fit(grid_graph)
     embedder.transform(grid_graph)
Beispiel #9
0
 def test_input_checks(self):
     """
     A non-boolean ``diag_aug`` must be rejected with ``TypeError``.
     """
     # Only the constructor call belongs inside the context manager.  A
     # no-argument ``fit()`` call here would raise ``TypeError`` on its own
     # (missing positional argument) and would let the test pass even if
     # ``diag_aug`` were never validated.
     # NOTE(review): relies on ``diag_aug`` being validated in ``__init__``
     # -- confirm against the AdjacencySpectralEmbed implementation.
     with self.assertRaises(TypeError):
         AdjacencySpectralEmbed(diag_aug="over 9000")
Beispiel #10
0
def _ase_embed(mat, atlas, graph_path, ID, subgraph_name="all_nodes",
               n_components=None, prune=0, norm=1):
    r"""
    Compute the adjacency spectral embedding (ASE) of a graph and save it.

    The adjacency spectral embedding is a k-dimensional Euclidean
    representation of the graph based on its adjacency matrix. It relies on an
    SVD to reduce the dimensionality to the specified k, or if k is
    unspecified, can find a number of dimensions automatically.

    Parameters
    ----------
    mat : ndarray or nx.Graph
        An nxn adjacency matrix or graph object. Only used when `norm` is
        below 1 and `prune` is below 1; otherwise the matrix is derived from
        `graph_path`. The input is never modified.
    atlas : str
        The name of an atlas (indicating the node definition). Used in the
        log message and the output file name.
    graph_path : str
        Path of the graph file. It is handed to `CleanGraphs` and determines
        the output location (`<grandparent dir>/embeddings`) and file name.
    ID : str
        Subject identifier. Not used by this function.
    subgraph_name : str
        Label of the embedded subgraph; used in the log message and the
        output file name.
    n_components : int or None
        Passed through to `AdjacencySpectralEmbed`.
    prune : int
        When `float(prune) >= 1`, the matrix is replaced by the array loaded
        from the path returned by `CleanGraphs.prune_graph()`.
    norm : int
        When `float(norm) >= 1`, the matrix is taken from
        `CleanGraphs.normalize_graph()` instead of `mat`.

    Returns
    -------
    out_path : str or None
        File path passed to `np.save` for the ASE embedding, or None when the
        cleaned matrix is entirely (near-)zero and nothing was embedded.

    Notes
    -----
    The singular value decomposition:

    .. math:: A = U \Sigma V^T

    is used to find an orthonormal basis for a matrix, which in our case is the
    adjacency matrix of the graph. These basis vectors (in the matrices U or
    V) are ordered according to the amount of variance they explain in the
    original matrix. By selecting a subset of these basis vectors (through
    our choice of dimensionality reduction) we can find a lower dimensional
    space in which to represent the graph.

    References
    ----------
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E.  "A
      Consistent Adjacency Spectral Embedding for Stochastic Blockmodel
      Graphs," Journal of the American Statistical Association,
      Vol. 107(499), 2012

    """
    import os
    from pathlib import Path

    import networkx as nx
    import numpy as np
    from graspologic.embed.ase import AdjacencySpectralEmbed
    from pynets.stats.netstats import CleanGraphs

    # Adjacency Spectral embedding
    print(
        f"Embedding unimodal asetome for atlas: {atlas} and "
        f"{subgraph_name}..."
    )
    ase = AdjacencySpectralEmbed(n_components=n_components)
    cg = CleanGraphs(None, None, graph_path, prune, norm)

    if float(norm) >= 1:
        G = cg.normalize_graph()
        mat_clean = nx.to_numpy_array(G)
    elif isinstance(mat, nx.Graph):
        # The documented contract allows a graph object; convert it explicitly
        mat_clean = nx.to_numpy_array(mat)
    else:
        # Copy so that the in-place clean-up below never touches the
        # caller's array
        mat_clean = np.array(mat)

    if float(prune) >= 1:
        graph_path_tmp = cg.prune_graph()[1]
        mat_clean = np.load(graph_path_tmp)

    # Zero out NaN and +/-inf entries
    mat_clean[~np.isfinite(mat_clean)] = 0

    # Nothing to embed if every entry is numerically zero
    if (np.abs(mat_clean) < 0.0000001).all() or np.isnan(np.sum(mat_clean)):
        return None

    ase_fit = ase.fit_transform(mat_clean)

    # Embeddings live next to the directory that holds the graph file
    dir_path = str(Path(os.path.dirname(graph_path)).parent)
    namer_dir = f"{dir_path}/embeddings"
    os.makedirs(namer_dir, exist_ok=True)

    out_path = f"{namer_dir}/gradient-ASE" \
               f"_{atlas}_{subgraph_name}_{os.path.basename(graph_path)}"

    print("Saving...")
    np.save(out_path, ase_fit)

    return out_path
Beispiel #11
0
import pytest
import numpy as np
from graspologic.embed.ase import AdjacencySpectralEmbed
from graspologic.simulations.simulations import sbm
from graspologic.nominate import SpectralVertexNomination

# global constants for tests
n_verts = 50  # vertices per block
p = np.array([[0.7, 0.25, 0.2], [0.25, 0.8, 0.3], [0.2, 0.3, 0.85]])
labels = np.array([0] * n_verts + [1] * n_verts + [2] * n_verts)
# Use the builtin `int`: the `np.int` alias was deprecated in NumPy 1.20 and
# removed in 1.24, where it raises AttributeError at import time.
adj = np.array(sbm(3 * [n_verts], p), dtype=int)
embeder = AdjacencySpectralEmbed()
pre_embeded = embeder.fit_transform(adj)


def _nominate(X, seed, nominator=None, k=None):
    """
    Fit a nominator on `X`, predict for `seed` and return the nomination list.

    A `SpectralVertexNomination(n_neighbors=k)` is created when no
    `nominator` is supplied.  Both arrays returned by `predict` are asserted
    to have shape `(X.shape[0], seed.shape[0])`.
    """
    model = SpectralVertexNomination(n_neighbors=k) if nominator is None else nominator
    model.fit(X)

    vertex_count = X.shape[0]
    ranked, distances = model.predict(seed)

    expected_shape = (vertex_count, seed.shape[0])
    assert ranked.shape == expected_shape
    assert distances.shape == expected_shape
    return ranked


def _test_seed_input_dimensions():
    """
    A 3-dimensional seed array must be rejected with ``IndexError``.

    NOTE(review): the leading underscore presumably keeps pytest from
    collecting this function -- confirm whether that is intentional.
    """
    with pytest.raises(IndexError):
        # Builtin `int` replaces the `np.int` alias removed in NumPy 1.24
        _nominate(adj, np.zeros((5, 5, 5), dtype=int))

Beispiel #12
0
 def test_unconnected_warning(self):
     """
     Fitting on an ``er_nm(100, 10)`` graph must emit a ``UserWarning``
     (presumably because the graph is not connected).
     """
     adjacency = er_nm(100, 10)
     with pytest.warns(UserWarning):
         embedder = AdjacencySpectralEmbed()
         embedder.fit(adjacency)