Esempio n. 1
0
    def test_sim(self):
        """Match two correlated 3-block SBM graphs using a similarity matrix.

        Both graphs are embedded, the first embedding is sign-aligned to the
        second, and the inner products of the embeddings are handed to the
        matcher as ``S``. The test is then repeated after dropping the last
        vertex of the first graph so that the two inputs differ in size.
        """
        n = 150
        n_blocks = 3
        block_size = int(n / 3)
        correlation = 0.9
        community_sizes = np.array([block_size] * n_blocks)
        connection_probs = np.array(
            [
                [0.2, 0.01, 0.01],
                [0.01, 0.1, 0.01],
                [0.01, 0.01, 0.2],
            ]
        )
        A1, A2 = sbm_corr(
            community_sizes,
            connection_probs,
            correlation,
            directed=False,
            loops=False,
        )

        # The same estimator object is refit for each graph.
        embedder = AdjacencySpectralEmbed(n_components=3, algorithm="truncated")
        latent_1 = embedder.fit_transform(A1)
        latent_2 = embedder.fit_transform(A2)
        aligned_1 = SignFlips().fit_transform(latent_1, latent_2)

        identity = np.arange(n)

        similarity = aligned_1 @ latent_2.T
        matched = self.barygm.fit(A1, A2, S=similarity)
        # Fraction of positions where the permutation equals the identity.
        accuracy = sum(matched.perm_inds_ == identity) / n
        self.assertTrue(accuracy >= 0.7)

        # Remove the last vertex from the first graph (and its embedding row)
        # so the matcher receives inputs of unequal size.
        A1 = A1[:-1, :-1]
        aligned_1 = aligned_1[:-1, :]
        similarity = aligned_1 @ latent_2.T

        matched = self.barygm.fit(A1, A2, S=similarity)
        accuracy = sum(matched.perm_inds_ == identity) / n
        self.assertTrue(accuracy >= 0.6)
def embed(adj, n_components=40, ptr=False):
    """Embed an adjacency matrix and report elbow locations of its spectrum.

    Parameters
    ----------
    adj
        Adjacency matrix to embed.
    n_components : int, default=40
        Number of components passed to ``AdjacencySpectralEmbed``.
    ptr : bool, default=False
        When true, ``adj`` is first transformed with ``pass_to_ranks``.

    Returns
    -------
    tuple
        ``(out_latent, in_latent, singular_values, elbow_inds)``. The result
        of ``fit_transform`` is unpacked into two arrays, so the embedder is
        expected to return a pair here (presumably a directed graph — confirm
        against callers).
    """
    matrix = pass_to_ranks(adj) if ptr else adj
    # Only the elbow indices are used; the accompanying values are discarded.
    elbow_positions, _ = select_dimension(augment_diagonal(matrix), n_elbows=4)
    embedder = AdjacencySpectralEmbed(n_components=n_components)
    out_latent, in_latent = embedder.fit_transform(matrix)
    return (
        out_latent,
        in_latent,
        embedder.singular_values_,
        np.array(elbow_positions),
    )
Esempio n. 3
0
    def test_dimensions(self):
        """Check the pipeline helper against a manually run embedding.

        The fixture graph is converted to a sparse matrix, passed to ranks and
        embedded directly; the result must match what
        ``adjacency_spectral_embedding`` yields for the same graph and seed.
        """
        seed = 1234
        adjacency = nx.to_scipy_sparse_matrix(self.graph.copy())
        ranked_adjacency = graspologic.utils.pass_to_ranks(adjacency)
        expected = AdjacencySpectralEmbed(
            n_components=100, n_elbows=None, svd_seed=seed
        ).fit_transform(ranked_adjacency)

        actual = adjacency_spectral_embedding(self.graph.copy(), svd_seed=seed)
        np.testing.assert_array_almost_equal(expected, actual.embeddings())
Esempio n. 4
0
def spectral_clustering(adj, n_components=4, method="lse", return_embedding=False):
    """Cluster the nodes of a graph from a spectral embedding.

    Parameters
    ----------
    adj
        Adjacency matrix of the graph to embed.
    n_components : int, default=4
        Number of embedding dimensions handed to the embedder.
    method : {"ase", "lse"}, default="lse"
        ``"ase"`` uses ``AdjacencySpectralEmbed``; ``"lse"`` uses
        ``LaplacianSpectralEmbed`` with ``form="R-DAD"``.
    return_embedding : bool, default=False
        When true, the embedding is returned alongside the labels.

    Returns
    -------
    pred_labels
        Cluster labels predicted by ``AutoGMMCluster``.
    latent
        The embedding; only returned when ``return_embedding`` is true.

    Raises
    ------
    ValueError
        If ``method`` is neither ``"ase"`` nor ``"lse"``.
    """
    if method == "ase":
        embedder = AdjacencySpectralEmbed(n_components=n_components)
    elif method == "lse":
        embedder = LaplacianSpectralEmbed(n_components=n_components, form="R-DAD")
    else:
        # Previously an unknown method fell through and crashed later with an
        # UnboundLocalError on `embedder`; fail early with a clear message.
        raise ValueError(f"method must be 'ase' or 'lse', got {method!r}")
    latent = embedder.fit_transform(adj)
    # The number of mixture components is fixed at 4, independent of
    # `n_components` (which only controls the embedding dimension).
    gc = AutoGMMCluster(min_components=4, max_components=4)
    pred_labels = gc.fit_predict(latent)
    if return_embedding:
        return pred_labels, latent
    return pred_labels
Esempio n. 5
0
 def test_passing_embeddings(self):
     """Exercise LatentDistributionTest when given embeddings, not graphs.

     Malformed inputs must raise the expected exception type, and a pair
     of well-formed embeddings must be accepted.
     """
     np.random.seed(123)
     A1 = er_np(20, 0.8)
     A2 = er_np(20, 0.8)
     X1 = AdjacencySpectralEmbed(n_components=2).fit_transform(A1)
     X2 = AdjacencySpectralEmbed(n_components=2).fit_transform(A2)
     X3 = AdjacencySpectralEmbed(n_components=1).fit_transform(A2)

     def run(first, second):
         # Every case gets its own freshly constructed test object.
         return LatentDistributionTest(input_graph=False).fit_predict(first, second)

     # Embeddings with an unexpected number of array dimensions.
     with self.assertRaises(ValueError):
         run(X1, X2.reshape(-1, 1, 1))
     with self.assertRaises(ValueError):
         run(X1.reshape(-1, 1, 1), X2)

     # Embeddings whose number of components do not agree.
     for first, second in ((X1, X3), (X3, X1)):
         with self.assertRaises(ValueError):
             run(first, second)

     # Objects that are not arrays at all, in either argument position.
     for junk in ("hello there", {"hello": "there"}):
         with self.assertRaises(TypeError):
             run(junk, X1)
         with self.assertRaises(TypeError):
             run(X1, junk)

     # A non-finite entry in an otherwise valid embedding.
     with self.assertRaises(ValueError):
         X1_w_inf = X1.copy()
         X1_w_inf[1, 1] = np.inf
         run(X1_w_inf, X2)

     # Finally, well-formed input must run without raising.
     run(X1, X2)
Esempio n. 6
0
    def test_different_aligners(self):
        """Check that the alignment options change the test outcome.

        The second embedding is negated. Without alignment the value at
        index 1 of the result (presumably the p-value) must fall below 0.05;
        with either aligner it must not.
        """
        np.random.seed(314)
        graph_a = er_np(100, 0.8)
        graph_b = er_np(100, 0.8)
        X1 = AdjacencySpectralEmbed(n_components=2).fit_transform(graph_a)
        X2 = AdjacencySpectralEmbed(n_components=2).fit_transform(graph_b)
        # Flip every sign so that only an aligner can reconcile the two.
        X2 = -X2

        unaligned = latent_distribution_test(
            X1, X2, input_graph=False, align_type=None
        )
        self.assertTrue(unaligned[1] < 0.05)

        sign_flipped = latent_distribution_test(
            X1, X2, input_graph=False, align_type="sign_flips"
        )
        self.assertTrue(sign_flipped[1] >= 0.05)

        # This call also verifies that aligner keyword arguments are forwarded.
        procrustes = latent_distribution_test(
            X1,
            X2,
            input_graph=False,
            align_type="seedless_procrustes",
            align_kws={"init": "sign_flips"},
        )
        self.assertTrue(procrustes[1] >= 0.05)
def community_estimation(G1, G2=None, min_components=2, max_components=None):
    """
    Estimate community assignments for one random graph or a pair of graphs.

    A single graph is embedded with ``AdjacencySpectralEmbed``; a pair is
    embedded jointly with ``MultipleASE``, which assumes both graphs share the
    same community structure. Gaussian mixtures are then fitted to the
    embedding for every candidate number of components, and the model with the
    lowest BIC provides the labels.

    Parameters
    ----------
    G1: ndarray (n_vertices, n_vertices)
        Adjacency matrix representing the first random graph.
    G2: ndarray (n_vertices, n_vertices), default=None
        Adjacency matrix representing the second random graph.
    min_components : int, default=2.
        The minimum number of mixture components to consider (unless
        ``max_components=None``, in which case this is the maximum number of
        components to consider and the minimum is 1). If ``max_components`` is
        not None, ``min_components`` must be less than or equal to
        ``max_components``.
    max_components : int or None, default=None.
        The maximum number of mixture components to consider. Must be greater
        than or equal to ``min_components``.

    Returns
    --------
    Zhat: ndarray (n_vertices)
        Vector representing the estimated community assignments of each vertex
        (zero-indexed).
        Example: if Zhat[i] == 2, then the ith vertex is estimated to belong to
        the 3rd community.

    Raises
    ------
    ValueError
        If the requested range of component counts is empty or starts below 1.
    """
    # Resolve the inclusive range of component counts to try.
    if max_components is None:
        lower_ncomponents = 1
        upper_ncomponents = min_components
    else:
        lower_ncomponents = min_components
        upper_ncomponents = max_components

    # Validate before the (expensive) embedding. Without this check an empty
    # range only surfaced as an opaque "argmin of an empty sequence" error.
    if lower_ncomponents < 1:
        raise ValueError(
            f"min_components must be at least 1, got {min_components!r}"
        )
    if upper_ncomponents < lower_ncomponents:
        raise ValueError(
            "min_components must be less than or equal to max_components and "
            f"at least 1; got min_components={min_components!r}, "
            f"max_components={max_components!r}"
        )

    if G2 is None:
        Vhat = AdjacencySpectralEmbed().fit_transform(G1)
    else:
        Vhat = MultipleASE().fit_transform([G1, G2])

    # TODO: use graspologic.cluster.gclust after the bug is fixed
    # for now, manually iterate over GaussianMixture
    models = []
    bics = []
    for n_mixture in range(lower_ncomponents, upper_ncomponents + 1):
        model = GaussianMixture(n_components=n_mixture).fit(Vhat)
        models.append(model)
        bics.append(model.bic(Vhat))

    # Lowest BIC wins; np.argmin picks the first model on ties.
    best_model = models[np.argmin(bics)]
    return best_model.predict(Vhat)
Esempio n. 8
0
        ax=ax,
        plot_type="scattermap",
        sizes=(1, 1),
        color=palette["Ipsi"])
# Second panel: scatter-style adjacency plot of the contralateral matrix.
ax = axs[1]
adjplot(contra_adj,
        ax=ax,
        plot_type="scattermap",
        sizes=(1, 1),
        color=palette["Contra"])

#%%
# Embed both adjacency matrices with the same number of components.
max_n_components = 40
from giskard.utils import careys_rule

ase = AdjacencySpectralEmbed(n_components=max_n_components)
ipsi_out_latent, ipsi_in_latent = ase.fit_transform(ipsi_adj)
# Read the singular values now: the same estimator is refit on the next line,
# after which `ase.singular_values_` is read again for the contra matrix.
ipsi_singular_values = ase.singular_values_
contra_out_latent, contra_in_latent = ase.fit_transform(contra_adj)
contra_singular_values = ase.singular_values_

#%%
# Scree plot of the ipsi singular values; the value from `careys_rule` is
# forwarded as `check_n_components` (presumably a suggested dimensionality —
# confirm against giskard).
check_n_components = careys_rule(ipsi_adj)
ax = screeplot(
    ipsi_singular_values,
    label="Ipsi",
    color=palette["Ipsi"],
    check_n_components=check_n_components,
)
screeplot(
    contra_singular_values,
def adjacency_spectral_embedding(
    graph: Union[nx.Graph, nx.DiGraph, nx.OrderedGraph, nx.OrderedDiGraph],
    dimensions: int = 100,
    elbow_cut: Optional[int] = None,
    svd_solver_algorithm: SvdAlgorithmType = "randomized",
    svd_solver_iterations: int = 5,
    svd_seed: Optional[int] = None,
    weight_attribute: str = "weight",
) -> Embeddings:
    r"""
    Given a directed or undirected networkx graph (*not* multigraph), generate an
    Embeddings object.

    Adjacency spectral embeddings are extremely egocentric, implying that results are
    slanted toward the core-periphery of each node. This is in contrast to Laplacian
    spectral embeddings, which look further into the latent space when it captures
    change.

    `Adjacency Spectral Embedding Tutorial
    <https://microsoft.github.io/graspologic/tutorials/embedding/AdjacencySpectralEmbed.html>`_

    Graphs will always have their diagonal augmented. In other words, a self-loop
    will be created for each node with a weight corresponding to the weighted degree.

    Lastly, all weights will be rescaled based on their relative rank in the graph,
    which is beneficial in minimizing anomalous results if some edge weights are
    extremely atypical of the rest of the graph.

    Parameters
    ----------
    graph : Union[nx.Graph, nx.OrderedGraph, nx.DiGraph, nx.OrderedDiGraph]
        An undirected or directed graph. The graph **must**:

        - be fully numerically weighted (every edge must have a real, numeric weight
          or else it will be treated as an unweighted graph)
        - be a basic graph (meaning it should not be a multigraph; if you have a
          multigraph you must first decide how you want to handle the weights of the
          edges between two nodes, whether summed, averaged, last-wins,
          maximum-weight-only, etc)
    dimensions : int (default=100)
        Dimensions to use for the svd solver.
        For undirected graphs, if ``elbow_cut==None``, you will receive an embedding
        that has ``nodes`` rows and ``dimensions`` columns.
        For directed graphs, if ``elbow_cut==None``, you will receive an embedding that
        has ``nodes`` rows and ``2*dimensions`` columns.
        If ``elbow_cut`` is specified to be not ``None``, we will cut the embedding at
        ``elbow_cut`` elbow, but the provided ``dimensions`` will be used in the
        creation of the SVD.
    elbow_cut : Optional[int] (default=None)
        Using a process described by Zhu & Ghodsi in their paper "Automatic
        dimensionality selection from the scree plot via the use of profile likelihood",
        truncate the dimensionality of the return on the ``elbow_cut``-th elbow.
        By default this value is ``None`` but can be used to reduce the dimensionality
        of the returned tensors.
    svd_solver_algorithm : str (default="randomized")
        allowed values: {'randomized', 'full', 'truncated'}

        SVD solver to use:

            - 'randomized'
                Computes randomized svd using
                :func:`sklearn.utils.extmath.randomized_svd`
            - 'full'
                Computes full svd using :func:`scipy.linalg.svd`
                Does not support ``graph`` input of type scipy.sparse.csr_matrix
            - 'truncated'
                Computes truncated svd using :func:`scipy.sparse.linalg.svds`
    svd_solver_iterations : int (default=5)
        Number of iterations for randomized SVD solver. Not used by 'full' or
        'truncated'. The default is larger than the default in randomized_svd
        to handle sparse matrices that may have large slowly decaying spectrum.
    svd_seed : Optional[int] (default=None)
        Used to seed the PRNG used in the ``randomized`` svd solver algorithm.
    weight_attribute : str (default="weight")
        The edge dictionary key that contains the weight of the edge.

    Returns
    -------
    Embeddings

    Raises
    ------
    beartype.roar.BeartypeCallHintParamViolation if parameters do not match type hints
    ValueError if values are not within appropriate ranges or allowed values

    See Also
    --------
    graspologic.pipeline.embed.Embeddings
    graspologic.embed.AdjacencySpectralEmbed
    graspologic.embed.select_svd

    Notes
    -----
    The singular value decomposition:

    .. math:: A = U \Sigma V^T

    is used to find an orthonormal basis for a matrix, which in our case is the
    adjacency matrix of the graph. These basis vectors (in the matrices U or V) are
    ordered according to the amount of variance they explain in the original matrix.
    By selecting a subset of these basis vectors (through our choice of dimensionality
    reduction) we can find a lower dimensional space in which to represent the graph.

    References
    ----------
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E.  "A
       Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs,"
       Journal of the American Statistical Association, Vol. 107(499), 2012

    .. [2] Levin, K., Roosta-Khorasani, F., Mahoney, M. W., & Priebe, C. E. (2018).
        Out-of-sample extension of graph adjacency spectral embedding. PMLR: Proceedings
        of Machine Learning Research, 80, 2975-2984.

    .. [3] Zhu, M. and Ghodsi, A. (2006). Automatic dimensionality selection from the
        scree plot via the use of profile likelihood. Computational Statistics & Data
        Analysis, 51(2), pp.918-930.

    """
    # NOTE: the docstring above is a raw string because it contains the LaTeX
    # command ``\Sigma``; in a regular string ``\S`` is an invalid escape sequence.

    # --- argument validation -------------------------------------------------
    check_argument(dimensions >= 1, "dimensions must be positive")

    check_argument(elbow_cut is None or elbow_cut >= 1, "elbow_cut must be positive")

    check_argument(
        svd_solver_algorithm in __SVD_SOLVER_TYPES,
        f"svd_solver_algorithm must be one of the values in {','.join(__SVD_SOLVER_TYPES)}",
    )

    check_argument(svd_solver_iterations >= 1, "svd_solver_iterations must be positive")

    check_argument(
        svd_seed is None or 0 <= svd_seed <= 2**32 - 1,
        "svd_seed must be a nonnegative, 32-bit integer",
    )

    check_argument(
        not graph.is_multigraph(),
        "Multigraphs are not supported; you must determine how to represent at most "
        "one edge between any two nodes, and handle the corresponding weights "
        "accordingly",
    )

    # --- decide whether the graph can be treated as weighted -----------------
    used_weight_attribute: Optional[str] = weight_attribute
    if not is_real_weighted(graph, weight_attribute=weight_attribute):
        warnings.warn(
            f"Graphs with edges that do not have a real numeric weight set for every "
            f"{weight_attribute} attribute on every edge are treated as an unweighted "
            f"graph - which presumes all weights are `1.0`. If this is incorrect, "
            f"please add a '{weight_attribute}' attribute to every edge with a real, "
            f"numeric value (e.g. an integer or a float) and call this function again."
        )
        used_weight_attribute = None  # this supersedes what the user said, because
        # not all of the weights are real numbers, if they exist at all
        # this weight=1.0 treatment actually happens in nx.to_scipy_sparse_matrix()

    # Freeze the node order once so the matrix rows and the returned labels agree.
    node_labels = np.array(list(graph.nodes()))

    graph_as_csr = nx.to_scipy_sparse_matrix(
        graph, weight=used_weight_attribute, nodelist=node_labels
    )

    # A disconnected graph is only warned about, not rejected.
    if not is_fully_connected(graph):
        warnings.warn("More than one connected component detected")

    # --- preprocessing: drop loops, rank-transform weights, augment diagonal --
    graph_sans_loops = remove_loops(graph_as_csr)

    ranked_graph = pass_to_ranks(graph_sans_loops)

    augmented_graph = augment_diagonal(ranked_graph)

    # --- embedding -----------------------------------------------------------
    embedder = AdjacencySpectralEmbed(
        n_components=dimensions,
        n_elbows=None,  # in the short term, we do our own elbow finding
        algorithm=svd_solver_algorithm,
        n_iter=svd_solver_iterations,
        svd_seed=svd_seed,
        concat=False,
        diag_aug=False,  # the diagonal was already augmented above
    )
    results = embedder.fit_transform(augmented_graph)
    results_arr: np.ndarray

    if elbow_cut is None:
        # NOTE(review): if the graph is directed but the embedder returned a single
        # array rather than a (left, right) tuple, np.concatenate would iterate
        # over its rows and fail on axis=1 - confirm the embedder always returns
        # a tuple for directed input.
        if isinstance(results, tuple) or graph.is_directed():
            results_arr = np.concatenate(results, axis=1)
        else:
            results_arr = results
    else:
        # Truncate every embedding to the columns up to the requested elbow before
        # (for tuple results) placing left and right side by side.
        column_index = _index_of_elbow(embedder.singular_values_, elbow_cut)
        if isinstance(results, tuple):
            left, right = results
            left = left[:, :column_index]
            right = right[:, :column_index]
            results_arr = np.concatenate((left, right), axis=1)
        else:
            results_arr = results[:, :column_index]

    embeddings = Embeddings(node_labels, results_arr)
    return embeddings
Esempio n. 10
0
#%%
import numpy as np
import seaborn as sns

# Build a symmetric 2x2 matrix from p and q: p^2 + q^2 on the diagonal and
# 2pq off the diagonal.
p = 0.7
q = 0.2
B = np.array([[p ** 2 + q ** 2, 2 * p * q], [2 * p * q, p ** 2 + q ** 2]])

from graspologic.embed import AdjacencySpectralEmbed

# X, D, Y = selectSVD(B, n_components=1)
# NOTE(review): `plt` is not imported anywhere in this visible fragment -
# confirm matplotlib.pyplot is imported elsewhere before these calls run.
plt.figure()
sns.heatmap(B, square=True, annot=True)

# One-component embedding of B itself, with diagonal augmentation disabled.
X = AdjacencySpectralEmbed(n_components=1, diag_aug=False).fit_transform(B)

# Reconstruct a matrix from the one-column embedding to compare against B.
B_low_rank = X @ X.T

plt.figure()
sns.heatmap(B_low_rank, square=True, annot=True)
Esempio n. 11
0
from graspologic.embed import AdjacencySpectralEmbed
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ortho_group
from graspologic.embed import selectSVD

sns.set_context("talk")

# NOTE(review): `Vs` is never appended to in the visible lines - presumably
# filled by code past the end of this fragment; confirm.
Vs = []
for i in range(10):
    # A1..A4 and `labels` are defined outside this fragment.
    As = [A1, A2, A3, A4]
    Xs = []
    fig, axs = plt.subplots(1, 4, figsize=(16, 4))
    # NOTE(review): this inner loop reuses the name `i` of the outer loop; the
    # outer iteration still advances, but `i` no longer holds the outer index
    # once the inner loop has run.
    for i, A in enumerate(As):
        ase = AdjacencySpectralEmbed(n_components=2)
        X = ase.fit_transform(A)
        # Multiply the embedding by a random orthogonal matrix of matching size.
        Q = ortho_group.rvs(X.shape[1])
        X = X @ Q
        Xs.append(X)
        # One scatter panel per graph, using the first two embedding columns.
        sns.scatterplot(x=X[:, 0],
                        y=X[:, 1],
                        hue=labels,
                        ax=axs[i],
                        legend=False)
        # ax.set(xticks=[], yticks=[])
    plt.tight_layout()

    # Place the four rotated embeddings side by side, column-wise.
    Z = np.concatenate(Xs, axis=1)
    # ase = AdjacencySpectralEmbed(n_components=2)
    # V = ase.fit_transform(Z)