Esempio n. 1
0
def sample_edges_corr_diffmarg(P, Q, R, directed=False, loops=False):
    """
    Sample two correlated binary graphs whose marginals may differ.

    The first graph is drawn from ``P``. The edge probabilities of the
    second graph are then chosen entry-wise, conditional on the first graph:

    - where ``G1`` has an edge:
      ``Q + R * sqrt((1 - P) * Q * (1 - Q) / P)``
    - where ``G1`` has no edge:
      ``Q - R * sqrt(P * Q * (1 - Q) / (1 - P))``

    Parameters
    ----------
    P: np.ndarray, shape (n_vertices, n_vertices)
        Matrix of probabilities (between 0 and 1) for the first random graph.

    Q: np.ndarray, shape (n_vertices, n_vertices)
        Matrix of probabilities (between 0 and 1) for the second random graph.

    R: np.ndarray, shape (n_vertices, n_vertices)
        Matrix of correlation (between 0 and 1) between graph pairs.

    directed: boolean, optional (default=False)
        If False, output adjacency matrix will be symmetric. Otherwise, output
        adjacency matrix will be asymmetric.

    loops: boolean, optional (default=False)
        If False, no edges will be sampled in the diagonal. Otherwise, edges
        are sampled in the diagonal.

    Returns
    -------
    G1: ndarray (n_vertices, n_vertices)
        Adjacency matrix the same size as P representing a random graph.

    G2: ndarray (n_vertices, n_vertices)
        Adjacency matrix the same size as P representing a random graph.

    Raises
    ------
    TypeError
        If ``P``, ``Q`` or ``R`` is not exactly a ``numpy.ndarray``.
    ValueError
        If any matrix is not 2-dimensional, if ``P`` is not square, or if
        ``Q`` or ``R`` does not have the same shape as ``P``.
    """
    # Validate P, then Q, then R; for each matrix the order is
    # type -> dimensionality -> shape, so the first failure wins.
    for label, mat in (("P", P), ("Q", Q), ("R", R)):
        if type(mat) is not np.ndarray:
            raise TypeError(f"{label} must be numpy.ndarray")
        if mat.ndim != 2:
            raise ValueError(
                f"{label} must have dimension 2 (n_vertices, n_vertices)")
        if label == "P":
            n_rows, n_cols = mat.shape
            if n_rows != n_cols:
                raise ValueError("P must be a square matrix")
        elif mat.shape != P.shape:
            raise ValueError(f"{label} must have the same shape as P")

    # check directed and loops
    check_dirloop(directed, loops)

    G1 = sample_edges(P, directed=directed, loops=loops)

    # Conditional edge probabilities for the second graph, given G1.
    prob_if_edge = Q + R * np.sqrt((1 - P) * Q * (1 - Q) / P)
    prob_if_no_edge = Q - R * np.sqrt(P * Q * (1 - Q) / (1 - P))
    P2 = np.where(G1 == 1, prob_if_edge, prob_if_no_edge)

    G2 = sample_edges(P2, directed=directed, loops=loops)
    return G1, G2
def sample_null_distribution(p_mat, tstat_func, n_samples=1000, parallel=True):
    """
    Build a sorted null distribution of a test statistic.

    Each sample draws a directed, loop-free graph from ``p_mat`` with
    ``sample_edges`` and evaluates ``tstat_func`` on it. When a draw is not
    fully connected it is redrawn up to 10 more times; if none of the
    redraws is connected, the last draw is used anyway and a message is
    printed.

    Parameters
    ----------
    p_mat : array-like
        Edge probability matrix. It is converted once with ``np.array`` so
        the parallel and serial paths hand the same kind of object to
        ``sample_edges`` (previously the serial path passed ``p_mat``
        unconverted on its first draw).
    tstat_func : callable
        Called with a sampled adjacency matrix; its return value is one
        entry of the null distribution.
    n_samples : int, optional (default=1000)
        Number of graphs to sample.
    parallel : bool, optional (default=True)
        If True, samples are computed through ``Parallel``/``delayed``
        (presumably joblib) with ``n_jobs=-2``; each job reseeds
        ``np.random`` with its own seed drawn up front from the current
        global state. If False, samples are computed in a ``tqdm`` loop
        using the global random state directly.

    Returns
    -------
    null : np.ndarray
        The ``n_samples`` test statistics, sorted with ``np.sort``.
    """
    max_retries = 10
    probs = np.array(p_mat)

    def draw_graph():
        # One draw plus up to `max_retries` redraws until connected.
        A = sample_edges(probs, directed=True, loops=False)
        if is_fully_connected(A):
            return A
        print("Original sample was not fully connected, trying again...")
        for _ in range(max_retries):
            A = sample_edges(probs, directed=True, loops=False)
            if is_fully_connected(A):
                return A
        print("Did not sample connected graph after 10 tries.")
        # Fall through with the last (disconnected) draw, as before.
        return A

    def sample_and_tstat(seed=None):
        # Reseeding makes each parallel job reproducible on its own.
        if seed is not None:
            np.random.seed(seed)
        return tstat_func(draw_graph())

    if parallel:
        seeds = np.random.randint(1e8, size=n_samples)
        null = Parallel(n_jobs=-2, verbose=10)(delayed(sample_and_tstat)(seed)
                                               for seed in seeds)
    else:
        null = [sample_and_tstat() for _ in tqdm(range(n_samples))]

    return np.sort(np.array(null))
 def sample_and_tstat(seed=None):
     """
     Draw one directed, loop-free graph and return its test statistic.

     ``p_mat`` and ``tstat_func`` are taken from the enclosing scope. If
     ``seed`` is given, ``np.random`` is reseeded first. A draw that is not
     fully connected is redrawn up to 10 times; when every redraw fails,
     the last one is used and a message is printed.
     """
     if seed is not None:
         np.random.seed(seed)

     adjacency = sample_edges(np.array(p_mat), directed=True, loops=False)
     if not is_fully_connected(adjacency):
         print("Original sample was not fully connected, trying again...")
         for _ in range(10):
             adjacency = sample_edges(np.array(p_mat),
                                      directed=True,
                                      loops=False)
             if is_fully_connected(adjacency):
                 break
         else:
             # No redraw produced a connected graph.
             print("Did not sample connected graph after 10 tries.")

     return tstat_func(adjacency)
Esempio n. 4
0
 def setup_class(cls):
     """Store a degree-corrected probability matrix and one sampled graph."""
     np.random.seed(8888)
     n_verts, base_prob = 1000, 0.5
     # Per-vertex degree corrections drawn from Beta(2, 5).
     degree_corr = np.random.beta(2, 5, size=n_verts)
     probs = np.full((n_verts, n_verts), base_prob)
     probs = probs * np.outer(degree_corr, degree_corr)
     # Zero the diagonal: no self-loops.
     probs -= np.diag(np.diag(probs))
     sampled = sample_edges(probs, directed=True, loops=False)
     cls.p_mat = probs
     cls.graph = sampled
Esempio n. 5
0
 def setup_class(cls):
     """Store a two-group latent-position probability matrix and a sample."""
     np.random.seed(8888)
     n_verts = 500
     # Two latent positions, each repeated for n_verts vertices.
     positions = (np.array([0.1, 0.9]), np.array([0.9, 0.1]))
     latent = np.concatenate(
         [np.tile(pos, reps=(n_verts, 1)) for pos in positions], axis=0)
     probs = latent @ latent.T
     # Zero the diagonal: no self-loops.
     probs -= np.diag(np.diag(probs))
     sampled = sample_edges(probs)
     cls.p_mat = probs
     cls.graph = sampled
Esempio n. 6
0
    def test_SBM_score(self):
        """Check score() / score_samples() on a two-block SBM sample."""
        # tests score() and score_sample()
        block_probs = np.array([[0.75, 0.25], [0.25, 0.75]])
        n_verts = 100
        total = n_verts * 2
        labels = _n_to_labels(np.array([n_verts, n_verts]))
        p_mat = _block_to_full(block_probs, labels, shape=(total, total))
        graph = sample_edges(p_mat, directed=True)

        estimator = SBMEstimator(max_comm=4)
        _test_score(estimator, p_mat, graph)

        # A graph of a different size than the one scored above must be
        # rejected.
        subgraph = graph[1:100, 1:100]
        with pytest.raises(ValueError):
            estimator.score_samples(graph=subgraph)
Esempio n. 7
0
 def setUp(self) -> None:
     """Store a two-group latent-position probability matrix and a sample."""
     np.random.seed(8888)
     n_verts = 500
     # Two latent positions, each repeated for n_verts vertices.
     positions = (np.array([0.1, 0.9]), np.array([0.9, 0.1]))
     latent = np.concatenate(
         [np.tile(pos, reps=(n_verts, 1)) for pos in positions], axis=0)
     probs = latent @ latent.T
     # Zero the diagonal: no self-loops.
     probs -= np.diag(np.diag(probs))
     sampled = sample_edges(probs)
     self.p_mat = probs
     self.graph = sampled
Esempio n. 8
0
    def test_RDPG_fit(self):
        """Fit an RDPG estimator and compare its p_mat_ to the true one."""
        np.random.seed(8888)
        n_points = 2000
        latent = hardy_weinberg(np.random.uniform(0, 1, n_points))

        true_probs = latent @ latent.T
        # Zero the diagonal: no self-loops.
        true_probs -= np.diag(np.diag(true_probs))
        sampled = sample_edges(true_probs)

        rdpg = RDPGEstimator(loops=False, n_components=3)
        rdpg.fit(sampled)

        assert_allclose(rdpg.p_mat_, true_probs, atol=0.2)
Esempio n. 9
0
    def test_SBM_fit_unsupervised(self):
        """Fit an SBM without labels; check recovered blocks and p_mat_."""
        np.random.seed(12345)
        n_verts = 1500

        block_probs = np.array([
            [0.7, 0.1, 0.1],
            [0.1, 0.9, 0.1],
            [0.05, 0.1, 0.75],
        ])
        true_labels = _n_to_labels(np.array([500, 500, 500]))
        true_probs = _block_to_full(block_probs, true_labels,
                                    (n_verts, n_verts))
        # Zero the diagonal: no self-loops.
        true_probs -= np.diag(np.diag(true_probs))
        sampled = sample_edges(true_probs, directed=True, loops=False)

        model = SBMEstimator(directed=True, loops=False)
        model.fit(sampled)

        ari = adjusted_rand_score(true_labels, model.vertex_assignments_)
        assert ari > 0.95
        assert_allclose(true_probs, model.p_mat_, atol=0.12)
Esempio n. 10
0
 def setUpClass(cls) -> None:
     """Store a degree-corrected 4-block probability matrix, labels, graph."""
     np.random.seed(8888)
     block_probs = np.array([
         [0.9, 0.2, 0.05, 0.1],
         [0.1, 0.7, 0.1, 0.1],
         [0.2, 0.4, 0.8, 0.5],
         [0.1, 0.2, 0.1, 0.7],
     ])
     block_sizes = np.array([1000, 1000, 500, 500])
     total = block_sizes.sum()
     # Per-vertex degree corrections drawn from Beta(2, 5).
     degree_corr = np.random.beta(2, 5, size=total)
     block_labels = _n_to_labels(block_sizes)
     probs = _block_to_full(block_probs, block_labels, (total, total))
     probs = probs * np.outer(degree_corr, degree_corr)
     # Zero the diagonal: no self-loops.
     probs -= np.diag(np.diag(probs))
     sampled = sample_edges(probs, directed=True, loops=False)
     cls.p_mat = probs
     cls.labels = block_labels
     cls.g = sampled
Esempio n. 11
0
    def test_DCER_sample(self):
        """Check DCEREstimator.sample() argument handling and its output."""
        np.random.seed(8888)
        estimator = DCEREstimator(directed=True, loops=False)
        g = self.graph

        # Sampling before fitting must fail.
        with pytest.raises(NotFittedError):
            estimator.sample()

        estimator.fit(g)
        with pytest.raises(ValueError):
            estimator.sample(n_samples=-1)

        with pytest.raises(TypeError):
            estimator.sample(n_samples="nope")

        # Build a small degree-corrected ER model for the sampling check.
        # (The former `p_mat = self.p_mat` assignment was dead: it was
        # overwritten here before ever being read.)
        B = 0.5
        dc = np.random.uniform(0.25, 0.75, size=100)
        p_mat = np.outer(dc, dc) * B
        p_mat -= np.diag(np.diag(p_mat))
        g = sample_edges(p_mat, directed=True)
        estimator.fit(g)
        # Overwrite the fitted matrix so samples are drawn from the true one.
        estimator.p_mat_ = p_mat
        _test_sample(estimator, p_mat, n_samples=1000, atol=0.2)
Esempio n. 12
0
    def test_DCSBM_sample(self):
        """Check DCSBMEstimator.sample() argument handling and its output."""
        np.random.seed(8888)
        model = DCSBMEstimator(directed=True, loops=False)
        block_probs = np.array([[0.9, 0.1], [0.1, 0.9]])
        degree_corr = np.random.uniform(0.25, 0.75, size=100)
        block_labels = _n_to_labels([50, 50])

        true_probs = _block_to_full(block_probs, block_labels, (100, 100))
        true_probs = true_probs * np.outer(degree_corr, degree_corr)
        # Zero the diagonal: no self-loops.
        true_probs -= np.diag(np.diag(true_probs))
        sampled = sample_edges(true_probs, directed=True)

        # Sampling before fitting must fail.
        with pytest.raises(NotFittedError):
            model.sample()

        model.fit(sampled, y=block_labels)

        with pytest.raises(ValueError):
            model.sample(n_samples=-1)

        with pytest.raises(TypeError):
            model.sample(n_samples="nope")

        # Overwrite the fitted matrix so samples are drawn from the true one.
        model.p_mat_ = true_probs
        _test_sample(model, true_probs, n_samples=1000, atol=0.1)
def construct_feedforward_P(n, p=0.5, delta=0):
    """
    Build an ``n x n`` edge probability matrix with a feedforward bias.

    Entries strictly above the diagonal are ``p + delta``, entries strictly
    below are ``p - delta``, and the diagonal is 0.

    NOTE(review): this header was reconstructed. The body previously sat,
    unreachable, after the ``return`` of ``sample_upset``, while this name
    was called but never defined. The keyword names come from the call
    sites; the default values are an assumption to be confirmed.
    """
    triu_inds = np.triu_indices(n, k=1)
    p_upper = p + delta
    p_lower = p - delta
    P = np.zeros((n, n))
    P[triu_inds] = p_upper
    # Swapping (rows, cols) addresses the mirrored, lower-triangle entries.
    P[triu_inds[::-1]] = p_lower
    return P


def sample_upset():
    """
    Sample a directed, loop-free graph with no feedforward bias.

    Uses the module-level ``n`` and ``p`` with ``delta=0``, so every
    off-diagonal edge probability equals ``p``.
    """
    P = construct_feedforward_P(n, p=p, delta=0)
    A = sample_edges(P, directed=True, loops=False)
    return A


# Demo parameters: graph size, base edge probability, and feedforward bias.
n = 30
p = 0.5
delta = 0.1

P = construct_feedforward_P(n, p=p, delta=delta)

A = sample_edges(P, directed=True, loops=False)

fig, axs = plt.subplots(1, 3, figsize=(12, 4))

# TODO make a plot of Phat
title = r"$P$" + "\n"
title += r"$p = $" + f"{p}, " + r"$\delta = $" + f"{delta}"
ax = axs[0]
heatmap(P, vmin=0, vmax=1, cbar=False, ax=ax, title=title)
# Label each triangle with its edge probability. Assuming the heatmap uses
# matrix-style axes (x = column, y = row), (n/4, 3n/4) lies below the
# diagonal, where P is p - delta.
ax.text(n / 4, 3 * n / 4, r"$p - \delta$", ha="center", va="center")
# (3n/4, n/4) lies above the diagonal, where P is p + delta; this label
# previously repeated "p - delta".
ax.text(3 * n / 4,
        n / 4,
        r"$p + \delta$",
        ha="center",
        va="center",
        color="white")
Esempio n. 15
0
def sample_edges_corr(P, R, directed=False, loops=False):
    """
    Generate a pair of correlated graphs with Bernoulli distribution.
    Both G1 and G2 are binary matrices.

    The first graph is drawn from ``P``. The second graph is drawn with
    entry-wise probability ``P + R * (1 - P)`` where ``G1`` has an edge and
    ``P * (1 - R)`` where it does not.

    Parameters
    ----------
    P: np.ndarray, shape (n_vertices, n_vertices)
        Matrix of probabilities (between 0 and 1) for a random graph.

    R: np.ndarray, shape (n_vertices, n_vertices)
        Matrix of correlation (between 0 and 1) between graph pairs.
        Must have the same shape as ``P``.

    directed: boolean, optional (default=False)
        If False, output adjacency matrix will be symmetric. Otherwise, output adjacency
        matrix will be asymmetric.

    loops: boolean, optional (default=False)
        If False, no edges will be sampled in the diagonal. Otherwise, edges
        are sampled in the diagonal.

    References
    ----------
    .. [1] Vince Lyzinski, et al. "Seeded Graph Matching for Correlated Erdos-Renyi Graphs",
       Journal of Machine Learning Research 15, 2014

    Returns
    -------
    G1: ndarray (n_vertices, n_vertices)
        Adjacency matrix the same size as P representing a random graph.

    G2: ndarray (n_vertices, n_vertices)
        Adjacency matrix the same size as P representing a random graph.

    Raises
    ------
    TypeError
        If ``P`` or ``R`` is not exactly a ``numpy.ndarray``.
    ValueError
        If ``P`` or ``R`` is not 2-dimensional, if ``P`` is not square, or
        if ``R`` does not have the same shape as ``P``.

    Examples
    --------
    >>> np.random.seed(1)
    >>> p = 0.5
    >>> r = 0.3
    >>> R = r * np.ones((5, 5))
    >>> P = p * np.ones((5, 5))

    To sample a correlated graph pair based on P and R matrices:

    >>> sample_edges_corr(P, R, directed = False, loops = False)
    (array([[0., 1., 0., 0., 0.],
            [1., 0., 0., 0., 0.],
            [0., 0., 0., 0., 1.],
            [0., 0., 0., 0., 1.],
            [0., 0., 1., 1., 0.]]), array([[0., 1., 0., 0., 0.],
            [1., 0., 1., 0., 1.],
            [0., 1., 0., 1., 1.],
            [0., 0., 1., 0., 1.],
            [0., 1., 1., 1., 0.]]))
    """
    # test input
    # check P
    if type(P) is not np.ndarray:
        raise TypeError("P must be numpy.ndarray")
    if len(P.shape) != 2:
        raise ValueError("P must have dimension 2 (n_vertices, n_vertices)")
    if P.shape[0] != P.shape[1]:
        raise ValueError("P must be a square matrix")

    # check R
    if type(R) is not np.ndarray:
        raise TypeError("R must be numpy.ndarray")
    if len(R.shape) != 2:
        raise ValueError("R must have dimension 2 (n_vertices, n_vertices)")
    # Compare both dimensions: checking only R.shape[0] let an (n, m)
    # matrix with m != n slip through.
    if R.shape != P.shape:
        raise ValueError("R must have the same shape as P")

    # check directed and loops
    check_dirloop(directed, loops)

    G1 = sample_edges(P, directed=directed, loops=loops)
    # Edge probabilities of the second graph, conditional on G1.
    P2 = np.where(G1 == 1, P + R * (1 - P), P * (1 - R))
    G2 = sample_edges(P2, directed=directed, loops=loops)
    return G1, G2
Esempio n. 16
0
#%%

from graspologic.simulations import sample_edges, sbm
from graspologic.utils import cartprod
import seaborn as sns

# Three equally sized communities and their block connection probabilities.
n_per_comm = 50
B = np.array([[0.8, 0.1, 0.1], [0.1, 0.75, 0.05], [0.1, 0.05, 0.6]])
# Only the labels of this SBM draw are used; the sampled graph is discarded.
_, labels = sbm([n_per_comm] * 3, B, return_labels=True)
# Expand the block matrix into the full per-vertex probability matrix.
P = B[np.ix_(labels, labels)]
sns.heatmap(P)

#%%
fig, ax = plt.subplots(1, 1, figsize=(8, 4))
true_eigvals = np.linalg.eigvalsh(P)
n_sims = 1000
# Pool the eigenvalues of every sampled adjacency matrix into one list.
all_estimated_eigvals = []
for _ in range(n_sims):
    A = sample_edges(P, directed=False, loops=True)
    all_estimated_eigvals.extend(np.linalg.eigvalsh(A))

sns.histplot(all_estimated_eigvals, ax=ax, stat="density")

# eigvalsh returns ascending eigenvalues, so the reversed head is the
# three largest eigenvalues of P.
for top_eigval in true_eigvals[::-1][:3]:
    ax.axvline(top_eigval, color="darkred")

#%% 

# Spectral-norm distance between P and the last sampled graph.
np.linalg.norm(P - A, ord=2)