def test_SBM_score(self):
    """Run the shared ``_test_score`` check against an ``SBMEstimator``.

    A directed graph is sampled from a two-block probability matrix with
    100 vertices per block, then handed to ``_test_score`` together with
    the generating matrix and an estimator built with ``max_comm=4``.
    """
    block_size = 100
    total = block_size * 2
    block_probs = np.array([[0.75, 0.25], [0.25, 0.75]])
    community_sizes = np.array([block_size, block_size])
    membership = _n_to_labels(community_sizes)
    expected = _block_to_full(block_probs, membership, shape=(total, total))
    adjacency = sample_edges(expected, directed=True)
    model = SBMEstimator(max_comm=4)
    _test_score(model, expected, adjacency)
def setup_class(cls):
    """Create the class-level fixture: a probability matrix and one sample.

    The matrix is a constant 0.5 scaled elementwise by the outer product of
    1000 Beta(2, 5) draws, with its diagonal zeroed. The results are stored
    on the class as ``p_mat`` and ``graph``.
    """
    np.random.seed(8888)
    size, base_prob = 1000, 0.5
    weights = np.random.beta(2, 5, size=size)
    expected = np.full((size, size), base_prob) * np.outer(weights, weights)
    # Subtracting the matrix's own diagonal leaves zeros on the diagonal.
    expected -= np.diag(np.diag(expected))
    adjacency = sample_edges(expected, directed=True, loops=False)
    cls.p_mat = expected
    cls.graph = adjacency
def setup_class(cls):
    """Create the class-level fixture from two repeated latent positions.

    500 copies of ``[0.1, 0.9]`` are stacked on top of 500 copies of
    ``[0.9, 0.1]``; the probability matrix is the Gram matrix of those rows
    with its diagonal zeroed. The results are stored on the class as
    ``p_mat`` and ``graph``.
    """
    np.random.seed(8888)
    per_group = 500
    positions = np.concatenate(
        [
            np.tile(np.array(point), reps=(per_group, 1))
            for point in ([0.1, 0.9], [0.9, 0.1])
        ],
        axis=0,
    )
    expected = positions @ positions.T
    # Subtracting the matrix's own diagonal leaves zeros on the diagonal.
    expected -= np.diag(np.diag(expected))
    adjacency = sample_edges(expected)
    cls.p_mat = expected
    cls.graph = adjacency
def test_RDPG_fit(self):
    """Fit an ``RDPGEstimator`` and compare ``p_mat_`` with the generating matrix.

    Latent positions are ``hardy_weinberg`` applied to 2000 uniform draws;
    the fitted ``p_mat_`` must match the generating matrix within
    ``atol=0.2``.
    """
    np.random.seed(8888)
    thetas = np.random.uniform(0, 1, 2000)
    positions = hardy_weinberg(thetas)
    expected = positions @ positions.T
    # Subtracting the matrix's own diagonal leaves zeros on the diagonal.
    expected -= np.diag(np.diag(expected))
    adjacency = sample_edges(expected)

    model = RDPGEstimator(loops=False, n_components=3)
    model.fit(adjacency)
    assert_allclose(model.p_mat_, expected, atol=0.2)
def test_SBM_fit_unsupervised(self):
    """Fit an ``SBMEstimator`` without labels and compare it to the truth.

    Asserts that the adjusted Rand score between the generating labels and
    ``vertex_assignments_`` exceeds 0.95, and that ``p_mat_`` matches the
    generating probability matrix within ``atol=0.12``.
    """
    np.random.seed(12345)
    total_verts = 1500
    community_sizes = np.array([500, 500, 500])
    block_probs = np.array(
        [
            [0.7, 0.1, 0.1],
            [0.1, 0.9, 0.1],
            [0.05, 0.1, 0.75],
        ]
    )
    true_labels = _n_to_labels(community_sizes)
    expected = _block_to_full(block_probs, true_labels, (total_verts, total_verts))
    # Subtracting the matrix's own diagonal leaves zeros on the diagonal.
    expected -= np.diag(np.diag(expected))
    adjacency = sample_edges(expected, directed=True, loops=False)

    model = SBMEstimator(directed=True, loops=False)
    model.fit(adjacency)

    agreement = adjusted_rand_score(true_labels, model.vertex_assignments_)
    assert agreement > 0.95
    assert_allclose(expected, model.p_mat_, atol=0.12)
def setup_class(cls):
    """Create the class-level fixture: a four-block matrix scaled per vertex.

    The block matrix is expanded to full size via ``_block_to_full``, scaled
    elementwise by the outer product of Beta(2, 5) draws (one per vertex),
    and its diagonal is zeroed. The results are stored on the class as
    ``p_mat``, ``labels`` and ``g``.
    """
    np.random.seed(8888)
    block_probs = np.array(
        [
            [0.9, 0.2, 0.05, 0.1],
            [0.1, 0.7, 0.1, 0.1],
            [0.2, 0.4, 0.8, 0.5],
            [0.1, 0.2, 0.1, 0.7],
        ]
    )
    community_sizes = np.array([1000, 1000, 500, 500])
    weights = np.random.beta(2, 5, size=community_sizes.sum())
    membership = _n_to_labels(community_sizes)
    full_shape = (community_sizes.sum(), community_sizes.sum())
    expected = _block_to_full(block_probs, membership, full_shape)
    expected = expected * np.outer(weights, weights)
    # Subtracting the matrix's own diagonal leaves zeros on the diagonal.
    expected -= np.diag(np.diag(expected))
    adjacency = sample_edges(expected, directed=True, loops=False)
    cls.p_mat = expected
    cls.labels = membership
    cls.g = adjacency
def test_DCER_sample(self):
    """Check ``DCEREstimator.sample`` error handling and sampling output.

    First verifies that ``sample`` raises ``NotFittedError`` before fitting,
    and ``ValueError`` / ``TypeError`` for a negative or non-numeric
    ``n_samples`` after fitting on the class fixture graph. Then refits on a
    small 100-vertex graph, pins ``p_mat_`` to the generating matrix, and
    delegates the comparison to ``_test_sample``.
    """
    np.random.seed(8888)
    estimator = DCEREstimator(directed=True, loops=False)

    # Sampling from an unfitted estimator must be rejected.
    with pytest.raises(NotFittedError):
        estimator.sample()

    estimator.fit(self.graph)
    with pytest.raises(ValueError):
        estimator.sample(n_samples=-1)
    with pytest.raises(TypeError):
        estimator.sample(n_samples="nope")

    # Build a smaller model for the actual sampling check.
    B = 0.5
    dc = np.random.uniform(0.25, 0.75, size=100)
    p_mat = np.outer(dc, dc) * B
    # Subtracting the matrix's own diagonal leaves zeros on the diagonal.
    p_mat -= np.diag(np.diag(p_mat))
    g = sample_edges(p_mat, directed=True)
    estimator.fit(g)
    # Overwrite the fitted matrix so sampling is driven by the known p_mat
    # rather than by the estimate.
    estimator.p_mat_ = p_mat
    _test_sample(estimator, p_mat, n_samples=1000, atol=0.2)
def test_DCSBM_sample(self):
    """Check ``DCSBMEstimator.sample`` error handling and sampling output.

    Verifies ``NotFittedError`` before fitting and ``ValueError`` /
    ``TypeError`` for bad ``n_samples`` after fitting with labels. Then pins
    ``p_mat_`` to the generating matrix and delegates the comparison to
    ``_test_sample``.
    """
    np.random.seed(8888)
    model = DCSBMEstimator(directed=True, loops=False)

    block_probs = np.array([[0.9, 0.1], [0.1, 0.9]])
    weights = np.random.uniform(0.25, 0.75, size=100)
    membership = _n_to_labels([50, 50])
    expected = _block_to_full(block_probs, membership, (100, 100))
    expected = expected * np.outer(weights, weights)
    # Subtracting the matrix's own diagonal leaves zeros on the diagonal.
    expected -= np.diag(np.diag(expected))
    adjacency = sample_edges(expected, directed=True)

    # Sampling from an unfitted estimator must be rejected.
    with pytest.raises(NotFittedError):
        model.sample()

    model.fit(adjacency, y=membership)

    with pytest.raises(ValueError):
        model.sample(n_samples=-1)
    with pytest.raises(TypeError):
        model.sample(n_samples="nope")

    # Overwrite the fitted matrix so sampling is driven by the known matrix.
    model.p_mat_ = expected
    _test_sample(model, expected, n_samples=1000, atol=0.1)
def sample_hw_graph(thetas):
    """Sample an undirected, loopless graph from ``hardy_weinberg(thetas)``.

    Parameters
    ----------
    thetas
        Passed straight to ``hardy_weinberg`` to obtain latent positions.

    Returns
    -------
    tuple
        ``(graph, p_mat, latent)``: the sampled graph, the probability
        matrix from ``p_from_latent`` (``rescale=False``, ``loops=False``),
        and the latent positions.
    """
    positions = hardy_weinberg(thetas)
    probabilities = p_from_latent(positions, rescale=False, loops=False)
    adjacency = sample_edges(probabilities, directed=False, loops=False)
    return adjacency, probabilities, positions
def sample_edges_corr(P, R, directed=False, loops=False):
    """
    Generate a pair of correlated graphs with Bernoulli distribution.
    Both G1 and G2 are binary matrices.

    Parameters
    ----------
    P: np.ndarray, shape (n_vertices, n_vertices)
        Matrix of probabilities (between 0 and 1) for a random graph.

    R: np.ndarray, shape (n_vertices, n_vertices)
        Matrix of correlation (between 0 and 1) between graph pairs.
        Must have the same shape as ``P``.

    directed: boolean, optional (default=False)
        If False, output adjacency matrix will be symmetric. Otherwise, output adjacency
        matrix will be asymmetric.

    loops: boolean, optional (default=False)
        If False, no edges will be sampled in the diagonal. Otherwise, edges
        are sampled in the diagonal.

    References
    ----------
    .. [1] Vince Lyzinski, et al. "Seeded Graph Matching for Correlated Erdos-Renyi Graphs",
       Journal of Machine Learning Research 15, 2014

    Returns
    -------
    G1: ndarray (n_vertices, n_vertices)
        Adjacency matrix the same size as P representing a random graph.

    G2: ndarray (n_vertices, n_vertices)
        Adjacency matrix the same size as P representing a random graph.

    Raises
    ------
    TypeError
        If ``P`` or ``R`` is not exactly a ``numpy.ndarray``.
    ValueError
        If ``P`` or ``R`` is not a square 2-d matrix, or if their shapes
        differ.

    Examples
    --------
    >>> np.random.seed(1)
    >>> p = 0.5
    >>> r = 0.3
    >>> R = r * np.ones((5, 5))
    >>> P = p * np.ones((5, 5))

    To sample a correlated graph pair based on P and R matrices:

    >>> sample_edges_corr(P, R, directed = False, loops = False)
    (array([[0., 1., 0., 0., 0.],
            [1., 0., 0., 0., 0.],
            [0., 0., 0., 0., 1.],
            [0., 0., 0., 0., 1.],
            [0., 0., 1., 1., 0.]]), array([[0., 1., 0., 0., 0.],
            [1., 0., 1., 0., 1.],
            [0., 1., 0., 1., 1.],
            [0., 0., 1., 0., 1.],
            [0., 1., 1., 1., 0.]]))
    """
    # test input
    # The exact type check (rather than isinstance) is kept on purpose: it
    # rejects ndarray subclasses such as np.matrix, whose ``*`` operator is
    # matrix multiplication and would silently corrupt the formula below.
    # check P
    if type(P) is not np.ndarray:
        raise TypeError("P must be numpy.ndarray")
    if P.ndim != 2:
        raise ValueError("P must have dimension 2 (n_vertices, n_vertices)")
    if P.shape[0] != P.shape[1]:
        raise ValueError("P must be a square matrix")

    # check R
    if type(R) is not np.ndarray:
        raise TypeError("R must be numpy.ndarray")
    if R.ndim != 2:
        raise ValueError("R must have dimension 2 (n_vertices, n_vertices)")
    # Compare R against its own columns; comparing against P's columns lets a
    # non-square R of shape (n_vertices, k) slip through.
    if R.shape[0] != R.shape[1]:
        raise ValueError("R must be a square matrix")
    if R.shape != P.shape:
        raise ValueError("R must have the same shape as P")

    # check directed and loops
    check_dirloop(directed, loops)

    G1 = sample_edges(P, directed=directed, loops=loops)
    # Conditional edge probability for the second graph: P + R(1 - P) where
    # G1 has an edge, P(1 - R) where it does not. np.where allocates a new
    # array, so G1 is left untouched.
    P2 = np.where(G1 == 1, P + R * (1 - P), P * (1 - R))
    G2 = sample_edges(P2, directed=directed, loops=loops)
    return G1, G2
def sample(P, directed=False):
    """Print ``directed`` and ``P``, then sample a graph from ``P``.

    Returns whatever ``sample_edges(P, directed=directed)`` returns.
    """
    for value in (directed, P):
        print(value)
    return sample_edges(P, directed=directed)
#%% import numpy as np from graspy.simulations import sample_edges, er_np from graspy.plot import heatmap g = er_np(10, 0.5) heatmap(g) P = 0.5 * np.ones((10, 10)) g = sample_edges(P) heatmap(g) #%% g == 1 P[g == 1] = 100 P[g == 0] = -100 P heatmap(g) heatmap(P) # %% directed = True if directed: sample_edges(P, directed=True) else: sample_edges(P, directed=False) sample_edges(P, directed=directed) # %% def sample(P, directed=False): print(directed) print(P)
def gen_hw_graph(n_verts):
    """Sample a directed, loopless graph from random ``hardy_weinberg`` positions.

    Parameters
    ----------
    n_verts
        Number of uniform draws on [0, 1) fed to ``hardy_weinberg``.

    Returns
    -------
    tuple
        ``(graph, p_mat)``: the sampled graph and the probability matrix
        from ``p_from_latent`` (``rescale=False``, ``loops=False``).
    """
    positions = hardy_weinberg(np.random.uniform(0, 1, n_verts))
    probabilities = p_from_latent(positions, rescale=False, loops=False)
    adjacency = sample_edges(probabilities, directed=True, loops=False)
    return adjacency, probabilities
pois_scale1, acorn=acorn) # could turn this on to add some sbm masses # mu1 = np.array([0.2, 0.05, 0.05]) # mu2 = np.array([0.05, 0.2, 0.05]) # mu3 = np.array([0.05, 0.05, 0.2]) # X = np.concatenate((X, np.tile(mu1, (n_blob1_verts, 1)))) # X = np.concatenate((X, np.tile(mu2, (n_blob2_verts, 1)))) # X = np.concatenate((X, np.tile(mu3, (n_blob3_verts, 1)))) n_verts = X.shape[0] P = hw_scale * X @ X.T graph_uw = sample_edges(P, directed=False, loops=False) print(np.mean(graph_uw)) verts = np.array(range(n_verts)) verts_mod = np.random.choice(range(n_hw_nodes), n_modified_verts, replace=False) lambda_mat = X @ X.T * pois_scale0 lambda_mat[np.ix_(verts_mod, verts_mod)] = P[np.ix_(verts_mod, verts_mod)] * pois_scale1 graph_w = np.random.poisson(lambda_mat) graph_w = symmetrize(graph_w) graph_w = np.multiply(graph_w, graph_uw) heatmap(graph_w, transform="log")
# Simulation script: repeatedly sample two graphs from the same probability
# matrix and record the p-value reported by LatentDistributionTest.
# NOTE(review): `np` is used below but not imported in the visible part of
# the file — presumably imported earlier; confirm.
from graspy.inference import LatentDistributionTest
from graspy.simulations import p_from_latent, sample_edges
from tqdm import tqdm

n_sims = 200
n_verts = 200
n_components = 2
latent_size = (n_verts, n_components)
directed = False
# One shared set of latent positions, uniform on [0.2, 0.5).
latent = np.random.uniform(0.2, 0.5, size=latent_size)
p_mat = p_from_latent(latent, rescale=False, loops=False)
# One p-value slot per simulation.
sim_p_vals = np.zeros(n_sims)
for i in tqdm(range(n_sims)):
    # Both graphs are sampled from the same p_mat.
    graph1 = sample_edges(p_mat, directed=directed, loops=False)
    graph2 = sample_edges(p_mat, directed=directed, loops=False)
    # A fresh test object is built on every iteration.
    ldt = LatentDistributionTest(n_components=n_components, n_bootstraps=1000)
    # `out` is not used afterwards; the p-value is read from `ldt.p_`.
    out = ldt.fit(graph1, graph2)
    p_val = ldt.p_
    sim_p_vals[i] = p_val
#%%
# Plot the generating latent positions next to an embedding of the last
# sampled graph1.
from graspy.plot import pairplot

pairplot(latent)
from graspy.embed import AdjacencySpectralEmbed

# NOTE(review): embeds with 3 components although `latent` was generated with
# n_components = 2 columns — confirm this is intended.
ase = AdjacencySpectralEmbed(n_components=3)
latent_hat = ase.fit_transform(graph1)
pairplot(latent_hat)