def sample_hw_graph(thetas):
    """
    Sample an undirected, loop-free graph from Hardy-Weinberg latent positions.

    Parameters
    ----------
    thetas : array-like
        Values forwarded unchanged to ``hardy_weinberg`` to build the latent
        positions (presumably one value per vertex -- confirm against
        ``hardy_weinberg``).

    Returns
    -------
    graph
        Result of ``sample_edges`` on the P matrix with ``directed=False``
        and ``loops=False``.
    p_mat
        Result of ``p_from_latent`` on the latent positions with
        ``rescale=False`` and ``loops=False``.
    latent
        Latent positions returned by ``hardy_weinberg(thetas)``.
    """
    positions = hardy_weinberg(thetas)
    edge_probs = p_from_latent(positions, rescale=False, loops=False)
    adjacency = sample_edges(edge_probs, directed=False, loops=False)
    return adjacency, edge_probs, positions
def gen_hw_graph(n_verts):
    """
    Draw random thetas and sample a loop-free graph from the resulting
    Hardy-Weinberg latent positions.

    Parameters
    ----------
    n_verts : int
        Number of thetas drawn from ``np.random.uniform(0, 1)``; these are
        passed to ``hardy_weinberg``.

    Returns
    -------
    graph
        Result of ``sample_edges`` on the P matrix with ``directed=True``
        and ``loops=False``.
    p_mat
        Result of ``p_from_latent`` on the latent positions with
        ``rescale=False`` and ``loops=False``.
    """
    random_thetas = np.random.uniform(low=0, high=1, size=n_verts)
    positions = hardy_weinberg(random_thetas)
    edge_probs = p_from_latent(positions, rescale=False, loops=False)
    # NOTE(review): edges are sampled with directed=True here, whereas
    # sample_hw_graph uses directed=False -- confirm this is intentional.
    adjacency = sample_edges(edge_probs, directed=True, loops=False)
    return adjacency, edge_probs
def rdpg_corr(X, Y, r, rescale=False, directed=False, loops=False):
    r"""
    Samples a random graph pair based on the latent positions in X (and
    optionally in Y)

    If only X :math:`\in\mathbb{R}^{n\times d}` is given (``Y`` is None), the P
    matrix is calculated as :math:`P = XX^T`. If X, Y
    :math:`\in\mathbb{R}^{n\times d}` are given, then :math:`P = XY^T`. These
    operations correspond to the dot products between a set of latent
    positions, so each row in X or Y represents the latent positions in
    :math:`\mathbb{R}^{d}` for a single vertex in the random graph.
    Note that this function may also rescale or clip the resulting P matrix to
    get probabilities between 0 and 1, or remove loops. A pair of binary random
    graphs is then sampled from the P matrix described by X (and possibly Y),
    with correlation ``r`` between corresponding edges.

    Read more in the :ref:`tutorials <simulations_tutorials>`

    Parameters
    ----------
    X: np.ndarray, shape (n_vertices, n_dimensions)
        latent position from which to generate a P matrix
        if Y is given, interpreted as the left latent position

    Y: np.ndarray, shape (n_vertices, n_dimensions) or None
        right latent position from which to generate a P matrix.
        Pass None to use X for both sides.

    r: float
        The value of the correlation between the same vertices in two graphs.
        Must lie in [-1, 1].

    rescale: boolean, optional (default=False)
        when rescale is True, will subtract the minimum value in
        P (if it is below 0) and divide by the maximum (if it is
        above 1) to ensure that P has entries between 0 and 1. If
        False, elements of P outside of [0, 1] will be clipped.

    directed: boolean, optional (default=False)
        If False, output adjacency matrix will be symmetric. Otherwise, output
        adjacency matrix will be asymmetric.

    loops: boolean, optional (default=False)
        If False, no edges will be sampled in the diagonal. Diagonal elements
        in P matrix are removed prior to rescaling (see above) which may affect
        behavior. Otherwise, edges are sampled in the diagonal.

    Returns
    -------
    G1: ndarray (n_vertices, n_vertices)
        Adjacency matrix of the first sampled graph.

    G2: ndarray (n_vertices, n_vertices)
        Adjacency matrix of the second sampled graph, correlated with G1.

    Raises
    ------
    TypeError
        If ``r`` is not a float, if ``directed`` or ``loops`` is not a bool,
        or if ``Y`` is given and X or Y is not a ``numpy.ndarray``.
    ValueError
        If ``r`` is outside [-1, 1], or if ``Y`` is given and X or Y is not
        2-dimensional or their shapes differ.

    References
    ----------
    .. [1] Vince Lyzinski, Donniell E. Fishkind, Carey E. Priebe.
       "Seeded graph matching for correlated Erdös-Rényi graphs".
       The Journal of Machine Learning Research, January 2014

    Examples
    --------
    >>> np.random.seed(1234)
    >>> X = np.random.dirichlet([1, 1], size=5)
    >>> Y = None

    Generate random latent positions using 2-dimensional Dirichlet distribution.
    Then sample a correlated RDPG graph pair:

    >>> rdpg_corr(X, Y, 0.3, rescale=False, directed=False, loops=False)
    (array([[0., 1., 0., 1., 0.],
           [1., 0., 0., 1., 1.],
           [0., 0., 0., 0., 0.],
           [1., 1., 0., 0., 0.],
           [0., 1., 0., 0., 0.]]), array([[0., 1., 0., 1., 0.],
           [1., 0., 0., 0., 1.],
           [0., 0., 0., 0., 0.],
           [1., 0., 0., 0., 0.],
           [0., 1., 0., 0., 0.]]))
    """
    # check r
    if not np.issubdtype(type(r), np.floating):
        raise TypeError("r is not of type float.")
    elif r < -1 or r > 1:
        msg = "r must between -1 and 1."
        raise ValueError(msg)

    # check directed and loops
    if not isinstance(directed, bool):
        raise TypeError("directed is not of type bool.")
    if not isinstance(loops, bool):
        raise TypeError("loops is not of type bool.")

    # check dimensions of X and Y
    # Identity test is required here: ``Y != None`` on an ndarray compares
    # element-wise and its truth value is ambiguous (raises ValueError).
    if Y is not None:
        # Exact type check (not isinstance) is kept so ndarray subclasses are
        # rejected exactly as before.
        if type(X) is not np.ndarray or type(Y) is not np.ndarray:
            raise TypeError("Latent positions must be numpy.ndarray")
        if X.ndim != 2 or Y.ndim != 2:
            raise ValueError(
                "Latent positions must have dimension 2 (n_vertices, n_dimensions)"
            )
        if X.shape != Y.shape:
            raise ValueError("Dimensions of latent positions X and Y must be the same")
    else:
        # No right latent positions supplied: P is built from X on both sides.
        Y = X

    P = p_from_latent(X, Y, rescale=rescale, loops=loops)
    n = P.shape[0]
    # Constant correlation matrix: every vertex pair shares the same r.
    R = np.full((n, n), r)
    G1, G2 = sample_edges_corr(P, R, directed=directed, loops=loops)
    return G1, G2
#%% import numpy as np import seaborn as sns from graspy.inference import LatentDistributionTest from graspy.simulations import p_from_latent, sample_edges from tqdm import tqdm n_sims = 200 n_verts = 200 n_components = 2 latent_size = (n_verts, n_components) directed = False latent = np.random.uniform(0.2, 0.5, size=latent_size) p_mat = p_from_latent(latent, rescale=False, loops=False) sim_p_vals = np.zeros(n_sims) for i in tqdm(range(n_sims)): graph1 = sample_edges(p_mat, directed=directed, loops=False) graph2 = sample_edges(p_mat, directed=directed, loops=False) ldt = LatentDistributionTest(n_components=n_components, n_bootstraps=1000) out = ldt.fit(graph1, graph2) p_val = ldt.p_ sim_p_vals[i] = p_val #%% from graspy.plot import pairplot pairplot(latent) from graspy.embed import AdjacencySpectralEmbed