def dcsbm_pvalue(G1, G2, max_comm, num_perm, pooled_variance=True, min_comm=1, epsilon1=1e-3, epsilon2=1e-3, Z1=None, Z2=None):
    """
    Estimate the p-value via parametric bootstrap, i.e. fit a DC-SBM to each graph
    and resample from the fitted models to build the null distribution.
    """
    # if we are fixing the number of communities, we should also fix the number of
    # latent dimensions of the embedding; otherwise (when we let the algorithm
    # automatically choose the number of communities) we also let it choose the
    # number of latent dimensions
    if min_comm == max_comm:
        K = min_comm
    else:
        K = None
    obs_test_stat = gcorr_dcsbm(G1, G2, min_comm=min_comm, max_comm=max_comm,
                                pooled_variance=pooled_variance, epsilon1=epsilon1, epsilon2=epsilon2)
    G1_dcsbm = DCSBMEstimator(directed=False, min_comm=min_comm, max_comm=max_comm, n_components=K).fit(G1, y=Z1)
    G2_dcsbm = DCSBMEstimator(directed=False, min_comm=min_comm, max_comm=max_comm, n_components=K).fit(G2, y=Z2)
    # create bootstrap samples
    G1_bootstrap = G1_dcsbm.sample(n_samples=num_perm)
    G2_bootstrap = G2_dcsbm.sample(n_samples=num_perm)
    null_test_stats = np.zeros(num_perm)
    for i in tqdm(range(num_perm)):
        null_test_stats[i] = gcorr_dcsbm(G1_bootstrap[i], G2_bootstrap[i], min_comm=min_comm, max_comm=max_comm,
                                         pooled_variance=pooled_variance, epsilon1=epsilon1, epsilon2=epsilon2)
    num_extreme = np.where(null_test_stats >= obs_test_stat)[0].size
    if num_extreme < num_perm / 2:
        # P(T >= t | H0) is the smaller tail
        return (2 * num_extreme + 1) / (num_perm + 1)
    else:
        # P(T <= t | H0) is the smaller tail
        return (2 * (num_perm - num_extreme) + 1) / (num_perm + 1)
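A minimal usage sketch, assuming dcsbm_pvalue and its helpers (gcorr_dcsbm, DCSBMEstimator, np, tqdm) are in scope as above; graspologic.simulations.sbm is used here only to build example inputs and is not part of the original snippet, and the block sizes and probabilities are illustrative choices:

# Usage sketch (assumption: run in the same module that defines dcsbm_pvalue).
import numpy as np
from graspologic.simulations import sbm

n = [50, 50]
p = [[0.5, 0.2], [0.2, 0.5]]
G1 = sbm(n, p)
G2 = sbm(n, p)  # generated independently of G1, so H0 (no correlation) holds

# with independent graphs we expect a large p-value most of the time
pval = dcsbm_pvalue(G1, G2, max_comm=2, num_perm=100)
print(f"bootstrap p-value: {pval:.3f}")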
def test_DCSBM_sample(self):
    np.random.seed(8888)
    estimator = DCSBMEstimator(directed=True, loops=False)
    B = np.array([[0.9, 0.1], [0.1, 0.9]])
    dc = np.random.uniform(0.25, 0.75, size=100)
    labels = _n_to_labels([50, 50])

    p_mat = _block_to_full(B, labels, (100, 100))
    p_mat = p_mat * np.outer(dc, dc)
    p_mat -= np.diag(np.diag(p_mat))
    g = sample_edges(p_mat, directed=True)

    with pytest.raises(NotFittedError):
        estimator.sample()

    estimator.fit(g, y=labels)

    with pytest.raises(ValueError):
        estimator.sample(n_samples=-1)

    with pytest.raises(TypeError):
        estimator.sample(n_samples="nope")

    estimator.p_mat_ = p_mat
    _test_sample(estimator, p_mat, n_samples=1000, atol=0.1)
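The helper _test_sample is not shown in this snippet; the sketch below is an assumption about the kind of check it presumably performs (comparing empirical edge frequencies from many samples against the target probability matrix), not its actual implementation:

# Hypothetical _test_sample-style check (assumption, not the real helper).
import numpy as np

def _check_sample_mean(estimator, p_mat, n_samples=1000, atol=0.1):
    # draw many graphs from the fitted estimator; result has shape (n_samples, n, n)
    graphs = estimator.sample(n_samples=n_samples)
    # the mean over samples should approximate the edge probability matrix
    empirical_p = graphs.mean(axis=0)
    assert np.allclose(empirical_p, p_mat, atol=atol)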
        G1, G2 = sbm_corr(n, p, args.rho)
    elif args.sim == 'dcsbm':
        theta = np.linspace(100, 1, n[0])
        theta /= theta.sum()
        theta = np.concatenate([theta, theta])
        G1, G2 = dcsbm_corr(n, p, args.rho, theta)

    # null by block permutation
    Z = community_estimation(G1, G2, min_components=max_comm)
    # Z = np.repeat([0, 1], n)
    G2_block_perm = block_permutation(G2, Z)

    # null by parametric bootstrap
    G1_dcsbm = DCSBMEstimator(directed=False).fit(G1)
    G2_dcsbm = DCSBMEstimator(directed=False).fit(G2)
    G1_bootstrap = G1_dcsbm.sample()[0]
    G2_bootstrap = G2_dcsbm.sample()[0]

    test_stats_alt['gcorr_block_perm'][i, rep] = gcorr(G1, G2, Z)
    test_stats_null['gcorr_block_perm'][i, rep] = gcorr(G1, G2_block_perm, Z)
    test_stats_alt['gcorr_param_bootstrap'][i, rep] = gcorr(G1, G2, Z)
    test_stats_null['gcorr_param_bootstrap'][i, rep] = gcorr(G1_bootstrap, G2_bootstrap, Z)
    test_stats_alt['gcorrDC_param_bootstrap'][i, rep] = gcorr_dcsbm(G1, G2, max_comm)
    test_stats_null['gcorrDC_param_bootstrap'][i, rep] = gcorr_dcsbm(G1_bootstrap, G2_bootstrap, max_comm)
    test_stats_alt['gcorrDC_block_perm'][i, rep] = gcorr_dcsbm(G1, G2, max_comm)
    test_stats_null['gcorrDC_block_perm'][i, rep] = gcorr_dcsbm(G1, G2_block_perm, max_comm)

# compute power
for i in range(num_vertices.size):
    for t in tests:
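The power loop above is truncated in this excerpt. The sketch below is a hedged assumption about one common way to finish it (reject when the alternative-run statistic exceeds the empirical (1 - alpha) quantile of the matching null runs), not the script's actual code; it reuses the script's num_vertices, tests, test_stats_alt, and test_stats_null arrays, and alpha = 0.05 is an illustrative choice:

# Hypothetical continuation (assumption, not the script's actual code).
import numpy as np

alpha = 0.05
power = {t: np.zeros(num_vertices.size) for t in tests}
for i in range(num_vertices.size):
    for t in tests:
        # empirical critical value from the null replicates at this graph size
        critical_value = np.quantile(test_stats_null[t][i, :], 1 - alpha)
        # power = fraction of alternative replicates that exceed it
        power[t][i] = np.mean(test_stats_alt[t][i, :] > critical_value)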