def dcsbm_pvalue(G1,
                 G2,
                 max_comm,
                 num_perm,
                 pooled_variance=True,
                 min_comm=1,
                 epsilon1=1e-3,
                 epsilon2=1e-3,
                 Z1=None,
                 Z2=None):
    """
    Estimate a two-sided p-value for ``gcorr_dcsbm`` via parametric bootstrap.

    A DC-SBM is fitted to each graph separately, ``num_perm`` graphs are
    sampled from each fitted model, and the statistic is recomputed on every
    pair of sampled graphs to form the null distribution.

    Parameters
    ----------
    G1, G2
        The two graphs being compared; forwarded unchanged to
        ``gcorr_dcsbm`` and to ``DCSBMEstimator.fit``.
    max_comm, min_comm
        Bounds on the number of communities, forwarded both to the statistic
        and to the estimators.
    num_perm
        Number of bootstrap replicates drawn from each fitted model.
    pooled_variance, epsilon1, epsilon2
        Forwarded unchanged to ``gcorr_dcsbm``.
    Z1, Z2
        Passed as ``y`` when fitting the model for ``G1`` / ``G2``
        (presumably known community labels -- confirm against the estimator).

    Returns
    -------
    float
        ``(2 * k + 1) / (num_perm + 1)``, where ``k`` is the number of null
        statistics that are ``>=`` the observed one, or the number that are
        not, whichever is smaller.
    """
    # When the community count is fixed, pin the embedding dimension to it as
    # well; otherwise leave both for the estimator to choose.
    n_components = min_comm if min_comm == max_comm else None

    # Keyword arguments shared by the observed and every bootstrap statistic.
    stat_kwargs = dict(min_comm=min_comm,
                       max_comm=max_comm,
                       pooled_variance=pooled_variance,
                       epsilon1=epsilon1,
                       epsilon2=epsilon2)
    observed = gcorr_dcsbm(G1, G2, **stat_kwargs)

    def _fit(graph, labels):
        model = DCSBMEstimator(directed=False,
                               min_comm=min_comm,
                               max_comm=max_comm,
                               n_components=n_components)
        return model.fit(graph, y=labels)

    model_1 = _fit(G1, Z1)
    model_2 = _fit(G2, Z2)

    # Draw all bootstrap graphs up front (G1's model first, then G2's).
    samples_1 = model_1.sample(n_samples=num_perm)
    samples_2 = model_2.sample(n_samples=num_perm)

    null_stats = np.zeros(num_perm)
    for rep in tqdm(range(num_perm)):
        null_stats[rep] = gcorr_dcsbm(samples_1[rep],
                                      samples_2[rep],
                                      **stat_kwargs)

    # Two-sided p-value: double the smaller tail, with the +1 correction.
    n_upper = np.count_nonzero(null_stats >= observed)
    tail = n_upper if n_upper < num_perm / 2 else num_perm - n_upper
    return (2 * tail + 1) / (num_perm + 1)
# Example #2
# 0
# File: test_models.py  Project: zeou1/graspy
    def test_DCSBM_sample(self):
        """Exercise ``DCSBMEstimator.sample``: error cases, then sampling."""
        np.random.seed(8888)
        n_verts = 100
        estimator = DCSBMEstimator(directed=True, loops=False)

        # Reference edge-probability matrix: two-block connectivity scaled by
        # per-vertex degree corrections, with the diagonal zeroed out.
        block_probs = np.array([[0.9, 0.1], [0.1, 0.9]])
        degree_corrections = np.random.uniform(0.25, 0.75, size=n_verts)
        labels = _n_to_labels([50, 50])
        p_mat = _block_to_full(block_probs, labels, (n_verts, n_verts)) * np.outer(
            degree_corrections, degree_corrections
        )
        np.fill_diagonal(p_mat, 0)
        graph = sample_edges(p_mat, directed=True)

        # Sampling from an unfitted estimator must raise.
        with pytest.raises(NotFittedError):
            estimator.sample()

        estimator.fit(graph, y=labels)

        # After fitting, invalid ``n_samples`` values must be rejected.
        for bad_n_samples, expected_error in ((-1, ValueError), ("nope", TypeError)):
            with pytest.raises(expected_error):
                estimator.sample(n_samples=bad_n_samples)

        # Overwrite the fitted matrix with the reference one before handing
        # both to the shared sampling check.
        estimator.p_mat_ = p_mat
        _test_sample(estimator, p_mat, n_samples=1000, atol=0.1)
            G1, G2 = sbm_corr(n, p, args.rho)
        elif args.sim == 'dcsbm':
            theta = np.linspace(100, 1, n[0])
            theta /= theta.sum()
            theta = np.concatenate([theta, theta])
            G1, G2 = dcsbm_corr(n, p, args.rho, theta)

        # null by block permutation: estimate labels Z jointly from G1 and G2,
        # then derive G2_block_perm from G2 and Z via block_permutation
        # NOTE(review): max_comm is passed as `min_components` -- confirm this
        # is intended rather than an upper bound.
        Z = community_estimation(G1, G2, min_components=max_comm)
        # Z = np.repeat([0, 1], n)
        G2_block_perm = block_permutation(G2, Z)

        # null by parametric bootstrap: fit a DC-SBM to each graph
        # independently and keep the first graph sampled from each fit
        G1_dcsbm = DCSBMEstimator(directed=False).fit(G1)
        G2_dcsbm = DCSBMEstimator(directed=False).fit(G2)
        G1_bootstrap = G1_dcsbm.sample()[0]
        G2_bootstrap = G2_dcsbm.sample()[0]

        # Record, for each (statistic, null scheme) pair, the statistic on the
        # original pair ("alt") and on the corresponding null pair ("null").
        # i and rep are defined outside this excerpt (presumably the
        # graph-size index and the replicate index -- confirm).
        # The "alt" entries of the two gcorr variants use the same arguments,
        # as do the two gcorrDC "alt" entries; each is evaluated separately.
        test_stats_alt['gcorr_block_perm'][i, rep] = gcorr(G1, G2, Z)
        test_stats_null['gcorr_block_perm'][i, rep] = gcorr(G1, G2_block_perm, Z)
        test_stats_alt['gcorr_param_bootstrap'][i, rep] = gcorr(G1, G2, Z)
        test_stats_null['gcorr_param_bootstrap'][i, rep] = gcorr(G1_bootstrap, G2_bootstrap, Z)
        test_stats_alt['gcorrDC_param_bootstrap'][i, rep] = gcorr_dcsbm(G1, G2, max_comm)
        test_stats_null['gcorrDC_param_bootstrap'][i, rep] = gcorr_dcsbm(G1_bootstrap, G2_bootstrap, max_comm)
        test_stats_alt['gcorrDC_block_perm'][i, rep] = gcorr_dcsbm(G1, G2, max_comm)
        test_stats_null['gcorrDC_block_perm'][i, rep] = gcorr_dcsbm(G1, G2_block_perm, max_comm)


# compute power
for i in range(num_vertices.size):
    for t in tests: