def run_pvalue():
    """Compute a p-value for every unordered pair of graphs.

    Works on module-level state: ``graphs`` (indexed along its first axis,
    one graph per index) and the parsed options in ``args``. Only entries
    with ``i < j`` are filled; the diagonal and the lower triangle of the
    result stay at zero.

    ``args.test`` selects the procedure:

    * ``1`` -- ``block_permutation_pvalue`` with ``test='gcorr'``
    * ``2`` -- ``dcsbm_pvalue``
    * ``3`` -- ``pearson_exact_pvalue``

    ``args.Z`` (read for tests 1 and 2 only) selects the community
    assignment:

    * ``1`` -- the module-level ``Ztrue``
    * ``2`` -- test 1: ``Zhat[(i, j)]``; test 2: no assignment is passed
      and ``min_comm`` is 1
    * ``3`` -- test 1: ``community_estimation`` with both component bounds
      set to ``max_comm``; test 2: no assignment is passed and
      ``min_comm`` equals ``max_comm``

    Returns:
        A ``(num_graphs, num_graphs)`` NumPy array of p-values.

    Raises:
        ValueError: If ``args.test`` is not 1, 2 or 3, or if ``args.Z`` is
            not 1, 2 or 3 for a test that needs it. Previously an unknown
            test silently produced an all-zero matrix and an unknown ``Z``
            surfaced as an ``UnboundLocalError`` inside the loop.
    """
    num_graphs = graphs.shape[0]
    pvalues = np.zeros((num_graphs, num_graphs))

    test = args.test
    if test not in (1, 2, 3):
        raise ValueError(
            'unsupported args.test: {!r} (expected 1, 2 or 3)'.format(test))

    # args.Z is only meaningful for the two community-based tests, so it is
    # neither read nor validated for the Pearson test.
    z_mode = None
    if test != 3:
        z_mode = args.Z
        if z_mode not in (1, 2, 3):
            raise ValueError(
                'unsupported args.Z: {!r} (expected 1, 2 or 3)'.format(z_mode))

    if test == 2:
        # These settings do not depend on the pair, so resolve them once.
        # With Z == 1 the known assignment is handed over directly; the
        # original author notes that min_comm is NOT used in that case.
        known_Z = Ztrue if z_mode == 1 else None
        min_comm = max_comm if z_mode == 3 else 1

    for i in range(num_graphs):
        for j in range(i + 1, num_graphs):
            print('{}, {}'.format(i, j))
            G1 = graphs[i, ...]
            G2 = graphs[j, ...]

            if test == 1:
                if z_mode == 1:
                    Z = Ztrue
                elif z_mode == 2:
                    # Looked up by the (i, j) pair -- presumably a mapping of
                    # precomputed assignments; verify against the loader.
                    Z = Zhat[(i, j)]
                else:
                    Z = community_estimation(G1, G2,
                                             min_components=max_comm,
                                             max_components=max_comm)
                pvalues[i, j] = block_permutation_pvalue(
                    G1, G2, test='gcorr', num_perm=args.num_iter, Z=Z)
            elif test == 2:
                pvalues[i, j] = dcsbm_pvalue(
                    G1, G2,
                    min_comm=min_comm,
                    max_comm=max_comm,
                    epsilon1=epsilon1,
                    epsilon2=epsilon2,
                    pooled_variance=args.pooled_variance,
                    num_perm=args.num_iter,
                    Z1=known_Z,
                    Z2=known_Z)
            else:
                pvalues[i, j] = pearson_exact_pvalue(G1, G2)

    return pvalues
# each row stores the optimal community assignment for a pair of graphs # easier for iterating over Zhat = np.zeros((num_tests, num_vertices)) # key: (i, j), value: optimal assignment for graph i and j # easier for access Zhat_dict = {} count = 0 for i in range(num_graphs): for j in range(i, num_graphs): print('{}, {}'.format(i, j)) G1 = graphs[i, ...] if i == j: assignment = community_estimation(G1, min_components=2, max_components=max_comp) else: G2 = graphs[j, ...] assignment = community_estimation(G1, G2, min_components=2, max_components=max_comp) Zhat[count, :] = assignment count += 1 Zhat_dict[(i, j)] = assignment with open('outputs/{}_Zhat_{}.pkl'.format(args.data, args.transformation), 'wb') as f: pickle.dump(Zhat, f) with open('outputs/{}_Zhat_dict_{}.pkl'.format(args.data, args.transformation),
n = num_vertices[i] Z = np.repeat([0], n) if sim == 'bern': if 'sbm' in setting: G1, G2 = sbm_corr_diffmarg(n, p, q, r) else: G1, G2 = er_corr_diffmarg(n, p, q, r) elif sim == 'gauss': if 'sbm' in setting: G1, G2 = sbm_corr_weighted(n, mu1, mu2, Sigma) else: G1, G2 = er_corr_weighted(n, mu1, mu2, Sigma) if setting == 'sbm_estblock': Z = community_estimation(G1, G2, min_components=5) G2_vertex_perm = vertex_permutation(G2) G2_block_perm = block_permutation(G2, Z) test_stats_null['pearson_vertex_perm'][i, rep] = pearson_graph(G1, G2_vertex_perm) test_stats_alt['pearson_vertex_perm'][i, rep] = pearson_graph(G1, G2) test_stats_null['pearson_block_perm'][i, rep] = pearson_graph(G1, G2_block_perm) test_stats_alt['pearson_block_perm'][i, rep] = pearson_graph(G1, G2) test_stats_null['gcorr_vertex_perm'][i, rep] = gcorr(G1, G2_vertex_perm, Z) test_stats_alt['gcorr_vertex_perm'][i, rep] = gcorr(G1, G2, Z) test_stats_null['gcorr_block_perm'][i, rep] = gcorr(G1, G2_block_perm, Z) test_stats_alt['gcorr_block_perm'][i, rep] = gcorr(G1, G2, Z) pearson_exact_pvals[i, rep] = pearson_exact_pvalue(G1, G2) # compute power
result = { 'true': corr, 'gcorr_pooled': np.zeros((num_corr, num_rep)), 'gcorr_unpooled': np.zeros((num_corr, num_rep)), 'gcorr_dcsbm_pooled': np.zeros((num_corr, num_rep)), 'gcorr_dcsbm_unpooled': np.zeros((num_corr, num_rep)) } for i, r in enumerate(corr): print('iteration {} in {}: r = {}'.format(i + 1, num_corr, r)) for j in tqdm(range(num_rep)): g1, g2 = dcsbm_corr(n, p, r, theta) if args.sim == 'estblock': # note `community_estimation` uses MASE # whereas `graspologic.DCSBM` uses LSE zest = community_estimation(g1, g2, maxcomm) result['gcorr_pooled'][i, j] = gcorr(g1, g2, zest, pooled_variance=True) result['gcorr_unpooled'][i, j] = gcorr(g1, g2, zest, pooled_variance=False) result['gcorr_dcsbm_pooled'][i, j] = gcorr_dcsbm(g1, g2, maxcomm, pooled_variance=True, Z=None) result['gcorr_dcsbm_unpooled'][i, j] = gcorr_dcsbm(
def run_test_stats():
    """Compute a test statistic for every unordered pair of graphs.

    Works on module-level state: ``graphs`` (indexed along its first axis,
    one graph per index) and the parsed options in ``args``. Only entries
    with ``i < j`` are filled; the diagonal and the lower triangle of the
    result stay at zero.

    ``args.test`` selects the statistic:

    * ``1`` -- ``gcorr`` with ``pooled_variance=args.pooled_variance``
    * ``2`` -- ``gcorr_dcsbm``
    * ``3`` -- ``pearson_graph``

    ``args.Z`` (read for tests 1 and 2 only) selects the community
    assignment:

    * ``1`` -- the module-level ``Ztrue``
    * ``2`` -- test 1: ``Zhat[(i, j)]``; test 2: no assignment is passed
      and ``min_comm`` is 1
    * ``3`` -- test 1: ``community_estimation`` with both component bounds
      set to ``max_comm``; test 2: no assignment is passed and
      ``min_comm`` equals ``max_comm``

    Returns:
        The ``(num_graphs, num_graphs)`` NumPy array of statistics. When
        ``args.return_fit`` is truthy, a tuple ``(test_stats, dcsbm_fit)``
        is returned instead, where ``dcsbm_fit`` maps ``(i, j)`` to the
        second value returned by ``gcorr_dcsbm(..., return_fit=True)``; it
        is only populated for test 2 and is empty otherwise.

    Raises:
        ValueError: If ``args.test`` is not 1, 2 or 3, or if ``args.Z`` is
            not 1, 2 or 3 for a test that needs it. Previously an unknown
            test silently produced an all-zero matrix and an unknown ``Z``
            surfaced as an ``UnboundLocalError`` inside the loop.
    """
    num_graphs = graphs.shape[0]
    test_stats = np.zeros((num_graphs, num_graphs))
    return_fit = args.return_fit
    # Always initialised so the return path never depends on a name that
    # was bound conditionally.
    dcsbm_fit = {}

    test = args.test
    if test not in (1, 2, 3):
        raise ValueError(
            'unsupported args.test: {!r} (expected 1, 2 or 3)'.format(test))

    # args.Z is only meaningful for the two community-based tests, so it is
    # neither read nor validated for the Pearson test.
    z_mode = None
    if test != 3:
        z_mode = args.Z
        if z_mode not in (1, 2, 3):
            raise ValueError(
                'unsupported args.Z: {!r} (expected 1, 2 or 3)'.format(z_mode))

    if test == 2:
        # None of these settings depend on the pair, so build them once and
        # share them between the return_fit and plain call paths.
        # With Z == 1 the known assignment is handed over directly; the
        # original author notes that min_comm is NOT used in that case.
        known_Z = Ztrue if z_mode == 1 else None
        dcsbm_kwargs = dict(
            min_comm=max_comm if z_mode == 3 else 1,
            max_comm=max_comm,
            epsilon1=epsilon1,
            epsilon2=epsilon2,
            pooled_variance=args.pooled_variance,
            Z1=known_Z,
            Z2=known_Z,
            seed=seed,
        )

    for i in range(num_graphs):
        for j in range(i + 1, num_graphs):
            print('{}, {}'.format(i, j))
            G1 = graphs[i, ...]
            G2 = graphs[j, ...]

            if test == 1:
                if z_mode == 1:
                    Z = Ztrue
                elif z_mode == 2:
                    # Looked up by the (i, j) pair -- presumably a mapping of
                    # precomputed assignments; verify against the loader.
                    Z = Zhat[(i, j)]
                else:
                    Z = community_estimation(G1, G2,
                                             min_components=max_comm,
                                             max_components=max_comm)
                test_stats[i, j] = gcorr(
                    G1, G2, Z, pooled_variance=args.pooled_variance)
            elif test == 2:
                if return_fit:
                    ts, fit = gcorr_dcsbm(G1, G2, return_fit=True,
                                          **dcsbm_kwargs)
                    test_stats[i, j] = ts
                    dcsbm_fit[(i, j)] = fit
                else:
                    test_stats[i, j] = gcorr_dcsbm(G1, G2, **dcsbm_kwargs)
            else:
                test_stats[i, j] = pearson_graph(G1, G2)

    if return_fit:
        return test_stats, dcsbm_fit
    return test_stats