def test_all_communities_benchmarks():
    """Smoke-test benchmarking over all communities of the "bigraph" dataset.

    The same candidates (three PageRank filters, three HeatKernel filters and
    an ``AlgorithmSelection`` over those six) are benchmarked three times and
    each result table is printed:

    1. with ``pg.AUC`` as the measure,
    2. with ``pg.Modularity`` and ``sensitive=pg.pRule``,
    3. with ``pg.Modularity`` and a custom mistreatment measure as ``sensitive``.

    There are no assertions; the test passes when nothing raises.
    """
    dataset_names = ["bigraph"]
    shared_pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    convergence_tol = 1.E-9
    hk_cache = pg.SelfClearDict()

    # Base filters, inserted in the order they should appear in the tables.
    base_filters = {}
    for label, alpha in (("ppr0.85", 0.85), ("ppr0.9", 0.9), ("ppr0.99", 0.99)):
        base_filters[label] = pg.PageRank(
            alpha=alpha,
            preprocessor=shared_pre,
            max_iters=10000,
            tol=convergence_tol,
        )
    for label, diffusion_time in (("hk3", 3), ("hk5", 5), ("hk7", 7)):
        base_filters[label] = pg.HeatKernel(
            t=diffusion_time,
            preprocessor=shared_pre,
            max_iters=10000,
            tol=convergence_tol,
            optimization_dict=hk_cache,
        )
    selection = {
        "selected": pg.AlgorithmSelection(base_filters.values(), fraction_of_training=0.8),
    }

    def mistreatment(known_scores, sensitive_signal, exclude):
        """Average the TPR disparity and the TNR disparity between two groups.

        NOTE(review): assumes ``sensitive_signal`` and ``exclude`` expose a
        numeric array through ``.np`` with 0/1-like entries - confirm against
        what ``pg.benchmark`` passes to ``sensitive``.
        """

        def exclusion_outside(membership):
            # 1 wherever a node is already excluded or has zero membership.
            return 1 - (1 - exclude.np) * membership

        def disparity_between_groups(rate):
            return pg.Disparity([
                rate(known_scores, exclude=exclusion_outside(sensitive_signal.np)),
                rate(known_scores, exclude=exclusion_outside(1 - sensitive_signal.np)),
            ])

        return pg.AM([disparity_between_groups(pg.TPR), disparity_between_groups(pg.TNR)])

    def run_and_print(measure, **benchmark_options):
        # The loader is rebuilt for every run, exactly as the runs were
        # originally written (presumably because it is consumed by iteration).
        loader = pg.load_datasets_all_communities(dataset_names, min_group_size=50)
        results = pg.benchmark(
            {**base_filters, **selection},
            loader,
            measure,
            fraction_of_training=.8,
            seed=[0],
            **benchmark_options,
        )
        pg.benchmark_print(results, decimals=3, delimiter=" & ", end_line="\\\\")

    run_and_print(pg.AUC)
    run_and_print(pg.Modularity, sensitive=pg.pRule)
    run_and_print(pg.Modularity, sensitive=mistreatment)
def test_load_dataset_load():
    """Check that loading all communities of "graph9" yields more than one item."""
    loaded = list(pg.load_datasets_all_communities(["graph9"]))
    assert len(loaded) > 1
pg.Normalize( postprocessor( pg.GenericGraphFilter([1]+params, preprocessor=pre, error_type="iters", max_iters=41, optimization_dict=optimization, preserve_norm=False))), deviation_tol=1.E-6, measure=measure, optimizer=optimizer, max_vals=[1]*40, min_vals=[0]*40) tuned = { "select": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.9, measure=measure), "tune": create_param_tuner(), "tuneLBFGSB": create_param_tuner(pg.lbfgsb) } for name, graph, group in pg.load_datasets_all_communities(datasets, min_group_size=community_size, max_group_number=3): print(" & ".join([str(val) for val in [name, len(graph), graph.number_of_edges(), len(group)]])+" \\\\") loader = pg.load_datasets_all_communities(datasets, min_group_size=community_size, max_group_number=3) pg.benchmark_print( pg.benchmark_average((pg.benchmark(algorithms | tuned, loader, measure, fraction_of_training=[0.1, 0.2, 0.3], seed=list(range(1)))), posthocs=True), decimals=3, delimiter=" & ", end_line="\\\\")
tuned = { "selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8), #"tuned": pg.ParameterTuner(preprocessor=pre, fraction_of_training=0.8, tol=tol, optimization_dict=optimization, measure=pg.AUC), "arnoldi": pg.HopTuner(preprocessor=pre, basis="arnoldi", measure=pg.Cos, tol=tol, optimization_dict=optimization), #"arnoldi2": pg.ParameterTuner(lambda params: pg.HopTuner(preprocessor=pre, basis="arnoldi", num_parameters=int(params[0]), # measure=pg.Cos, # tol=tol, optimization_dict=optimization, tunable_offset=None), # max_vals=[40], min_vals=[5], divide_range=2, fraction_of_training=0.1), } #algorithms = pg.create_variations(algorithms, {"": pg.Tautology, "+Sweep": pg.Sweep}) #print(algorithms.keys()) #for name, graph, group in pg.load_datasets_all_communities(datasets, min_group_size=50): # print(" & ".join([str(val) for val in [name, len(graph), graph.number_of_edges(), len(group)]])+" \\\\") loader = pg.load_datasets_all_communities(datasets, min_group_size=50) pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.AUC, fraction_of_training=.8, seed=list(range(1))), decimals=3, delimiter=" & ", end_line="\\\\")