Example #1
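Benchmarks personalized PageRank and heat kernel filters, together with automatic algorithm selection, over every community of the "bigraph" dataset; it prints LaTeX-formatted result tables for AUC, for modularity with pRule as the sensitive-attribute objective, and for modularity with a custom disparate-mistreatment measure.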
import pygrank as pg


def test_all_communities_benchmarks():
    datasets = ["bigraph"]
    # symmetrically normalized adjacency, cached because the graph is assumed immutable
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    tol = 1.E-9
    optimization = pg.SelfClearDict()  # shared optimization cache for the heat kernels
    # seed-based ranking algorithms: personalized PageRank for three restart
    # probabilities and heat kernels for three diffusion times
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    }

    # also benchmark automatic selection of the best of the above on a validation split
    tuned = {"selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8)}
    # loaders are generators and are exhausted by a benchmark, so re-create one per run
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.AUC, fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=pg.pRule, fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
    # disparate mistreatment: mean disparity of true positive and true negative rates
    # between the sensitive group and its complement, ignoring excluded (training) nodes
    mistreatment = lambda known_scores, sensitive_signal, exclude: \
        pg.AM([pg.Disparity([pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))]),
               pg.Disparity([pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))])])
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=mistreatment, fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
Example #2
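A minimal test that the all-communities loader yields one (name, graph, group) entry per community, so a multi-community dataset produces more than one.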
import pygrank as pg


def test_load_dataset_load():
    assert len(list(pg.load_datasets_all_communities(["graph9"]))) > 1
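For reference, a minimal sketch of consuming the triples that this loader yields (the printed fields are illustrative only):

import pygrank as pg

# each yielded triple is (dataset name, graph, list of community members)
for name, graph, group in pg.load_datasets_all_communities(["graph9"]):
    print(name, len(graph), "nodes,", len(group), "members")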
Example #3
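An excerpt from a larger benchmarking script that compares tuned 40-parameter graph filters, plain algorithm selection, and an Arnoldi-basis HopTuner over several training fractions; names such as pre, measure, optimization and algorithms are defined in the omitted parts of that script.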
import pygrank as pg

# Reconstructed wrapper (assumption): this example is excerpted mid-expression from a
# longer script, and the calls create_param_tuner() / create_param_tuner(pg.lbfgsb)
# below imply a tuner factory roughly like the following; the signature and default
# optimizer are inferred, not taken from the original source.
def create_param_tuner(optimizer=pg.optimize):
    return pg.ParameterTuner(lambda params:
                              pg.Normalize(
                                  postprocessor(
                                      pg.GenericGraphFilter([1]+params,
                                                            preprocessor=pre,
                                                            error_type="iters",
                                                            max_iters=41,
                                                            optimization_dict=optimization,
                                                            preserve_norm=False))),
                             deviation_tol=1.E-6,
                             measure=measure,
                             optimizer=optimizer,
                             max_vals=[1]*40,
                             min_vals=[0]*40)


tuned = {
    "select": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.9, measure=measure),
    "tune": create_param_tuner(),
    "tuneLBFGSB": create_param_tuner(pg.lbfgsb)
}

# print one LaTeX table row (name, nodes, edges, community size) per community
for name, graph, group in pg.load_datasets_all_communities(datasets, min_group_size=community_size, max_group_number=3):
    print(" & ".join([str(val) for val in [name, len(graph), graph.number_of_edges(), len(group)]])+" \\\\")

loader = pg.load_datasets_all_communities(datasets, min_group_size=community_size, max_group_number=3)
pg.benchmark_print(
    pg.benchmark_average(pg.benchmark(algorithms | tuned, loader, measure,
                                      fraction_of_training=[0.1, 0.2, 0.3], seed=list(range(1))), posthocs=True),
    decimals=3, delimiter=" & ", end_line="\\\\")

tuned = {
    "selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8),
    #"tuned": pg.ParameterTuner(preprocessor=pre, fraction_of_training=0.8, tol=tol, optimization_dict=optimization, measure=pg.AUC),
    "arnoldi": pg.HopTuner(preprocessor=pre,
                           basis="arnoldi",
                           measure=pg.Cos,
                           tol=tol,
                           optimization_dict=optimization),
    #"arnoldi2": pg.ParameterTuner(lambda params: pg.HopTuner(preprocessor=pre, basis="arnoldi", num_parameters=int(params[0]),
    #                                                         measure=pg.Cos,
    #                                                         tol=tol, optimization_dict=optimization, tunable_offset=None),
    #                              max_vals=[40], min_vals=[5], divide_range=2, fraction_of_training=0.1),
}

#algorithms = pg.create_variations(algorithms, {"": pg.Tautology, "+Sweep": pg.Sweep})
#print(algorithms.keys())

#for name, graph, group in pg.load_datasets_all_communities(datasets, min_group_size=50):
#    print(" & ".join([str(val) for val in [name, len(graph), graph.number_of_edges(), len(group)]])+" \\\\")
loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
pg.benchmark_print(pg.benchmark(algorithms | tuned,
                                loader,
                                pg.AUC,
                                fraction_of_training=.8,
                                seed=list(range(1))),
                   decimals=3,
                   delimiter=" & ",
                   end_line="\\\\")