Example #1
0
def test_all_communities_benchmarks():
    datasets = ["bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    tol = 1.E-9
    optimization = pg.SelfClearDict()
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    }

    tuned = {"selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8)}
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.AUC, fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=pg.pRule, fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
    mistreatment = lambda known_scores, sensitive_signal, exclude: \
        pg.AM([pg.Disparity([pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))]),
               pg.Disparity([pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))])])
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=mistreatment, fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
Example #2
0
def test_multigroup_benchmarks():
    datasets = ["bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    tol = 1.E-9
    optimization = pg.SelfClearDict()
    algorithms = {
        "ppr0.85":
        pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.9":
        pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.99":
        pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
        "hk3":
        pg.HeatKernel(t=3,
                      preprocessor=pre,
                      max_iters=10000,
                      tol=tol,
                      optimization_dict=optimization),
        "hk5":
        pg.HeatKernel(t=5,
                      preprocessor=pre,
                      max_iters=10000,
                      tol=tol,
                      optimization_dict=optimization),
        "hk7":
        pg.HeatKernel(t=7,
                      preprocessor=pre,
                      max_iters=10000,
                      tol=tol,
                      optimization_dict=optimization),
    }

    tuned = {
        "selected":
        pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8)
    }
    loader = pg.load_datasets_multiple_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(
        algorithms | tuned,
        loader,
        lambda ground_truth, exclude: pg.MultiSupervised(
            pg.AUC, ground_truth, exclude),
        fraction_of_training=.8,
        seed=list(range(1))),
                       decimals=3,
                       delimiter=" & ",
                       end_line="\\\\")
    loader = pg.load_datasets_multiple_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned,
                                    loader,
                                    pg.Modularity,
                                    sensitive=pg.pRule,
                                    fraction_of_training=.8,
                                    seed=list(range(1))),
                       decimals=3,
                       delimiter=" & ",
                       end_line="\\\\")
Example #3
0
algorithms = {
    "ppr0.5": pg.PageRank(alpha=0.5, preprocessor=pre, **convergence),
    "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, **convergence),
    "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, **convergence),
    "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, **convergence),
    "hk2": pg.HeatKernel(t=2, preprocessor=pre, **convergence),
    "hk3": pg.HeatKernel(t=3, preprocessor=pre, **convergence),
    "hk5": pg.HeatKernel(t=5, preprocessor=pre, **convergence),
    "hk7": pg.HeatKernel(t=7, preprocessor=pre, **convergence),
}

postprocessor = pg.Tautology
algorithms = pg.benchmarks.create_variations(algorithms, postprocessor)
measure = pg.AUC
optimization = pg.SelfClearDict()


def create_param_tuner(optimizer=pg.optimize):
    return pg.ParameterTuner(lambda params:
                              pg.Normalize(
                                  postprocessor(
                                      pg.GenericGraphFilter([1]+params,
                                                            preprocessor=pre,
                                                            error_type="iters",
                                                            max_iters=41,
                                                            optimization_dict=optimization,
                                                            preserve_norm=False))),
                             deviation_tol=1.E-6,
                             measure=measure,
                             optimizer=optimizer,