Example #1
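# Checks, for every available backend, that AlgorithmSelection tuned with the
# supervised AUC measure and with modularity (used as a supervised surrogate)
# reach similar mean AUCs on held-out community members.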
def test_algorithm_selection():
    for _ in supported_backends():  # test-suite helper that switches between available computational backends
        _, graph, communities = next(
            pg.load_datasets_multiple_communities(["bigraph"],
                                                  max_group_number=3))
        train, test = pg.split(communities,
                               0.05)  # 5% of community members are known
        algorithms = pg.create_variations(pg.create_demo_filters(),
                                          pg.Normalize)

        supervised_algorithm = pg.AlgorithmSelection(algorithms.values(),
                                                     measure=pg.AUC,
                                                     tuning_backend="numpy")
        print(supervised_algorithm.cite())
        modularity_algorithm = pg.AlgorithmSelection(
            algorithms.values(),
            fraction_of_training=1,
            measure=pg.Modularity().as_supervised_method(),
            tuning_backend="numpy")

        supervised_aucs = list()
        modularity_aucs = list()
        for seeds, members in zip(train.values(), test.values()):
            measure = pg.AUC(members, exclude=seeds)
            supervised_aucs.append(measure(supervised_algorithm(graph, seeds)))
            modularity_aucs.append(measure(modularity_algorithm(graph, seeds)))

        assert abs(
            sum(supervised_aucs) / len(supervised_aucs) -
            sum(modularity_aucs) / len(modularity_aucs)) < 0.05
Example #2
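# Exercises the .cite() method across filters, tuners, postprocessors, and
# fairness-aware wrappers, checking the produced citation strings.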
def test_explicit_citations():
    assert "unknown node ranking algorithm" == pg.NodeRanking().cite()
    assert "with parameters tuned \cite{krasanakis2021pygrank}" in pg.ParameterTuner(
        lambda params: pg.PageRank(params[0])).cite()
    assert "Postprocessor" in pg.Postprocessor().cite()
    assert pg.PageRank().cite() in pg.AlgorithmSelection().cite()
    assert "krasanakis2021pygrank" in pg.ParameterTuner().cite()
    assert "ortega2018graph" in pg.ParameterTuner().cite()
    assert pg.HeatKernel().cite() in pg.SeedOversampling(pg.HeatKernel()).cite()
    assert pg.AbsorbingWalks().cite() in pg.BoostedSeedOversampling(pg.AbsorbingWalks()).cite()
    assert "krasanakis2018venuerank" in pg.BiasedKernel(converge_to_eigenvectors=True).cite()
    assert "yu2021chebyshev" in pg.HeatKernel(coefficient_type="chebyshev").cite()
    assert "susnjara2015accelerated" in pg.HeatKernel(krylov_dims=5).cite()
    assert "krasanakis2021pygrank" in pg.GenericGraphFilter(optimization_dict=dict()).cite()
    assert "tautology" in pg.Tautology().cite()
    assert pg.PageRank().cite() == pg.Tautology(pg.PageRank()).cite()
    assert "mabs" in pg.MabsMaintain(pg.PageRank()).cite()
    assert "max normalization" in pg.Normalize(pg.PageRank()).cite()
    assert "[0,1] range" in pg.Normalize(pg.PageRank(), "range").cite()
    assert "ordinal" in pg.Ordinals(pg.PageRank()).cite()
    assert "exp" in pg.Transformer(pg.PageRank()).cite()
    assert "0.5" in pg.Threshold(pg.PageRank(), 0.5).cite()
    assert "andersen2007local" in pg.Sweep(pg.PageRank()).cite()
    assert pg.HeatKernel().cite() in pg.Sweep(pg.PageRank(), pg.HeatKernel()).cite()
    assert "LFPRO" in pg.AdHocFairness("O").cite()
    assert "LFPRO" in pg.AdHocFairness(pg.PageRank(), "LFPRO").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "B").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "mult").cite()
    assert "tsioutsiouliklis2020fairness" in pg.AdHocFairness().cite()
    assert "rahman2019fairwalk" in pg.FairWalk(pg.PageRank()).cite()
    assert "krasanakis2020prioredit" in pg.FairPersonalizer(pg.PageRank()).cite()
Example #3
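# Benchmarks PageRank and HeatKernel variants plus AlgorithmSelection over all
# dataset communities, reporting AUC, modularity with a pRule fairness column,
# and a disparate-mistreatment measure built from TPR/TNR disparities.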
def test_all_communities_benchmarks():
    datasets = ["bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    tol = 1.E-9
    optimization = pg.SelfClearDict()
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    }

    tuned = {"selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8)}
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.AUC, fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=pg.pRule, fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
    mistreatment = lambda known_scores, sensitive_signal, exclude: \
        pg.AM([pg.Disparity([pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))]),
               pg.Disparity([pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))])])
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=mistreatment, fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
Example #4
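# Compares the AUCs of the demo filters with the AUC obtained by automatic
# algorithm selection on the same training seeds.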
def test_autotune_methods():
    import numpy as np
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}))
    aucs = [pg.AUC(evaluation, exclude=training)(ranker.rank(training)) for ranker in pg.create_demo_filters().values()]
    auc2 = pg.AUC(evaluation, exclude=training)(pg.AlgorithmSelection().rank(training))
    assert max(aucs) - np.std(aucs) <= auc2  # selection should land within one standard deviation of the best filter
Example #5
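# Benchmarks the same filters on multi-community loaders with a multi-group
# supervised AUC measure and with modularity plus pRule fairness.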
def test_multigroup_benchmarks():
    datasets = ["bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    tol = 1.E-9
    optimization = pg.SelfClearDict()
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    }

    tuned = {"selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8)}
    loader = pg.load_datasets_multiple_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader,
                                    lambda ground_truth, exclude: pg.MultiSupervised(pg.AUC, ground_truth, exclude),
                                    fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
    loader = pg.load_datasets_multiple_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=pg.pRule,
                                    fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
Example #6
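# Ensures that tuner and selection citations differ from the citation of the
# base GenericGraphFilter they wrap.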
def test_autotune_citations():
    assert pg.ParameterTuner().cite() != pg.GenericGraphFilter().cite()
    assert pg.HopTuner().cite() != pg.GenericGraphFilter().cite()
    assert pg.AlgorithmSelection().cite() != pg.GenericGraphFilter().cite()
Example #7
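# End-to-end comparison of filters, their +Sweep variations, a ParameterTuner,
# and AlgorithmSelection on one-community datasets; prints citations and a
# LaTeX-ready AUC benchmark table.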
import pygrank as pg
datasets = ["EUCore", "Amazon"]
pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
algs = {
    "ppr.85": pg.PageRank(.85, preprocessor=pre, tol=1.E-9, max_iters=1000),
    "ppr.99": pg.PageRank(.99, preprocessor=pre, tol=1.E-9, max_iters=1000),
    "hk3": pg.HeatKernel(3, preprocessor=pre, tol=1.E-9, max_iters=1000),
    "hk5": pg.HeatKernel(5, preprocessor=pre, tol=1.E-9, max_iters=1000),
}

algs = algs | pg.create_variations(algs, {"+Sweep": pg.Sweep})
loader = pg.load_datasets_one_community(datasets)
algs["tuned"] = pg.ParameterTuner(preprocessor=pre, tol=1.E-9, max_iters=1000)
algs["selected"] = pg.AlgorithmSelection(
    pg.create_demo_filters(preprocessor=pre, tol=1.E-9,
                           max_iters=1000).values())
algs["tuned+Sweep"] = pg.ParameterTuner(
    ranker_generator=lambda params: pg.Sweep(
        pg.GenericGraphFilter(
            params, preprocessor=pre, tol=1.E-9, max_iters=1000)))

for alg in algs.values():
    print(alg.cite())  # prints a list of algorithm citations

pg.benchmark_print(pg.benchmark(algs, loader, pg.AUC, fraction_of_training=.5),
                   delimiter=" & ",
                   end_line="\\\\")
Example #8
# Times (pg.Time) filter variations against AlgorithmSelection, an
# Arnoldi-basis HopTuner, and ParameterTuner-based tuners, averaging benchmark
# results over several training fractions.
# NOTE: the head of this snippet, up to the algorithms dictionary, is an
# assumed reconstruction mirroring Examples #3 and #5; the dataset list and
# community size are placeholders.
import pygrank as pg

datasets = ["bigraph"]  # placeholder
community_size = 50  # placeholder
pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
tol = 1.E-9
optimization = pg.SelfClearDict()
algorithms = {
    "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
    "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
    "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
    "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
}
algorithms = algorithms  # | pg.benchmarks.create_variations(algorithms, {"+sweep": pg.Sweep})

tuned = {
    "selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8),
    # "tuned": pg.ParameterTuner(preprocessor=pre, fraction_of_training=0.8, tol=tol, optimization_dict=optimization, measure=pg.AUC),
    "arnoldi": pg.HopTuner(preprocessor=pre, basis="arnoldi", measure=pg.Cos, tol=tol, optimization_dict=optimization),
    # "arnoldi2": pg.ParameterTuner(lambda params: pg.HopTuner(preprocessor=pre, basis="arnoldi", num_parameters=int(params[0]),
    #                                                          measure=pg.Cos,
    #                                                          tol=tol, optimization_dict=optimization, tunable_offset=None),
    #                               max_vals=[40], min_vals=[5], divide_range=2, fraction_of_training=0.1),
}

# algorithms = pg.create_variations(algorithms, {"": pg.Tautology, "+Sweep": pg.Sweep})
# print(algorithms.keys())
postprocessor = pg.Tautology
algorithms = pg.benchmarks.create_variations(algorithms, postprocessor)
measure = pg.Time
optimization = pg.SelfClearDict()


def create_param_tuner(optimizer=pg.optimize):
    return pg.ParameterTuner(lambda params:
                             pg.Normalize(postprocessor(
                                 pg.GenericGraphFilter([1] + params, preprocessor=pre, error_type="iters", max_iters=41,
                                                       optimization_dict=optimization, preserve_norm=False))),
                             deviation_tol=1.E-6, measure=measure, optimizer=optimizer,
                             max_vals=[1] * 40, min_vals=[0] * 40)


tuned = {
    "select": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.9, measure=measure),
    "tune": create_param_tuner(),
    "tuneLBFGSB": create_param_tuner(pg.lbfgsb),
}

for name, graph, group in pg.load_datasets_all_communities(datasets, min_group_size=community_size, max_group_number=3):
    print(" & ".join([str(val) for val in [name, len(graph), graph.number_of_edges(), len(group)]]) + " \\\\")

loader = pg.load_datasets_all_communities(datasets, min_group_size=community_size, max_group_number=3)
pg.benchmark_print(
    pg.benchmark_average(pg.benchmark(algorithms | tuned, loader, measure,
                                      fraction_of_training=[0.1, 0.2, 0.3], seed=list(range(1))), posthocs=True),
    decimals=3, delimiter=" & ", end_line="\\\\")

Example #9
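# Picks the best normalized filter by three criteria: supervised AUC,
# modularity as a supervised surrogate, and unsupervised LinkAUC assessment.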
import pygrank as pg
_, graph, communities = next(
    pg.load_datasets_multiple_communities(["EUCore"], max_group_number=3))
train, test = pg.split(communities, 0.05)  # 5% of community members are known
algorithms = pg.create_variations(pg.create_demo_filters(), pg.Normalize)

supervised_algorithm = pg.AlgorithmSelection(algorithms.values(),
                                             measure=pg.AUC)
print(supervised_algorithm.cite())
modularity_algorithm = pg.AlgorithmSelection(
    algorithms.values(),
    fraction_of_training=1,
    measure=pg.Modularity().as_supervised_method())

linkauc_algorithm = None
best_evaluation = 0
linkAUC = pg.LinkAssessment(
    graph, similarity="cos",
    hops=1)  # LinkAUC, because emails systemically exhibit homophily
for algorithm in algorithms.values():
    evaluation = linkAUC.evaluate({
        community: algorithm(graph, seeds)
        for community, seeds in train.items()
    })
    if evaluation > best_evaluation:
        best_evaluation = evaluation
        linkauc_algorithm = algorithm

supervised_aucs = list()
modularity_aucs = list()
linkauc_aucs = list()

# The snippet was truncated at this point; the evaluation loop below is an
# assumed completion that mirrors Example #1.
for seeds, members in zip(train.values(), test.values()):
    measure = pg.AUC(members, exclude=seeds)
    supervised_aucs.append(measure(supervised_algorithm(graph, seeds)))
    modularity_aucs.append(measure(modularity_algorithm(graph, seeds)))
    linkauc_aucs.append(measure(linkauc_algorithm(graph, seeds)))

print("AUC-selected", sum(supervised_aucs) / len(supervised_aucs))
print("Modularity-selected", sum(modularity_aucs) / len(modularity_aucs))
print("LinkAUC-selected", sum(linkauc_aucs) / len(linkauc_aucs))