Example 1
def test_explicit_citations():
    assert "unknown node ranking algorithm" == pg.NodeRanking().cite()
    assert "with parameters tuned \\cite{krasanakis2021pygrank}" in pg.ParameterTuner(
        lambda params: pg.PageRank(params[0])).cite()
    assert "Postprocessor" in pg.Postprocessor().cite()
    assert pg.PageRank().cite() in pg.AlgorithmSelection().cite()
    assert "krasanakis2021pygrank" in pg.ParameterTuner().cite()
    assert "ortega2018graph" in pg.ParameterTuner().cite()
    assert pg.HeatKernel().cite() in pg.SeedOversampling(pg.HeatKernel()).cite()
    assert pg.AbsorbingWalks().cite() in pg.BoostedSeedOversampling(pg.AbsorbingWalks()).cite()
    assert "krasanakis2018venuerank" in pg.BiasedKernel(converge_to_eigenvectors=True).cite()
    assert "yu2021chebyshev" in pg.HeatKernel(coefficient_type="chebyshev").cite()
    assert "susnjara2015accelerated" in pg.HeatKernel(krylov_dims=5).cite()
    assert "krasanakis2021pygrank" in pg.GenericGraphFilter(optimization_dict=dict()).cite()
    assert "tautology" in pg.Tautology().cite()
    assert pg.PageRank().cite() == pg.Tautology(pg.PageRank()).cite()
    assert "mabs" in pg.MabsMaintain(pg.PageRank()).cite()
    assert "max normalization" in pg.Normalize(pg.PageRank()).cite()
    assert "[0,1] range" in pg.Normalize(pg.PageRank(), "range").cite()
    assert "ordinal" in pg.Ordinals(pg.PageRank()).cite()
    assert "exp" in pg.Transformer(pg.PageRank()).cite()
    assert "0.5" in pg.Threshold(pg.PageRank(), 0.5).cite()
    assert "andersen2007local" in pg.Sweep(pg.PageRank()).cite()
    assert pg.HeatKernel().cite() in pg.Sweep(pg.PageRank(), pg.HeatKernel()).cite()
    assert "LFPRO" in pg.AdHocFairness("O").cite()
    assert "LFPRO" in pg.AdHocFairness(pg.PageRank(), "LFPRO").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "B").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "mult").cite()
    assert "tsioutsiouliklis2020fairness" in pg.AdHocFairness().cite()
    assert "rahman2019fairwalk" in pg.FairWalk(pg.PageRank()).cite()
    assert "krasanakis2020prioredit" in pg.FairPersonalizer(pg.PageRank()).cite()
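
These assertions exercise pygrank's citation mechanism: every component exposes cite(), and wrapping components merge the citation text of whatever they wrap. A minimal sketch of the intended use, assuming pygrank is installed:

import pygrank as pg

# A postprocessor's citation embeds the citation of the algorithm
# it wraps, so a single call documents the whole pipeline.
algorithm = pg.Sweep(pg.PageRank())
print(algorithm.cite())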
Example 2
def test_sweep_streaming():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.1)
        auc1 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            (pg.PageRank() >> pg.Sweep()).rank(graph, {v: 1 for v in training}))
        auc2 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.PageRank().rank(graph, {v: 1 for v in training}))
        auc3 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.PageRank() >> pg.Transformer(pg.log) >> pg.LinearSweep()
            | pg.to_signal(graph, {v: 1 for v in training}))
        assert auc1 > auc2
        assert abs(auc1 - auc3) < pg.epsilon()

    with pytest.raises(Exception):
        pg.Sweep() << "a"
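
Here the >> operator chains a filter into a postprocessor, so pg.PageRank() >> pg.Sweep() builds the same pipeline as pg.Sweep(pg.PageRank()) in test_sweep below, while | feeds a graph signal through a pipeline. A small sketch of that equivalence, using an arbitrary networkx graph and a hypothetical seed set:

import networkx as nx
import pygrank as pg

graph = nx.les_miserables_graph()  # any networkx graph works
seeds = {v: 1 for v in list(graph)[:5]}  # hypothetical seed nodes

chained = (pg.PageRank() >> pg.Sweep()).rank(graph, seeds)
wrapped = pg.Sweep(pg.PageRank()).rank(graph, seeds)
# the two constructions should agree up to numerical tolerance
assert max(abs(chained[v] - wrapped[v]) for v in graph) < 1.E-9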
Example 3
def test_seed_top():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=2)
        original_training = set(training)
        from random import random, seed
        seed(0)
        training = pg.to_signal(graph, {v: 1 for v in graph
                                        if v in original_training or random() < 0.5})
        evaluation = pg.to_signal(graph, {v: 1 for v in evaluation})
        for measure in [pg.AUC, pg.NDCG]:
            #ranks = pg.PageRank(0.9, max_iters=1000).rank(graph, training)
            #base_result = measure(evaluation, list(original_training)).evaluate(ranks)
            ranks = pg.Top(pg.Sweep(pg.PageRank(0.9, max_iters=1000)), 0.9).rank(graph, training)
            undersampled_result1 = measure(evaluation, list(original_training)).evaluate(ranks)
            ranks = pg.Top(2, pg.Sweep(pg.PageRank(0.9, max_iters=1000))).rank(graph, training)
            undersampled_result2 = measure(evaluation, list(original_training)).evaluate(ranks)
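
Note that pg.Top accepts its two arguments in either order. Presumably the float 0.9 retains the top 90% of sweep-processed scores while the integer 2 retains only the two highest-scoring nodes; both calls undersample the predictions before evaluation.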
Example 4
def test_threshold():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.5)
        cond1 = pg.Conductance().evaluate(
            pg.Threshold(pg.Sweep(pg.PageRank())).rank(graph, {v: 1 for v in training}))
        cond2 = pg.Conductance().evaluate(
            pg.Threshold("gap").transform(
                pg.PageRank().rank(graph, {v: 1 for v in training})))  # try all api types
        assert cond1 <= cond2
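
pg.Conductance is an unsupervised quality measure where lower values indicate a better-separated community, so the assertion checks that thresholding sweep-processed scores yields a community at least as well-separated as thresholding the raw PageRank scores.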
Example 5
def test_sweep():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.1)
        auc1 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.Sweep(pg.PageRank()).rank(graph, {v: 1 for v in training}))
        auc2 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.PageRank().rank(graph, {v: 1 for v in training}))
        assert auc1 > auc2
Example 6
def test_postprocessor_citations():
    assert pg.Tautology(pg.PageRank()).cite() == pg.PageRank().cite()
    assert pg.Normalize(pg.PageRank()).cite() != pg.PageRank().cite()
    assert pg.Normalize(pg.PageRank(), "sum").cite() != pg.Normalize(pg.PageRank(), "range").cite()
    assert pg.Ordinals(pg.PageRank()).cite() != pg.Normalize(pg.PageRank(), "sum").cite()
    assert pg.Transformer(pg.PageRank()).cite() != pg.PageRank().cite()
    assert pg.Threshold(pg.PageRank()).cite() != pg.PageRank().cite()
    assert pg.Sweep(pg.PageRank()).cite() != pg.PageRank().cite()
    assert pg.BoostedSeedOversampling(pg.PageRank()).cite() != pg.PageRank().cite()
    assert pg.SeedOversampling(pg.PageRank()).cite() != pg.PageRank().cite()
    assert pg.SeedOversampling(pg.PageRank(), method="safe").cite() \
           != pg.SeedOversampling(pg.PageRank(), method="top").cite()
    assert pg.BoostedSeedOversampling(pg.PageRank(), objective="partial").cite() \
           != pg.BoostedSeedOversampling(pg.PageRank(), objective="naive").cite()
    assert pg.BoostedSeedOversampling(pg.PageRank(), oversample_from_iteration="previous").cite() \
           != pg.BoostedSeedOversampling(pg.PageRank(), oversample_from_iteration="original").cite()
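
In short, any hyperparameter that changes a postprocessor's behavior also changes its citation text, so bibliographies generated through cite() reflect the exact configuration being run.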
Example 7
def overlapping_community_detection(graph, known_members, top=None):
    graph_filter = (pg.PageRank(0.9) if len(known_members) < 50
                    else pg.ParameterTuner().tune(graph, known_members))
    ranks = (pg.to_signal(graph, {v: 1 for v in known_members})
             >> pg.Sweep(graph_filter) >> pg.Normalize("range"))
    if top is not None:
        # set known member scores to zero
        ranks = ranks * (1 - pg.to_signal(graph, {v: 1 for v in known_members}))
        # return the requested number of top predictions
        return sorted(list(graph), key=lambda node: -ranks[node])[:top]

    threshold = pg.optimize(
        max_vals=[1],
        loss=lambda p: pg.Conductance(graph)(pg.Threshold(p[0]).transform(ranks)))[0]
    known_members = set(known_members)
    return [v for v in graph if ranks[v] > threshold and v not in known_members]
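
A hypothetical invocation of the helper above, using a toy networkx graph and placeholder seed members:

import networkx as nx

graph = nx.karate_club_graph()
known_members = [0, 1, 2, 3]  # hypothetical known community members

# fixed-size output: the five strongest predictions outside the seeds
print(overlapping_community_detection(graph, known_members, top=5))

# open-ended output: community size chosen by conductance-optimized thresholding
print(overlapping_community_detection(graph, known_members))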
Example 8
def test_threshold():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.5)
        algorithm = pg.PageRank()
        cond1 = pg.Conductance().evaluate(
            pg.Threshold(pg.Sweep(algorithm), "gap").rank(graph, {v: 1 for v in training}))
        cond2 = pg.Conductance().evaluate(
            pg.Threshold(0.3).transform(
                algorithm.rank(graph, {v: 1 for v in training})))  # try all api types
        cond3 = pg.Conductance().evaluate(
            pg.Threshold(1).transform(
                algorithm.rank(graph, {v: 1 for v in training})))  # should yield infinite conductance
        # TODO: find an algorithm other than gap to outperform 0.2 threshold too
        assert cond1 <= cond2
        assert cond2 <= cond3
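
Thresholding at 1 retains (almost) no nodes, and the conductance of a near-empty community diverges, which is why the final assertion expects cond3 to be the worst of the three values.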
Example 9
import pygrank as pg
datasets = ["EUCore", "Amazon"]
pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
algs = {
    "ppr.85": pg.PageRank(.85, preprocessor=pre, tol=1.E-9, max_iters=1000),
    "ppr.99": pg.PageRank(.99, preprocessor=pre, tol=1.E-9, max_iters=1000),
    "hk3": pg.HeatKernel(3, preprocessor=pre, tol=1.E-9, max_iters=1000),
    "hk5": pg.HeatKernel(5, preprocessor=pre, tol=1.E-9, max_iters=1000),
}

algs = algs | pg.create_variations(algs, {"+Sweep": pg.Sweep})
loader = pg.load_datasets_one_community(datasets)
algs["tuned"] = pg.ParameterTuner(preprocessor=pre, tol=1.E-9, max_iters=1000)
algs["selected"] = pg.AlgorithmSelection(
    pg.create_demo_filters(preprocessor=pre, tol=1.E-9,
                           max_iters=1000).values())
algs["tuned+Sweep"] = pg.ParameterTuner(
    ranker_generator=lambda params: pg.Sweep(
        pg.GenericGraphFilter(
            params, preprocessor=pre, tol=1.E-9, max_iters=1000)))

for alg in algs.values():
    print(alg.cite())  # prints a list of algorithm citations

pg.benchmark_print(pg.benchmark(algs, loader, pg.AUC, fraction_of_training=.5),
                   delimiter=" & ",
                   end_line="\\\\")
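
The delimiter and end_line arguments format each benchmark row with &-separated columns and rows terminated by \\, so the printed results can be pasted directly into a LaTeX tabular environment.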