Example #1
0
def test_seed_oversampling():
    """Seed oversampling and its boosted variant should never degrade ranking quality.

    Checks that, for NDCG and AUC, base PageRank <= SeedOversampling <=
    BoostedSeedOversampling, then exercises the remaining oversampling modes.
    """
    _, graph, group = next(pg.load_datasets_one_community(["graph9"]))
    for _ in supported_backends():
        train_nodes, test_nodes = pg.split(list(group), training_samples=2)
        training = pg.to_signal(graph, {node: 1 for node in train_nodes})
        evaluation = pg.to_signal(graph, {node: 1 for node in test_nodes})
        for metric in (pg.NDCG, pg.AUC):
            base_result = metric(evaluation, training).evaluate(
                pg.PageRank(0.9, max_iters=1000).rank(graph, training))
            so_result = metric(evaluation, training).evaluate(
                pg.SeedOversampling(pg.PageRank(0.9, max_iters=1000)).rank(graph, training))
            bso_result = metric(evaluation, training).evaluate(
                pg.BoostedSeedOversampling(pg.PageRank(0.9, max_iters=1000)).rank(graph, training))
            # Oversampling should only improve (or match) the base algorithm.
            assert float(base_result) <= float(so_result)
            assert float(so_result) <= float(bso_result)
        # Exercise the alternative oversampling strategies for coverage.
        pg.SeedOversampling(pg.PageRank(0.99, max_iters=1000), "top").rank(graph, training)
        pg.SeedOversampling(pg.PageRank(0.99, max_iters=1000), "neighbors").rank(graph, training)
        pg.BoostedSeedOversampling(
            pg.PageRank(max_iters=1000), 'naive',
            oversample_from_iteration='original').rank(graph, {"A": 1})
Example #2
0
def test_seed_oversampling_arguments():
    """Invalid arguments to (boosted) seed oversampling should raise exceptions."""
    _, graph, group = next(pg.load_datasets_one_community(["graph9"]))
    failing_calls = [
        # Unknown oversampling method name.
        lambda: pg.SeedOversampling(pg.PageRank(), 'unknown').rank(graph, {"A": 1}),
        # Non-binary seed scores are rejected by the default method.
        lambda: pg.SeedOversampling(pg.PageRank()).rank(graph, {"A": 0.1, "B": 1}),
        # Unknown objective name.
        lambda: pg.BoostedSeedOversampling(pg.PageRank(), 'unknown').rank(graph, {"A": 1}),
        # Unknown oversample_from_iteration value.
        lambda: pg.BoostedSeedOversampling(
            pg.PageRank(), 'naive',
            oversample_from_iteration='unknown').rank(graph, {"B": 1}),
    ]
    for failing_call in failing_calls:
        with pytest.raises(Exception):
            failing_call()
Example #3
0
def test_postprocessor_citations():
    assert pg.Tautology(pg.PageRank()).cite() == pg.PageRank().cite()
    assert pg.Normalize(pg.PageRank()).cite() != pg.PageRank().cite()
    assert pg.Normalize(pg.PageRank(), "sum").cite() != pg.Normalize(pg.PageRank(), "range").cite()
    assert pg.Ordinals(pg.PageRank()).cite() != pg.Normalize(pg.PageRank(), "sum").cite()
    assert pg.Transformer(pg.PageRank()).cite() != pg.PageRank().cite()
    assert pg.Threshold(pg.PageRank()).cite() != pg.PageRank().cite()
    assert pg.Sweep(pg.PageRank()).cite() != pg.PageRank().cite()
    assert pg.BoostedSeedOversampling(pg.PageRank()).cite() != pg.PageRank().cite()
    assert pg.SeedOversampling(pg.PageRank()).cite() != pg.PageRank().cite()
    assert pg.SeedOversampling(pg.PageRank(), method="safe").cite() \
           != pg.SeedOversampling(pg.PageRank(), method="top").cite()
    assert pg.BoostedSeedOversampling(pg.PageRank(), objective="partial").cite() \
           != pg.BoostedSeedOversampling(pg.PageRank(), objective="naive").cite()
    assert pg.BoostedSeedOversampling(pg.PageRank(), oversample_from_iteration="previous").cite() \
           != pg.BoostedSeedOversampling(pg.PageRank(), oversample_from_iteration="original").cite()
Example #4
0
def test_explicit_citations():
    """Spot-check that .cite() outputs contain the expected bibliography keys and phrases."""
    # Base rankers and tuners.
    assert "unknown node ranking algorithm" == pg.NodeRanking().cite()
    assert "with parameters tuned \cite{krasanakis2021pygrank}" in pg.ParameterTuner(
        lambda params: pg.PageRank(params[0])).cite()
    assert "Postprocessor" in pg.Postprocessor().cite()
    assert pg.PageRank().cite() in pg.AlgorithmSelection().cite()
    assert "krasanakis2021pygrank" in pg.ParameterTuner().cite()
    assert "ortega2018graph" in pg.ParameterTuner().cite()
    # Wrappers should embed their base algorithm's citation.
    assert pg.HeatKernel().cite() in pg.SeedOversampling(pg.HeatKernel()).cite()
    assert pg.AbsorbingWalks().cite() in pg.BoostedSeedOversampling(pg.AbsorbingWalks()).cite()
    # Filter parameterizations that pull in extra references.
    assert "krasanakis2018venuerank" in pg.BiasedKernel(converge_to_eigenvectors=True).cite()
    assert "yu2021chebyshev" in pg.HeatKernel(coefficient_type="chebyshev").cite()
    assert "susnjara2015accelerated" in pg.HeatKernel(krylov_dims=5).cite()
    assert "krasanakis2021pygrank" in pg.GenericGraphFilter(optimization_dict=dict()).cite()
    # Postprocessors: Tautology is the identity and leaves the citation unchanged.
    assert "tautology" in pg.Tautology().cite()
    assert pg.PageRank().cite() == pg.Tautology(pg.PageRank()).cite()
    assert "mabs" in pg.MabsMaintain(pg.PageRank()).cite()
    assert "max normalization" in pg.Normalize(pg.PageRank()).cite()
    assert "[0,1] range" in pg.Normalize(pg.PageRank(), "range").cite()
    assert "ordinal" in pg.Ordinals(pg.PageRank()).cite()
    assert "exp" in pg.Transformer(pg.PageRank()).cite()
    assert "0.5" in pg.Threshold(pg.PageRank(), 0.5).cite()
    assert "andersen2007local" in pg.Sweep(pg.PageRank()).cite()
    assert pg.HeatKernel().cite() in pg.Sweep(pg.PageRank(), pg.HeatKernel()).cite()
    # Fairness-aware postprocessors and their method aliases.
    assert "LFPRO" in pg.AdHocFairness("O").cite()
    assert "LFPRO" in pg.AdHocFairness(pg.PageRank(), "LFPRO").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "B").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "mult").cite()
    assert "tsioutsiouliklis2020fairness" in pg.AdHocFairness().cite()
    assert "rahman2019fairwalk" in pg.FairWalk(pg.PageRank()).cite()
    assert "krasanakis2020prioredit" in pg.FairPersonalizer(pg.PageRank()).cite()
Example #5
0
def test_krylov_space_oversampling():
    """Oversampling should not worsen conductance for Krylov-approximated heat kernels.

    This demonstrates a highly complicated setting that combines a Krylov-space
    approximation, symmetric normalization, renormalization and seed oversampling.
    """
    _, graph, community = next(pg.load_datasets_one_community(["bigraph"]))
    algorithm = pg.HeatKernel(
        t=5,  # the number of hops away HeatKernel places maximal importance on
        krylov_dims=5,
        normalization="symmetric",
        renormalize=True)
    for _ in supported_backends():
        seeds = {node: 1. for node in list(community)[:10]}
        oversampled = pg.SeedOversampling(algorithm)
        pg.Normalize(oversampled)(graph, seeds)
        conductance = pg.Conductance()
        plain_score = conductance(pg.Normalize(algorithm)(graph, seeds))
        oversampled_score = conductance(pg.Normalize(oversampled)(graph, seeds))
        # Allow a tiny numeric tolerance for backend floating point differences.
        assert plain_score >= oversampled_score - 5.E-6
Example #6
0
             personalization: pg.GraphSignalData = None,
             **kwargs):
        # NOTE(review): the enclosing `def` header is cut off above this chunk;
        # the body reads like a ranker method that re-runs self.ranker on
        # thresholded sub-signals and accumulates score-weighted results — confirm.
        personalization = pg.to_signal(graph, personalization)
        graph = personalization.graph
        ranks = self.ranker(personalization)
        ret = 0
        total_sum = pg.sum(ranks)
        accum_sum = 0
        # Find the smallest score threshold whose cumulative (ascending) mass
        # exceeds 10% of the total rank mass; `threshold` deliberately leaks
        # out of this loop and is reused below.
        for threshold in sorted(ranks.values()):
            accum_sum += threshold
            if accum_sum > total_sum * 0.1:
                break
        for i, v in enumerate(ranks):
            pg.utils.log(f"{i}/{len(ranks)}")
            if ranks[v] >= threshold:
                # Re-rank using only nodes scoring at least as high as v, then
                # weight that partial result by v's original score.
                partial = ranks >> pg.Threshold(ranks[v],
                                                inclusive=True) >> self.ranker
                ret = partial * ranks[v] + ret
        return ret


def _ppr_pipeline(postprocessor=None):
    # Build a fresh PageRank per pipeline so algorithms do not share state.
    ranker = pg.PageRank(0.9)
    return ranker if postprocessor is None else ranker >> postprocessor


# Compare plain personalized PageRank against its oversampling variants.
algs = {
    "ppr": _ppr_pipeline(),
    "ppr+so": _ppr_pipeline(pg.SeedOversampling()),
    "ppr+bso": _ppr_pipeline(pg.BoostedSeedOversampling()),
    "ppr+sso": _ppr_pipeline(StochasticSeedOversampling()),
}

loader = pg.load_datasets_one_community(["citeseer"])
pg.benchmark_print(pg.benchmark(algs, loader, pg.AUC, 3))
Example #7
0
import pygrank as pg

# Community of interest from the EUCore dataset.
_, graph, community = next(pg.load_datasets_one_community(["EUCore"]))

algorithm = pg.HeatKernel(
    t=5,  # the number of hops away HeatKernel places maximal importance on
    normalization="symmetric",
    renormalize=True)

# Nodes missing from the dictionary are assumed to have zero scores.
personalization = {node: 1. for node in community}

# Base algorithms, their +Sweep variations, all wrapped in Normalize.
algorithms = {
    "HK5": algorithm,
    "HK5+Oversampling": pg.SeedOversampling(algorithm),
}
algorithms = algorithms | pg.create_variations(algorithms, {"+Sweep": pg.Sweep})
algorithms = pg.create_variations(algorithms, {"": pg.Normalize})

measure = pg.Conductance()
for name, ranker in algorithms.items():
    scores = ranker(graph, personalization)  # returns a dict-like pg.GraphSignal
    print(name, measure(scores))
import pygrank as pg

algorithm = pg.HeatKernel(
    t=5,  # the number of hops to place maximal importance on
    normalization="symmetric",
    renormalize=True)

# Base algorithms, their +sweep variations, all wrapped in Normalize.
algorithms = {
    "hk5": algorithm,
    "hk5+oversampling": pg.SeedOversampling(algorithm),
}
algorithms = algorithms | pg.create_variations(algorithms, {"+sweep": pg.Sweep})
algorithms = pg.create_variations(algorithms, pg.Normalize)

_, graph, community = next(pg.load_datasets_one_community(["EUCore"]))
personalization = {node: 1. for node in community}  # missing scores considered zero
measure = pg.Conductance()  # smaller means tightly-knit stochastic community
for name, ranker in algorithms.items():
    scores = ranker(graph, personalization)  # returns a dict-like pg.GraphSignal
    pg.benchmark_print_line(name, measure(scores), tabs=[20, 5])  # pretty