def test_explicit_citations():
    """Check that .cite() of filters, tuners, and postprocessors mentions the
    expected bibliography keys, component descriptions, and wrapped-algorithm
    citations."""
    assert "unknown node ranking algorithm" == pg.NodeRanking().cite()
    # Raw string: "\c" is an invalid escape sequence in a plain string literal
    # and raises a SyntaxWarning (a future SyntaxError) in modern Python.
    # The raw literal has the exact same value as the original.
    assert r"with parameters tuned \cite{krasanakis2021pygrank}" in pg.ParameterTuner(
        lambda params: pg.PageRank(params[0])).cite()
    assert "Postprocessor" in pg.Postprocessor().cite()
    assert pg.PageRank().cite() in pg.AlgorithmSelection().cite()
    assert "krasanakis2021pygrank" in pg.ParameterTuner().cite()
    assert "ortega2018graph" in pg.ParameterTuner().cite()
    # Wrappers must retain the citation of the algorithm they wrap.
    assert pg.HeatKernel().cite() in pg.SeedOversampling(pg.HeatKernel()).cite()
    assert pg.AbsorbingWalks().cite() in pg.BoostedSeedOversampling(pg.AbsorbingWalks()).cite()
    assert "krasanakis2018venuerank" in pg.BiasedKernel(converge_to_eigenvectors=True).cite()
    assert "yu2021chebyshev" in pg.HeatKernel(coefficient_type="chebyshev").cite()
    assert "susnjara2015accelerated" in pg.HeatKernel(krylov_dims=5).cite()
    assert "krasanakis2021pygrank" in pg.GenericGraphFilter(optimization_dict=dict()).cite()
    assert "tautology" in pg.Tautology().cite()
    # Tautology is transparent: citing it equals citing the wrapped algorithm.
    assert pg.PageRank().cite() == pg.Tautology(pg.PageRank()).cite()
    assert "mabs" in pg.MabsMaintain(pg.PageRank()).cite()
    assert "max normalization" in pg.Normalize(pg.PageRank()).cite()
    assert "[0,1] range" in pg.Normalize(pg.PageRank(), "range").cite()
    assert "ordinal" in pg.Ordinals(pg.PageRank()).cite()
    assert "exp" in pg.Transformer(pg.PageRank()).cite()
    assert "0.5" in pg.Threshold(pg.PageRank(), 0.5).cite()
    assert "andersen2007local" in pg.Sweep(pg.PageRank()).cite()
    assert pg.HeatKernel().cite() in pg.Sweep(pg.PageRank(), pg.HeatKernel()).cite()
    # Fairness-aware postprocessors.
    assert "LFPRO" in pg.AdHocFairness("O").cite()
    assert "LFPRO" in pg.AdHocFairness(pg.PageRank(), "LFPRO").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "B").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "mult").cite()
    assert "tsioutsiouliklis2020fairness" in pg.AdHocFairness().cite()
    assert "rahman2019fairwalk" in pg.FairWalk(pg.PageRank()).cite()
    assert "krasanakis2020prioredit" in pg.FairPersonalizer(pg.PageRank()).cite()
def test_sweep_streaming():
    """The streaming (>>, |) API for Sweep should improve AUC over plain
    PageRank and agree with the explicit log-transformed LinearSweep chain."""
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        train, held_out = pg.split(list(group), training_samples=0.1)
        swept_scores = (pg.PageRank() >> pg.Sweep()).rank(
            graph, {v: 1 for v in train})
        auc_swept = pg.AUC({v: 1 for v in held_out}, exclude=train).evaluate(swept_scores)
        plain_scores = pg.PageRank().rank(graph, {v: 1 for v in train})
        auc_plain = pg.AUC({v: 1 for v in held_out}, exclude=train).evaluate(plain_scores)
        # Equivalent pipeline spelled out: log-transform then linear sweep.
        chained_scores = (pg.PageRank() >> pg.Transformer(pg.log) >> pg.LinearSweep()
                          | pg.to_signal(graph, {v: 1 for v in train}))
        auc_chained = pg.AUC({v: 1 for v in held_out}, exclude=train).evaluate(chained_scores)
        assert auc_swept > auc_plain
        assert abs(auc_swept - auc_chained) < pg.epsilon()
        # Piping a non-algorithm into Sweep must be rejected.
        with pytest.raises(Exception):
            pg.Sweep() << "a"
def test_seed_top():
    """Run Top-filtered sweep ranking with noisy (oversampled) seeds for both
    fraction-based and count-based Top arguments.

    NOTE(review): this test currently holds no assertions — it only checks the
    calls complete; the commented-out baseline in the original suggested a
    comparison was intended. Confirm whether assertions should be restored.
    """
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        train, held_out = pg.split(list(group), training_samples=2)
        clean_seeds = set(train)
        from random import random, seed
        seed(0)  # deterministic noisy-seed selection
        # Short-circuit keeps random() from being consumed on clean seeds,
        # preserving the exact sequence of random draws.
        noisy_train = pg.to_signal(
            graph, {v: 1 for v in graph if v in clean_seeds or random() < 0.5})
        held_out = pg.to_signal(graph, {v: 1 for v in held_out})
        for measure in [pg.AUC, pg.NDCG]:
            fraction_ranks = pg.Top(
                pg.Sweep(pg.PageRank(0.9, max_iters=1000)), 0.9).rank(graph, noisy_train)
            result_fraction = measure(held_out, list(clean_seeds)).evaluate(fraction_ranks)
            count_ranks = pg.Top(
                2, pg.Sweep(pg.PageRank(0.9, max_iters=1000))).rank(graph, noisy_train)
            result_count = measure(held_out, list(clean_seeds)).evaluate(count_ranks)
def test_threshold():
    """Thresholding a swept ranking should yield conductance no worse than
    gap-thresholding a plain PageRank ranking.

    NOTE(review): another ``test_threshold`` appears later in this source; if
    both live in the same module, pytest only collects the last definition —
    confirm they come from different files or rename one.
    """
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        train, held_out = pg.split(list(group), training_samples=0.5)
        swept = pg.Threshold(pg.Sweep(pg.PageRank())).rank(
            graph, {v: 1 for v in train})
        cond_swept = pg.Conductance().evaluate(swept)
        # Exercise the transform() API variant as well.
        gap_thresholded = pg.Threshold("gap").transform(
            pg.PageRank().rank(graph, {v: 1 for v in train}))
        cond_gap = pg.Conductance().evaluate(gap_thresholded)
        assert cond_swept <= cond_gap
def test_sweep():
    """The Sweep postprocessor should strictly improve AUC over the same
    PageRank algorithm without sweeping."""
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        train, held_out = pg.split(list(group), training_samples=0.1)
        swept_scores = pg.Sweep(pg.PageRank()).rank(graph, {v: 1 for v in train})
        auc_swept = pg.AUC({v: 1 for v in held_out}, exclude=train).evaluate(swept_scores)
        plain_scores = pg.PageRank().rank(graph, {v: 1 for v in train})
        auc_plain = pg.AUC({v: 1 for v in held_out}, exclude=train).evaluate(plain_scores)
        assert auc_swept > auc_plain
def test_postprocessor_citations():
    """Postprocessor citations must differ from the wrapped algorithm's
    (except transparent Tautology) and must reflect parameter choices."""
    # Tautology is transparent; every other wrapper adds citation text.
    assert pg.Tautology(pg.PageRank()).cite() == pg.PageRank().cite()
    assert pg.Normalize(pg.PageRank()).cite() != pg.PageRank().cite()
    sum_norm = pg.Normalize(pg.PageRank(), "sum").cite()
    range_norm = pg.Normalize(pg.PageRank(), "range").cite()
    assert sum_norm != range_norm
    assert pg.Ordinals(pg.PageRank()).cite() != pg.Normalize(pg.PageRank(), "sum").cite()
    assert pg.Transformer(pg.PageRank()).cite() != pg.PageRank().cite()
    assert pg.Threshold(pg.PageRank()).cite() != pg.PageRank().cite()
    assert pg.Sweep(pg.PageRank()).cite() != pg.PageRank().cite()
    assert pg.BoostedSeedOversampling(pg.PageRank()).cite() != pg.PageRank().cite()
    assert pg.SeedOversampling(pg.PageRank()).cite() != pg.PageRank().cite()
    # Parameter choices must surface in the citation text.
    safe_method = pg.SeedOversampling(pg.PageRank(), method="safe").cite()
    top_method = pg.SeedOversampling(pg.PageRank(), method="top").cite()
    assert safe_method != top_method
    partial_objective = pg.BoostedSeedOversampling(pg.PageRank(), objective="partial").cite()
    naive_objective = pg.BoostedSeedOversampling(pg.PageRank(), objective="naive").cite()
    assert partial_objective != naive_objective
    previous_iter = pg.BoostedSeedOversampling(
        pg.PageRank(), oversample_from_iteration="previous").cite()
    original_iter = pg.BoostedSeedOversampling(
        pg.PageRank(), oversample_from_iteration="original").cite()
    assert previous_iter != original_iter
def overlapping_community_detection(graph, known_members, top=None):
    """Score nodes by proximity to known community members and predict new ones.

    When ``top`` is given, return that many highest-scoring nodes (known
    members zeroed out first); otherwise pick a score threshold minimizing
    conductance and return all unknown nodes above it.
    """
    # Few seeds: a fixed PageRank suffices; many seeds: tune on the data.
    if len(known_members) < 50:
        graph_filter = pg.PageRank(0.9)
    else:
        graph_filter = pg.ParameterTuner().tune(graph, known_members)
    ranks = (pg.to_signal(graph, {v: 1 for v in known_members})
             >> pg.Sweep(graph_filter) >> pg.Normalize("range"))
    if top is not None:
        # Zero-out known members so they are never re-reported.
        ranks = ranks * (1 - pg.to_signal(graph, {v: 1 for v in known_members}))

        def descending(node):
            return -ranks[node]

        return sorted(list(graph), key=descending)[:top]
    # Choose the score cut-off that minimizes conductance of the result.
    threshold = pg.optimize(
        max_vals=[1],
        loss=lambda p: pg.Conductance(graph)(pg.Threshold(p[0]).transform(ranks)))[0]
    known_members = set(known_members)  # O(1) membership tests below
    return [v for v in graph if ranks[v] > threshold and v not in known_members]
def test_threshold():
    """Compare conductance of gap-thresholded sweeps against fixed thresholds.

    NOTE(review): an earlier ``test_threshold`` appears in this source; if both
    end up in the same module, pytest only collects this later definition —
    confirm they come from different files or rename one.
    """
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        train, held_out = pg.split(list(group), training_samples=0.5)
        algorithm = pg.PageRank()
        cond_gap = pg.Conductance().evaluate(
            pg.Threshold(pg.Sweep(algorithm), "gap").rank(
                graph, {v: 1 for v in train}))
        # Exercise the transform() API variant with fixed thresholds.
        cond_fixed = pg.Conductance().evaluate(
            pg.Threshold(0.3).transform(
                algorithm.rank(graph, {v: 1 for v in train})))
        # Threshold 1 keeps (almost) nothing, so conductance becomes infinite.
        cond_all = pg.Conductance().evaluate(
            pg.Threshold(1).transform(
                algorithm.rank(graph, {v: 1 for v in train})))
        # TODO: find an algorithm other than gap to outperform 0.2 threshold too
        assert cond_gap <= cond_fixed
        assert cond_fixed <= cond_all
import pygrank as pg

# Communities to benchmark on.
datasets = ["EUCore", "Amazon"]

# Shared preprocessing and convergence settings for all compared algorithms.
pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
shared = dict(preprocessor=pre, tol=1.E-9, max_iters=1000)

# Base graph filters under comparison.
algs = {
    "ppr.85": pg.PageRank(.85, **shared),
    "ppr.99": pg.PageRank(.99, **shared),
    "hk3": pg.HeatKernel(3, **shared),
    "hk5": pg.HeatKernel(5, **shared),
}
# Add a swept variant of each base filter.
algs.update(pg.create_variations(algs, {"+Sweep": pg.Sweep}))

loader = pg.load_datasets_one_community(datasets)

# Tuned/selected algorithms on top of the fixed ones.
algs["tuned"] = pg.ParameterTuner(**shared)
algs["selected"] = pg.AlgorithmSelection(
    pg.create_demo_filters(**shared).values())
algs["tuned+Sweep"] = pg.ParameterTuner(
    ranker_generator=lambda params: pg.Sweep(
        pg.GenericGraphFilter(params, **shared)))

for alg in algs.values():
    print(alg.cite())  # prints a list of algorithm citations

pg.benchmark_print(
    pg.benchmark(algs, loader, pg.AUC, fraction_of_training=.5),
    delimiter=" & ", end_line="\\\\")