def test_all_communities_benchmarks():
    """Benchmark fixed filters plus automatic algorithm selection over every
    community of the bigraph dataset, printing LaTeX-style tables for AUC,
    pRule-sensitive modularity, and a custom disparate-mistreatment measure."""
    datasets = ["bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    tol = 1.E-9
    optimization = pg.SelfClearDict()  # shared cache for the heat kernels
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    }
    tuned = {"selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8)}
    candidates = {**algorithms, **tuned}

    def mistreatment(known_scores, sensitive_signal, exclude):
        # Average of the TPR and TNR disparities between the sensitive and
        # non-sensitive groups, with excluded nodes masked out of each side.
        keep = 1 - exclude.np
        return pg.AM([
            pg.Disparity([pg.TPR(known_scores, exclude=1 - keep * sensitive_signal.np),
                          pg.TPR(known_scores, exclude=1 - keep * (1 - sensitive_signal.np))]),
            pg.Disparity([pg.TNR(known_scores, exclude=1 - keep * sensitive_signal.np),
                          pg.TNR(known_scores, exclude=1 - keep * (1 - sensitive_signal.np))]),
        ])

    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(
        pg.benchmark(candidates, loader, pg.AUC,
                     fraction_of_training=.8, seed=list(range(1))),
        decimals=3, delimiter=" & ", end_line="\\\\")

    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(
        pg.benchmark(candidates, loader, pg.Modularity, sensitive=pg.pRule,
                     fraction_of_training=.8, seed=list(range(1))),
        decimals=3, delimiter=" & ", end_line="\\\\")

    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(
        pg.benchmark(candidates, loader, pg.Modularity, sensitive=mistreatment,
                     fraction_of_training=.8, seed=list(range(1))),
        decimals=3, delimiter=" & ", end_line="\\\\")
def test_autorefs():
    """
    Tests that different (base) algorithms yield different citations, that all
    citations have at least one reference to a publication and that wrapping
    the same base algorithms yields the same citations.
    """
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    algs = {
        "ppr.85": pg.PageRank(.85, preprocessor=pre, tol=1.E-9, max_iters=1000),
        "ppr.99": pg.PageRank(.99, preprocessor=pre, tol=1.E-9, max_iters=1000),
        "hk3": pg.HeatKernel(3, preprocessor=pre, tol=1.E-9, max_iters=1000),
        "hk5": pg.HeatKernel(5, preprocessor=pre, tol=1.E-9, max_iters=1000),
        "hk5'": pg.HeatKernel(5, preprocessor=pre, tol=1.E-9, max_iters=1000),
    }
    wrappers = {"+Sweep": pg.Sweep, "+SO": pg.SeedOversampling, "+BSO": pg.BoostedSeedOversampling}
    algs = {**algs, **pg.create_variations(algs, wrappers)}
    citations = {alg.cite() for alg in algs.values()}
    assert all("\\cite{" in citation for citation in citations)
    # hk5 and hk5' are identical algorithms, so their citations and the
    # citations of their three wrapped variations collide pairwise (4 dups).
    assert len(citations) == len(algs) - 4
def test_one_community_benchmarks():
    """Run the one-community benchmark on the numpy backend for a handful of
    filters and a parameter tuner, aggregating AUC outcomes as averaged ranks."""
    pg.load_backend("numpy")
    datasets = ["graph9", "bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=1.E-9),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=1.E-9),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=1.E-9),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=1.E-9),
        "tuned": pg.ParameterTuner(preprocessor=pre, max_iters=10000, tol=1.E-9),
    }
    loader = pg.load_datasets_one_community(datasets)
    outcomes = pg.benchmark(algorithms, loader, pg.AUC, fraction_of_training=.8)
    pg.benchmark_print(pg.benchmark_average(pg.benchmark_ranks(outcomes)))
def test_completion():
    """Smoke test: every listed filter completes a ranking run on graph9
    for each supported backend without raising.

    Fix: the original ran ``pg.HeatKernel().rank(graph)`` twice — a
    copy-paste duplicate; the second invocation added no coverage.
    """
    graph = next(pg.load_datasets_graph(["graph9"]))
    for _ in supported_backends():
        pg.PageRank().rank(graph)
        pg.HeatKernel().rank(graph)
        pg.AbsorbingWalks().rank(graph)
    assert True  # reaching this point means all filters completed
def test_multigroup_benchmarks():
    """Benchmark filters plus automatic algorithm selection over multiple
    communities of bigraph, printing multi-supervised AUC and pRule-sensitive
    modularity tables."""
    datasets = ["bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    tol = 1.E-9
    optimization = pg.SelfClearDict()  # shared cache for the heat kernels
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    }
    tuned = {"selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8)}
    candidates = {**algorithms, **tuned}

    def multi_auc(ground_truth, exclude):
        # Aggregate AUC across all communities of each dataset.
        return pg.MultiSupervised(pg.AUC, ground_truth, exclude)

    loader = pg.load_datasets_multiple_communities(datasets, min_group_size=50)
    pg.benchmark_print(
        pg.benchmark(candidates, loader, multi_auc,
                     fraction_of_training=.8, seed=list(range(1))),
        decimals=3, delimiter=" & ", end_line="\\\\")

    loader = pg.load_datasets_multiple_communities(datasets, min_group_size=50)
    pg.benchmark_print(
        pg.benchmark(candidates, loader, pg.Modularity, sensitive=pg.pRule,
                     fraction_of_training=.8, seed=list(range(1))),
        decimals=3, delimiter=" & ", end_line="\\\\")
def test_completion():
    """Smoke test: each filter variant completes a ranking run on graph9
    for every supported backend without raising.

    Fix: the original ended with a second ``pg.HeatKernel().rank(graph)``
    call — a copy-paste duplicate of the earlier invocation.

    NOTE(review): this function shares its name with another test_completion
    in this collection; if both live in one module the later definition
    shadows the earlier one — confirm they originate from separate files.
    """
    graph = next(pg.load_datasets_graph(["graph9"]))
    for _ in supported_backends():
        pg.PageRank().rank(graph)
        pg.PageRank(normalization="both").rank(graph)
        pg.HeatKernel().rank(graph)
        pg.AbsorbingWalks().rank(graph)
        pg.SymmetricAbsorbingRandomWalks().rank(graph)
    assert True  # reaching this point means all filters completed
def test_filter_citations():
    """Distinct filter families and distinct parameterizations of the same
    filter must produce distinct citations.

    Fix: the original asserted ``pg.HeatKernel().cite() !=
    pg.GraphFilter().cite()`` twice — the duplicate is removed.
    """
    # Each filter family differs from the generic graph filter.
    assert pg.PageRank().cite() != pg.GraphFilter().cite()
    assert pg.HeatKernel().cite() != pg.GraphFilter().cite()
    assert pg.AbsorbingWalks().cite() != pg.GraphFilter().cite()
    # Different parameterizations of the same filter also differ.
    assert pg.PageRank(alpha=0.85).cite() != pg.PageRank(alpha=0.99).cite()
    assert pg.HeatKernel(krylov_dims=0).cite() != pg.HeatKernel(krylov_dims=5).cite()
    assert pg.HeatKernel(coefficient_type="taylor").cite() != pg.HeatKernel(coefficient_type="chebyshev").cite()
    assert pg.HeatKernel(optimization_dict=dict()).cite() != pg.HeatKernel(optimization_dict=None).cite()
def test_autotune():
    """The default parameter tuner should score at least as well as the worst
    base filter and within 90% of the best one on a held-out split."""
    _, graph, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    signal = pg.to_signal(graph, {v: 1 for v in group})
    training, evaluation = pg.split(signal, training_samples=0.5)
    auc_ppr = pg.AUC(evaluation, exclude=training)(pg.PageRank().rank(training))
    auc_hk = pg.AUC(evaluation, exclude=training)(pg.HeatKernel().rank(training))
    auc_tuned = pg.AUC(evaluation, exclude=training)(
        pg.ParameterTuner(optimization_dict=dict()).rank(training))
    assert min(auc_ppr, auc_hk) <= auc_tuned
    assert max(auc_ppr, auc_hk) * 0.9 <= auc_tuned
def test_explicit_citations():
    """Citations of rankers, tuners, and postprocessors contain the expected
    publication keys, and wrapped algorithms embed the citation of what they
    wrap."""
    # Base rankers and tuners.
    assert "unknown node ranking algorithm" == pg.NodeRanking().cite()
    assert "with parameters tuned \cite{krasanakis2021pygrank}" in pg.ParameterTuner(
        lambda params: pg.PageRank(params[0])).cite()
    assert "Postprocessor" in pg.Postprocessor().cite()
    assert pg.PageRank().cite() in pg.AlgorithmSelection().cite()
    tuner_citation = pg.ParameterTuner().cite()
    assert "krasanakis2021pygrank" in tuner_citation
    assert "ortega2018graph" in tuner_citation
    # Wrapping embeds the wrapped algorithm's citation.
    assert pg.HeatKernel().cite() in pg.SeedOversampling(pg.HeatKernel()).cite()
    assert pg.AbsorbingWalks().cite() in pg.BoostedSeedOversampling(pg.AbsorbingWalks()).cite()
    # Parameter-dependent citations.
    assert "krasanakis2018venuerank" in pg.BiasedKernel(converge_to_eigenvectors=True).cite()
    assert "yu2021chebyshev" in pg.HeatKernel(coefficient_type="chebyshev").cite()
    assert "susnjara2015accelerated" in pg.HeatKernel(krylov_dims=5).cite()
    assert "krasanakis2021pygrank" in pg.GenericGraphFilter(optimization_dict=dict()).cite()
    # Postprocessors.
    assert "tautology" in pg.Tautology().cite()
    assert pg.PageRank().cite() == pg.Tautology(pg.PageRank()).cite()
    assert "mabs" in pg.MabsMaintain(pg.PageRank()).cite()
    assert "max normalization" in pg.Normalize(pg.PageRank()).cite()
    assert "[0,1] range" in pg.Normalize(pg.PageRank(), "range").cite()
    assert "ordinal" in pg.Ordinals(pg.PageRank()).cite()
    assert "exp" in pg.Transformer(pg.PageRank()).cite()
    assert "0.5" in pg.Threshold(pg.PageRank(), 0.5).cite()
    assert "andersen2007local" in pg.Sweep(pg.PageRank()).cite()
    assert pg.HeatKernel().cite() in pg.Sweep(pg.PageRank(), pg.HeatKernel()).cite()
    # Fairness-aware postprocessors.
    assert "LFPRO" in pg.AdHocFairness("O").cite()
    assert "LFPRO" in pg.AdHocFairness(pg.PageRank(), "LFPRO").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "B").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "mult").cite()
    assert "tsioutsiouliklis2020fairness" in pg.AdHocFairness().cite()
    assert "rahman2019fairwalk" in pg.FairWalk(pg.PageRank()).cite()
    assert "krasanakis2020prioredit" in pg.FairPersonalizer(pg.PageRank()).cite()
def test_auc_ndcg_compliance():
    """AUC and NDCG must agree on which of two score signals ranks the test
    community better; both must reject k larger than the graph size."""
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    training, test = pg.split(group, 0.5)
    for _ in supported_backends():
        scores_ppr = pg.PageRank()(graph, training)
        scores_hk = pg.HeatKernel()(graph, training)
        auc_ppr = pg.AUC(test, exclude=training)(scores_ppr)
        auc_hk = pg.AUC(test, exclude=training)(scores_hk)
        ndcg_ppr = float(pg.NDCG(test, exclude=training)(scores_ppr))
        ndcg_hk = float(pg.NDCG(test, exclude=training)(scores_hk))
        # Both measures should order the two algorithms the same way.
        assert (auc_ppr < auc_hk) == (ndcg_ppr < ndcg_hk)
        with pytest.raises(Exception):
            pg.AUC(test, exclude=test, k=len(graph) + 1)(scores_hk)
        with pytest.raises(Exception):
            pg.NDCG(test, exclude=training, k=len(graph) + 1)(scores_hk)
def test_krylov_space_oversampling():
    """Seed oversampling should not worsen conductance (beyond tolerance) in a
    highly complicated setting that combines a Krylov-space HeatKernel with
    graph renormalization."""
    _, graph, community = next(pg.load_datasets_one_community(["bigraph"]))
    algorithm = pg.HeatKernel(
        t=5,  # the number of hops away HeatKernel places maximal importance on
        krylov_dims=5,
        normalization="symmetric",
        renormalize=True)
    for _ in supported_backends():
        personalization = {node: 1. for node in list(community)[:10]}
        oversampling = pg.SeedOversampling(algorithm)
        pg.Normalize(oversampling)(graph, personalization)  # warm-up run
        measure = pg.Conductance()
        plain_conductance = measure(pg.Normalize(algorithm)(graph, personalization))
        oversampled_conductance = measure(pg.Normalize(oversampling)(graph, personalization))
        assert plain_conductance >= oversampled_conductance - 5.E-6
import pygrank as pg #datasets = ["acm", "amazon", "ant", "citeseer","dblp","facebook0","facebook686","log4j","maven","pubmed","squirel", "twitter"] datasets = [ "facebook0", "facebook686", "log4j", "ant", "eucore", "citeseer", "dblp" ] seed_fractions = [0.3, 0.5] pre = pg.preprocessor(assume_immutability=True, normalization="symmetric") filters = { "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=1.E-6), "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=1.E-6), "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=1.E-6), "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=1.E-6), } filters = pg.create_variations(filters, {"": pg.Tautology, "+Sweep": pg.Sweep}) for name, filter in filters.items(): print("=====", name, "=====") algorithms = { "None": filter, "Mult": pg.AdHocFairness(filter, "B"), "LFPRO": pg.AdHocFairness(filter, "O"), #"FBuck-C": pg.FairPersonalizer(filter, .8, pRule_weight=10, max_residual=1, error_type=pg.Mabs, parameter_buckets=0),
# Benchmark a handful of filters and a parameter tuner on friendster,
# printing AUC with a 50/50 training split.
import pygrank as pg

datasets = ["friendster"]
pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")  # common preprocessor
algs = {
    "ppr.85": pg.PageRank(.85, preprocessor=pre),
    "ppr.99": pg.PageRank(.99, preprocessor=pre, max_iters=1000),
    "hk3": pg.HeatKernel(3, preprocessor=pre),
    "hk5": pg.HeatKernel(5, preprocessor=pre),
    "tuned": pg.ParameterTuner(preprocessor=pre),
}
loader = pg.load_datasets_one_community(datasets)
pg.benchmark_print(pg.benchmark(algs, loader, pg.AUC, fraction_of_training=.5))
import pygrank as pg datasets = ["amazon", "citeseer", "maven"] community_size = 500 pre = pg.preprocessor(assume_immutability=True, normalization="symmetric") convergence = {"tol": 1.E-9, "max_iters": 10000} #convergence = {"error_type": "iters", "max_iters": 41} algorithms = { "ppr0.5": pg.PageRank(alpha=0.5, preprocessor=pre, **convergence), "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, **convergence), "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, **convergence), "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, **convergence), "hk2": pg.HeatKernel(t=2, preprocessor=pre, **convergence), "hk3": pg.HeatKernel(t=3, preprocessor=pre, **convergence), "hk5": pg.HeatKernel(t=5, preprocessor=pre, **convergence), "hk7": pg.HeatKernel(t=7, preprocessor=pre, **convergence), } postprocessor = pg.Tautology algorithms = pg.benchmarks.create_variations(algorithms, postprocessor) measure = pg.AUC optimization = pg.SelfClearDict() def create_param_tuner(optimizer=pg.optimize): return pg.ParameterTuner(lambda params: pg.Normalize( postprocessor( pg.GenericGraphFilter([1]+params,
def test_filter_invalid_parameters():
    """Unknown normalization or coefficient types must raise when ranking."""
    graph = next(pg.load_datasets_graph(["graph5"]))
    for bad_kwargs in ({"normalization": "unknown"}, {"coefficient_type": "unknown"}):
        with pytest.raises(Exception):
            pg.HeatKernel(**bad_kwargs).rank(graph)
# Compare conductance of HeatKernel with/without seed oversampling and
# sweep variations on the EUCore community, printing one line per variant.
import pygrank as pg

_, graph, community = next(pg.load_datasets_one_community(["EUCore"]))
algorithm = pg.HeatKernel(
    t=5,  # the number of hops away HeatKernel places maximal importance on
    normalization="symmetric",
    renormalize=True)
personalization = {node: 1. for node in community}  # ignored nodes assumed to be zeroes
algorithms = {
    "HK5": algorithm,
    "HK5+Oversampling": pg.SeedOversampling(algorithm),
}
algorithms = {**algorithms, **pg.create_variations(algorithms, {"+Sweep": pg.Sweep})}
algorithms = pg.create_variations(algorithms, {"": pg.Normalize})
measure = pg.Conductance()
for algorithm_name, algorithm in algorithms.items():
    scores = algorithm(graph, personalization)  # returns a dict-like pg.GraphSignal
    print(algorithm_name, measure(scores))
def test_filter_as_postprocessor():
    """Chaining with >> should yield the right-hand filter's type."""
    chained = pg.HeatKernel() >> pg.PageRank(normalization="salsa")
    assert isinstance(chained, pg.PageRank)
# Shared benchmark configuration: datasets, preprocessor, and the candidate
# filters (names kept stable — later code in this file may reference them).
datasets = ["eucore", "citeseer", "blockmodel"]
# datasets = ["maven"]
pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
tol = 1.E-9
optimization = pg.SelfClearDict()  # shared cache for the heat kernels
algorithms = {
    "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
    "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
    "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
    "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
}
# Benchmark base filters, sweep variations, tuners, and algorithm selection
# on EUCore and Amazon, printing citations and a LaTeX-style AUC table.
import pygrank as pg

datasets = ["EUCore", "Amazon"]
pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
algs = {
    "ppr.85": pg.PageRank(.85, preprocessor=pre, tol=1.E-9, max_iters=1000),
    "ppr.99": pg.PageRank(.99, preprocessor=pre, tol=1.E-9, max_iters=1000),
    "hk3": pg.HeatKernel(3, preprocessor=pre, tol=1.E-9, max_iters=1000),
    "hk5": pg.HeatKernel(5, preprocessor=pre, tol=1.E-9, max_iters=1000),
}
algs = {**algs, **pg.create_variations(algs, {"+Sweep": pg.Sweep})}
loader = pg.load_datasets_one_community(datasets)
algs["tuned"] = pg.ParameterTuner(preprocessor=pre, tol=1.E-9, max_iters=1000)
algs["selected"] = pg.AlgorithmSelection(
    pg.create_demo_filters(preprocessor=pre, tol=1.E-9, max_iters=1000).values())
algs["tuned+Sweep"] = pg.ParameterTuner(
    ranker_generator=lambda params: pg.Sweep(
        pg.GenericGraphFilter(params, preprocessor=pre, tol=1.E-9, max_iters=1000)))

for alg in algs.values():
    print(alg.cite())  # prints a list of algorithm citations

pg.benchmark_print(
    pg.benchmark(algs, loader, pg.AUC, fraction_of_training=.5),
    delimiter=" & ", end_line="\\\\")