def test_one_community_benchmarks():
    pg.load_backend("numpy")
    datasets = ["graph9", "bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=1.E-9),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=1.E-9),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=1.E-9),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=1.E-9),
        "tuned": pg.ParameterTuner(preprocessor=pre, max_iters=10000, tol=1.E-9),
    }
    # algorithms = benchmark.create_variations(algorithms, {"": pg.Tautology, "+SO": pg.SeedOversampling})
    # loader = pg.load_datasets_one_community(datasets)
    # pg.benchmark(algorithms, loader, "time", verbose=True)
    loader = pg.load_datasets_one_community(datasets)
    pg.benchmark_print(
        pg.benchmark_average(
            pg.benchmark_ranks(
                pg.benchmark(algorithms, loader, pg.AUC, fraction_of_training=.8))))

def test_all_communities_benchmarks():
    datasets = ["bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    tol = 1.E-9
    optimization = pg.SelfClearDict()
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    }
    tuned = {"selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8)}

    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.AUC,
                                    fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")

    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=pg.pRule,
                                    fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")

    # Disparate mistreatment measure: combines the TPR and TNR disparities between the
    # sensitive group and its complement (excluding training nodes from each comparison).
    mistreatment = lambda known_scores, sensitive_signal, exclude: \
        pg.AM([pg.Disparity([pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))]),
               pg.Disparity([pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))])])
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=mistreatment,
                                    fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")

def test_autorefs():
    """
    Tests that different (base) algorithms yield different citations, that all citations have
    at least one reference to a publication, and that wrapping the same base algorithms yields
    the same citations.
    """
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    algs = {
        "ppr.85": pg.PageRank(.85, preprocessor=pre, tol=1.E-9, max_iters=1000),
        "ppr.99": pg.PageRank(.99, preprocessor=pre, tol=1.E-9, max_iters=1000),
        "hk3": pg.HeatKernel(3, preprocessor=pre, tol=1.E-9, max_iters=1000),
        "hk5": pg.HeatKernel(5, preprocessor=pre, tol=1.E-9, max_iters=1000),
        "hk5'": pg.HeatKernel(5, preprocessor=pre, tol=1.E-9, max_iters=1000),
    }
    algs = algs | pg.create_variations(algs, {"+Sweep": pg.Sweep,
                                              "+SO": pg.SeedOversampling,
                                              "+BSO": pg.BoostedSeedOversampling})
    citations = set()
    for alg in algs.values():
        citation = alg.cite()
        assert "\\cite{" in citation
        citations.add(citation)
    # hk5' repeats hk5, so it and its three wrapped variations yield four duplicate citations
    assert len(citations) == len(algs) - 4

def test_sweep_streaming():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.1)
        auc1 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            (pg.PageRank() >> pg.Sweep()).rank(graph, {v: 1 for v in training}))
        auc2 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.PageRank().rank(graph, {v: 1 for v in training}))
        auc3 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.PageRank() >> pg.Transformer(pg.log) >> pg.LinearSweep()
            | pg.to_signal(graph, {v: 1 for v in training}))
        assert auc1 > auc2
        assert abs(auc1 - auc3) < pg.epsilon()
        with pytest.raises(Exception):
            pg.Sweep() << "a"

def test_autotune_manual():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.PageRank().rank(training))
    alg2 = pg.ParameterTuner(lambda params: pg.PageRank(params[0]),
                             max_vals=[0.99], min_vals=[0.5]).tune(training)
    auc2 = pg.AUC(evaluation, exclude=training)(alg2.rank(training))
    assert auc1 <= auc2

def test_filter_citations():
    assert pg.PageRank().cite() != pg.GraphFilter().cite()
    assert pg.HeatKernel().cite() != pg.GraphFilter().cite()
    assert pg.AbsorbingWalks().cite() != pg.GraphFilter().cite()
    assert pg.PageRank(alpha=0.85).cite() != pg.PageRank(alpha=0.99).cite()
    assert pg.HeatKernel(krylov_dims=0).cite() != pg.HeatKernel(krylov_dims=5).cite()
    assert pg.HeatKernel(coefficient_type="taylor").cite() != pg.HeatKernel(coefficient_type="chebyshev").cite()
    assert pg.HeatKernel(optimization_dict=dict()).cite() != pg.HeatKernel(optimization_dict=None).cite()

def test_multigroup_benchmarks():
    datasets = ["bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    tol = 1.E-9
    optimization = pg.SelfClearDict()
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    }
    tuned = {"selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8)}

    loader = pg.load_datasets_multiple_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader,
                                    lambda ground_truth, exclude: pg.MultiSupervised(pg.AUC, ground_truth, exclude),
                                    fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")

    loader = pg.load_datasets_multiple_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=pg.pRule,
                                    fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")

def test_correlation_compliance():
    graph = next(pg.load_datasets_graph(["graph5"]))
    # TODO: Make spearman and pearson correlation support tensorflow
    alg1 = pg.PageRank(alpha=0.5)
    alg2 = pg.PageRank(alpha=0.99)
    pearson_ordinals = pg.PearsonCorrelation(pg.Ordinals(alg1)(graph))(pg.Ordinals(alg2)(graph))
    spearman = pg.SpearmanCorrelation(alg1(graph))(alg2(graph))
    assert pearson_ordinals == spearman

def test_completion():
    graph = next(pg.load_datasets_graph(["graph9"]))
    for _ in supported_backends():
        pg.PageRank().rank(graph)
        pg.PageRank(normalization="both").rank(graph)
        pg.HeatKernel().rank(graph)
        pg.AbsorbingWalks().rank(graph)
        pg.SymmetricAbsorbingRandomWalks().rank(graph)
    assert True

def test_quotient():
    graph = next(pg.load_datasets_graph(["graph9"]))
    for _ in supported_backends():
        test_result = pg.PageRank(normalization='symmetric', tol=max(1.E-9, pg.epsilon()),
                                  use_quotient=True).rank(graph)
        norm_result = pg.PageRank(normalization='symmetric', tol=max(1.E-9, pg.epsilon()),
                                  use_quotient=pg.Normalize("sum")).rank(graph)
        assert pg.Mabs(test_result)(norm_result) < pg.epsilon()

def test_tautology():
    graph = next(pg.load_datasets_graph(["bigraph"]))
    r = pg.PageRank().rank(graph)
    tr = pg.Tautology(pg.PageRank()).rank(graph)
    rt = pg.Tautology().transform(r)
    for u in graph:
        assert r[u] == rt[u]
        assert r[u] == tr[u]
    u = pg.Tautology().rank(graph)
    assert float(sum(u.np)) == len(graph)

def test_transform():
    import math
    graph = next(pg.load_datasets_graph(["graph5"]))
    for _ in supported_backends():
        r1 = pg.Normalize(pg.PageRank(), "sum").rank(graph)
        r2 = pg.Transformer(pg.PageRank(), lambda x: x / pg.sum(x)).rank(graph)
        assert pg.Mabs(r1)(r2) < pg.epsilon()
        r1 = pg.Transformer(math.exp).transform(pg.PageRank()(graph))
        r2 = pg.Transformer(pg.PageRank(), pg.exp).rank(graph)
        assert pg.Mabs(r1)(r2) < pg.epsilon()

def test_automatic_graph_casting():
    graph = next(pg.load_datasets_graph(["graph9"]))
    for _ in supported_backends():
        signal = pg.to_signal(graph, {"A": 1})
        test_result1 = pg.PageRank(normalization='col').rank(signal, signal)
        test_result2 = pg.PageRank(normalization='col').rank(personalization=signal)
        assert pg.Mabs(test_result1)(test_result2) < pg.epsilon()
        with pytest.raises(Exception):
            pg.PageRank(normalization='col').rank(personalization={"A": 1})
        with pytest.raises(Exception):
            pg.PageRank(normalization='col').rank(graph.copy(), signal)

def test_stream():
    graph = next(pg.load_datasets_graph(["graph9"]))
    for _ in supported_backends():
        ranks1 = pg.Normalize(
            pg.PageRank(0.85, tol=pg.epsilon(), max_iters=1000, use_quotient=False)).rank(graph, {"A": 1})
        ranks2 = pg.to_signal(graph, {"A": 1}) >> pg.PageRank(
            0.85, tol=pg.epsilon(), max_iters=1000) + pg.Tautology() >> pg.Normalize()
        assert pg.Mabs(ranks1)(ranks2) < pg.epsilon()

def test_sequential():
    graph = next(pg.load_datasets_graph(["graph5"]))
    for _ in supported_backends():
        prior = pg.to_signal(graph, {"A": 2})
        posterior1 = pg.Normalize(pg.PageRank(), "range").rank(prior)
        posterior2 = pg.Normalize("range")(pg.PageRank()(prior))
        posterior3 = pg.Sequential(pg.PageRank(), pg.Normalize("range")).rank(prior)
        assert pg.sum(pg.abs(posterior1 - posterior2)) < pg.epsilon()  # TODO: investigate when not exactly zero
        assert pg.sum(pg.abs(posterior1 - posterior3)) < pg.epsilon()  # TODO: investigate when not exactly zero

def test_threshold():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.5)
        cond1 = pg.Conductance().evaluate(
            pg.Threshold(pg.Sweep(pg.PageRank())).rank(graph, {v: 1 for v in training}))
        cond2 = pg.Conductance().evaluate(
            pg.Threshold("gap").transform(
                pg.PageRank().rank(graph, {v: 1 for v in training})))  # try all api types
        assert cond1 <= cond2

def test_filter_stream():
    graph = next(pg.load_datasets_graph(["graph9"]))
    for _ in supported_backends():
        test_result = pg.Normalize(
            pg.PageRank(normalization='symmetric', tol=max(1.E-9, pg.epsilon()), use_quotient=True)).rank(graph)
        # the same filter assembled through the chaining operators and run on a uniform seed signal
        norm_result = pg.PageRank(tol=max(1.E-9, pg.epsilon())) \
            + pg.preprocessor(normalization='symmetric') \
            + pg.Normalize("sum") \
            >> pg.Normalize() \
            | pg.to_signal(graph, {v: 1 for v in graph})
        assert pg.Mabs(test_result)(norm_result) < pg.epsilon()

def test_venuerank():
    graph = next(pg.load_datasets_graph(["bigraph"]))
    for _ in supported_backends():
        venuerank = pg.PageRank(alpha=0.85, max_iters=10000, converge_to_eigenvectors=True, tol=1.E-12)
        venuerank_result = venuerank.rank(graph)
        small_restart = pg.PageRank(alpha=0.99, max_iters=10000, tol=1.E-12)
        small_restart_result = small_restart.rank(graph)
        # assert venuerank.convergence.iteration < small_restart.convergence.iteration / 2
        corr = pg.SpearmanCorrelation(pg.Ordinals()(venuerank_result))(pg.Ordinals()(small_restart_result))
        assert corr > 0.99

def test_seed_oversampling_arguments():
    _, graph, group = next(pg.load_datasets_one_community(["graph9"]))
    with pytest.raises(Exception):
        pg.SeedOversampling(pg.PageRank(), 'unknown').rank(graph, {"A": 1})
    with pytest.raises(Exception):
        pg.SeedOversampling(pg.PageRank()).rank(graph, {"A": 0.1, "B": 1})
    with pytest.raises(Exception):
        pg.BoostedSeedOversampling(pg.PageRank(), 'unknown').rank(graph, {"A": 1})
    with pytest.raises(Exception):
        pg.BoostedSeedOversampling(pg.PageRank(), 'naive',
                                   oversample_from_iteration='unknown').rank(graph, {"B": 1})

def test_normalize():
    import networkx as nx
    graph = next(pg.load_datasets_graph(["graph5"]))
    for _ in supported_backends():
        assert float(pg.sum(pg.Normalize("range").transform(
            pg.to_signal(nx.Graph([("A", "B")]), [2, 2])).np)) == 4
        r = pg.Normalize(pg.PageRank(), "range").rank(graph)
        assert pg.min(r.np) == 0
        assert pg.max(r.np) == 1
        r = pg.Normalize(pg.PageRank(), "sum").rank(graph)
        assert abs(pg.sum(r.np) - 1) < pg.epsilon()
        with pytest.raises(Exception):
            pg.Normalize(pg.PageRank(), "unknown").rank(graph)

def test_sweep():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.1)
        auc1 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.Sweep(pg.PageRank()).rank(graph, {v: 1 for v in training}))
        auc2 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.PageRank().rank(graph, {v: 1 for v in training}))
        assert auc1 > auc2

def test_fair_heuristics():
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "FairO": lambda G, p, s: pg.Normalize(pg.AdHocFairness(H, method="O")).rank(G, sensitive=s),
        "FairB": lambda G, p, s: pg.Normalize()(pg.AdHocFairness("B").transform(H.rank(G, p), sensitive=s)),
        "LFPRN": lambda G, p, s: pg.Normalize()(pg.LFPR().rank(G, p, sensitive=s)),
        "LFPRP": lambda G, p, s: pg.Normalize()(pg.LFPR(redistributor="original").rank(G, p, sensitive=s)),
        "FairWalk": lambda G, p, s: pg.FairWalk(H).rank(G, p, sensitive=s),
    }
    import networkx as nx
    _, graph, groups = next(pg.load_datasets_multiple_communities(
        ["bigraph"], graph_api=nx))  # TODO: networkx needed due to edge weighting by some algorithms
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    for name, algorithm in algorithms.items():
        ranks = algorithm(graph, labels, sensitive)
        if name == "FairWalk":
            assert pg.pRule(sensitive)(ranks) > 0.6  # TODO: Check why FairWalk fails by that much and increase the limit.
        else:
            assert pg.pRule(sensitive)(ranks) > 0.98
    sensitive = 1 - sensitive.np
    for name, algorithm in algorithms.items():
        ranks = algorithm(graph, labels, sensitive)
        if name == "FairWalk":
            assert pg.pRule(sensitive)(ranks) > 0.6
        else:
            assert pg.pRule(sensitive)(ranks) > 0.98

def test_absorbing_vs_pagerank():
    graph = next(pg.load_datasets_graph(["graph9"]))
    personalization = {"A": 1, "B": 1}
    for _ in supported_backends():
        pagerank_result = pg.PageRank(normalization='col').rank(graph, personalization)
        absorbing_result = pg.AbsorbingWalks(0.85, normalization='col', max_iters=1000).rank(graph, personalization)
        assert pg.Mabs(pagerank_result)(absorbing_result) < pg.epsilon()

def test_fair_personalizer():
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "FairPers": lambda G, p, s: pg.Normalize(
            pg.FairPersonalizer(H, error_type=pg.Mabs, max_residual=0)).rank(G, p, sensitive=s),
        "FairPers-C": lambda G, p, s: pg.Normalize(
            pg.FairPersonalizer(H, .80, pRule_weight=10, error_type=pg.Mabs, max_residual=0)).rank(G, p, sensitive=s),
        "FairPersSkew": lambda G, p, s: pg.Normalize(
            pg.FairPersonalizer(H, error_skewing=True, max_residual=0)).rank(G, p, sensitive=s),
        "FairPersSkew-C": lambda G, p, s: pg.Normalize(
            pg.FairPersonalizer(H, .80, error_skewing=True, pRule_weight=10, max_residual=0)).rank(G, p, sensitive=s),
    }
    _, graph, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    for algorithm in algorithms.values():
        ranks = algorithm(graph, labels, sensitive)
        assert pg.pRule(sensitive)(ranks) > 0.79  # allow a leeway for generalization capabilities compared to 80%

def test_separate_normalization():
    graph = next(pg.load_datasets_graph(["graph5"]))
    for _ in supported_backends():
        algorithm = pg.PageRank(preserve_norm=False) + pg.SeparateNormalization(["A", "B"])
        ranks = algorithm(graph, {"A": 2})
        assert abs(ranks["A"] + ranks["B"] - 1) < pg.epsilon()

def test_convergence_string_conversion():
    # TODO: make convergence trackable from wrapping objects
    graph = next(pg.load_datasets_graph(["graph5"]))
    ranker = pg.PageRank() >> pg.Normalize()
    ranker(graph)
    assert str(ranker.convergence.iteration) + " iterations" in str(ranker.convergence)

def test_norm_maintain():
    # TODO: investigate that 2.5*epsilon is truly something to be expected
    graph = next(pg.load_datasets_graph(["graph5"]))
    for _ in supported_backends():
        prior = pg.to_signal(graph, {"A": 2})
        posterior = pg.MabsMaintain(pg.Normalize(pg.PageRank(), "range")).rank(prior)
        assert abs(pg.sum(pg.abs(posterior.np)) - 2) < 2.5 * pg.epsilon()

# Presumably a method of a Keras Sequential-style model class defined elsewhere in the test suite;
# it couples a small dense network with a fixed-iteration PageRank diffusion.
def __init__(self, num_inputs, num_outputs, hidden=64):
    super().__init__([
        Dropout(0.5, input_shape=(num_inputs,)),
        Dense(hidden, activation="relu", kernel_regularizer=L2(1.E-5)),
        Dropout(0.5),
        Dense(num_outputs, activation="relu")])
    self.ranker = pg.PageRank(0.9, renormalize=True, assume_immutability=True,
                              use_quotient=False, error_type="iters", max_iters=10)  # 10 iterations

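# A minimal sketch of how the constructor above could sit inside a complete class. The factory
# name, the tensorflow imports, and the tf.keras.Sequential base class are illustrative
# assumptions for this sketch, not the repository's actual definitions.
def _example_model_factory(num_inputs, num_outputs, hidden=64):
    # Local imports so the rest of the test module does not require tensorflow.
    import tensorflow as tf
    from tensorflow.keras.layers import Dropout, Dense
    from tensorflow.keras.regularizers import L2

    class ExamplePredictThenPropagate(tf.keras.Sequential):  # hypothetical wrapper class
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__([
                Dropout(0.5, input_shape=(num_inputs,)),
                Dense(hidden, activation="relu", kernel_regularizer=L2(1.E-5)),
                Dropout(0.5),
                Dense(num_outputs, activation="relu")])
            # Fixed-iteration PageRank used to diffuse the dense network's predictions over the graph.
            self.ranker = pg.PageRank(0.9, renormalize=True, assume_immutability=True,
                                      use_quotient=False, error_type="iters", max_iters=10)

    return ExamplePredictThenPropagate(num_inputs, num_outputs, hidden)
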
def test_ordinals():
    graph = next(pg.load_datasets_graph(["graph5"]))
    for _ in supported_backends():
        test_result = pg.Ordinals(pg.Ordinals(pg.Ordinals(pg.PageRank(normalization='col')))).rank(
            graph, {"A": 1})  # three ordinal transformations are the same as one
        assert test_result["A"] == 1