def test_explicit_citations():
    """Verify that .cite() of rankers, tuners and postprocessors mentions
    the expected bibliography entries, and that wrapped algorithms propagate
    their own citations through wrappers."""
    assert "unknown node ranking algorithm" == pg.NodeRanking().cite()
    # NOTE: raw string prevents "\c" from being treated as an (invalid) escape
    # sequence; the runtime value is byte-identical to the non-raw literal.
    assert r"with parameters tuned \cite{krasanakis2021pygrank}" in pg.ParameterTuner(
        lambda params: pg.PageRank(params[0])).cite()
    assert "Postprocessor" in pg.Postprocessor().cite()
    assert pg.PageRank().cite() in pg.AlgorithmSelection().cite()
    assert "krasanakis2021pygrank" in pg.ParameterTuner().cite()
    assert "ortega2018graph" in pg.ParameterTuner().cite()
    # Wrappers should include the wrapped filter's citation.
    assert pg.HeatKernel().cite() in pg.SeedOversampling(pg.HeatKernel()).cite()
    assert pg.AbsorbingWalks().cite() in pg.BoostedSeedOversampling(pg.AbsorbingWalks()).cite()
    assert "krasanakis2018venuerank" in pg.BiasedKernel(converge_to_eigenvectors=True).cite()
    assert "yu2021chebyshev" in pg.HeatKernel(coefficient_type="chebyshev").cite()
    assert "susnjara2015accelerated" in pg.HeatKernel(krylov_dims=5).cite()
    assert "krasanakis2021pygrank" in pg.GenericGraphFilter(optimization_dict=dict()).cite()
    assert "tautology" in pg.Tautology().cite()
    # A tautology wrapper should not alter the wrapped algorithm's citation.
    assert pg.PageRank().cite() == pg.Tautology(pg.PageRank()).cite()
    assert "mabs" in pg.MabsMaintain(pg.PageRank()).cite()
    assert "max normalization" in pg.Normalize(pg.PageRank()).cite()
    assert "[0,1] range" in pg.Normalize(pg.PageRank(), "range").cite()
    assert "ordinal" in pg.Ordinals(pg.PageRank()).cite()
    assert "exp" in pg.Transformer(pg.PageRank()).cite()
    assert "0.5" in pg.Threshold(pg.PageRank(), 0.5).cite()
    assert "andersen2007local" in pg.Sweep(pg.PageRank()).cite()
    assert pg.HeatKernel().cite() in pg.Sweep(pg.PageRank(), pg.HeatKernel()).cite()
    # Fairness-aware postprocessors.
    assert "LFPRO" in pg.AdHocFairness("O").cite()
    assert "LFPRO" in pg.AdHocFairness(pg.PageRank(), "LFPRO").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "B").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "mult").cite()
    assert "tsioutsiouliklis2020fairness" in pg.AdHocFairness().cite()
    assert "rahman2019fairwalk" in pg.FairWalk(pg.PageRank()).cite()
    assert "krasanakis2020prioredit" in pg.FairPersonalizer(pg.PageRank()).cite()
def test_lowpass_tuning():
    """Tuned low-pass recursive filters should not fall far behind tuned
    generic graph filters on a community-recovery task."""
    _, graph, communities = next(pg.load_datasets_multiple_communities(["bigraph"]))
    seeds = pg.to_signal(graph, {node: 1 for node in communities[0]})
    training, evaluation = pg.split(seeds, training_samples=0.1)
    generic_auc = pg.AUC(evaluation, exclude=training)(
        pg.ParameterTuner(lambda params: pg.GenericGraphFilter(params)).rank(training))
    lowpass_auc = pg.AUC(evaluation, exclude=training)(
        pg.ParameterTuner(lambda params: pg.LowPassRecursiveGraphFilter(params)).rank(training))
    # Allow a 20% tolerance margin on the comparison.
    assert lowpass_auc > generic_auc * 0.8
def test_chebyshev():
    """Chebyshev coefficients should not degrade tuned AUC by much."""
    # Do not test with tensorflow, as it can be too slow.
    _, graph, community = next(pg.load_datasets_one_community(["bigraph"]))
    signal = pg.to_signal(graph, {node: 1 for node in community})
    training, evaluation = pg.split(signal)
    base_auc = pg.AUC(evaluation, training).evaluate(
        pg.ParameterTuner().rank(graph, training))
    chebyshev_auc = pg.AUC(evaluation, training).evaluate(
        pg.ParameterTuner(coefficient_type="chebyshev").rank(graph, training))
    assert (base_auc - chebyshev_auc) < 0.1
def test_one_community_benchmarks():
    """Smoke-test the benchmark -> ranks -> average -> print pipeline
    over a small suite of filters on two datasets."""
    pg.load_backend("numpy")
    datasets = ["graph9", "bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    common = dict(preprocessor=pre, max_iters=10000, tol=1.E-9)
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, **common),
        "ppr0.99": pg.PageRank(alpha=0.99, **common),
        "hk3": pg.HeatKernel(t=3, **common),
        "hk5": pg.HeatKernel(t=5, **common),
        "tuned": pg.ParameterTuner(**common),
    }
    loader = pg.load_datasets_one_community(datasets)
    results = pg.benchmark(algorithms, loader, pg.AUC, fraction_of_training=.8)
    pg.benchmark_print(pg.benchmark_average(pg.benchmark_ranks(results)))
def test_autotune_manual():
    """Tuning PageRank's alpha over a manual [0.5, 0.99] range should not
    underperform the default PageRank."""
    _, graph, communities = next(pg.load_datasets_multiple_communities(["bigraph"]))
    signal = pg.to_signal(graph, {node: 1 for node in communities[0]})
    training, evaluation = pg.split(signal, training_samples=0.5)
    baseline_auc = pg.AUC(evaluation, exclude=training)(pg.PageRank().rank(training))
    tuned_algorithm = pg.ParameterTuner(lambda params: pg.PageRank(params[0]),
                                        max_vals=[0.99], min_vals=[0.5]).tune(training)
    tuned_auc = pg.AUC(evaluation, exclude=training)(tuned_algorithm.rank(training))
    assert baseline_auc <= tuned_auc
def test_autotune():
    """Default tuning should be competitive with both PageRank and HeatKernel."""
    _, graph, communities = next(pg.load_datasets_multiple_communities(["bigraph"]))
    signal = pg.to_signal(graph, {node: 1 for node in communities[0]})
    training, evaluation = pg.split(signal, training_samples=0.5)
    measure = lambda ranks: pg.AUC(evaluation, exclude=training)(ranks)
    pagerank_auc = measure(pg.PageRank().rank(training))
    heatkernel_auc = measure(pg.HeatKernel().rank(training))
    tuned_auc = measure(pg.ParameterTuner(optimization_dict=dict()).rank(training))
    assert min(pagerank_auc, heatkernel_auc) <= tuned_auc
    # The tuner may not beat the best filter, but should come within 10% of it.
    assert max(pagerank_auc, heatkernel_auc) * 0.9 <= tuned_auc
def test_optimization_dict():
    """Sharing an optimization dictionary across tuner runs should make
    repeated tuning faster than tuning from scratch each time."""
    pg.load_backend("numpy")
    from timeit import default_timer as time
    graph = next(pg.load_datasets_graph(["bigraph"]))
    personalization = {str(i): 1 for i in range(200)}
    preprocessor = pg.preprocessor(assume_immutability=True)
    # Run once up-front so graph preprocessing is excluded from both timings
    # (presumably cached thanks to assume_immutability — confirm in pygrank docs).
    preprocessor(graph)
    start = time()
    for _ in range(10):
        pg.ParameterTuner(preprocessor=preprocessor, tol=1.E-9).rank(graph, personalization)
    unoptimized = time() - start
    optimization = dict()
    start = time()
    for _ in range(10):
        pg.ParameterTuner(optimization_dict=optimization,
                          preprocessor=preprocessor, tol=1.E-9).rank(graph, personalization)
    optimized = time() - start
    assert len(optimization) == 20
    assert unoptimized > optimized
def community_detection(graph, known_members_set):
    """Partition all graph nodes among communities.

    Each community's seed nodes are expanded into a tuned ranking; every
    node is then assigned to the community that scores it highest.
    Returns a list of node lists, parallel to known_members_set.
    """
    ranks_set = [pg.ParameterTuner()(graph, members) for members in known_members_set]
    community_ids = list(range(len(ranks_set)))
    found_set = [[] for _ in known_members_set]
    for node in graph:
        best = max(community_ids, key=lambda i: ranks_set[i][node])
        found_set[best].append(node)
    return found_set
def __init__(self, num_inputs, num_outputs, hidden=64):
    """Dropout-regularized two-layer dense network with an attached
    parameter-tuned 10-term polynomial graph filter (ranker)."""
    layers = [
        Dropout(0.5, input_shape=(num_inputs,)),
        Dense(hidden, activation="relu", kernel_regularizer=L2(1.E-5)),
        Dropout(0.5),
        Dense(num_outputs, activation="relu"),
    ]
    super().__init__(layers)
    # Tune only the decay factor of the polynomial filter; run tuning on numpy.
    self.ranker = pg.ParameterTuner(
        lambda par: pg.GenericGraphFilter([par[0] ** i for i in range(10)],
                                          error_type="iters", max_iters=10),
        max_vals=[0.95], min_vals=[0.5],
        verbose=False, measure=pg.Mabs, deviation_tol=0.1,
        tuning_backend="numpy")
def __init__(self, num_inputs, num_outputs, hidden=64):
    """Two-layer MLP (ReLU + dropout) with an attached parameter-tuned
    10-term polynomial graph filter (ranker)."""
    super().__init__()
    self.layer1 = torch.nn.Linear(num_inputs, hidden)
    self.layer2 = torch.nn.Linear(hidden, num_outputs)
    self.activation = torch.nn.ReLU()
    self.dropout = torch.nn.Dropout(0.5)
    self.num_outputs = num_outputs
    # Tune only the decay factor of the polynomial filter; run tuning on numpy.
    self.ranker = pg.ParameterTuner(
        lambda par: pg.GenericGraphFilter([par[0] ** i for i in range(10)],
                                          error_type="iters", max_iters=10),
        max_vals=[0.95], min_vals=[0.5],
        verbose=False, measure=pg.Mabs, deviation_tol=0.1,
        tuning_backend="numpy")
def create_param_tuner(optimizer=pg.optimize):
    """Build a 40-parameter tuner over normalized, postprocessed generic
    graph filters.

    NOTE(review): relies on enclosing-scope names `pre`, `postprocessor`,
    `optimization` and `measure` defined elsewhere in this file.
    """
    def build_ranker(params):
        # Leading coefficient is fixed to 1; the remaining 40 are tuned.
        graph_filter = pg.GenericGraphFilter([1] + params,
                                             preprocessor=pre,
                                             error_type="iters", max_iters=41,
                                             optimization_dict=optimization,
                                             preserve_norm=False)
        return pg.Normalize(postprocessor(graph_filter))
    return pg.ParameterTuner(build_ranker,
                             deviation_tol=1.E-6,
                             measure=measure,
                             optimizer=optimizer,
                             max_vals=[1] * 40,
                             min_vals=[0] * 40)
def __init__(self, num_inputs, num_outputs, hidden=64):
    """Dropout-regularized dense classifier with an attached
    parameter-tuned graph filter over a renormalized adjacency."""
    super().__init__([
        Dropout(0.5, input_shape=(num_inputs, )),
        Dense(hidden, activation="relu", kernel_regularizer=L2(0.005)),
        Dropout(0.5),
        Dense(num_outputs),
    ])
    adjacency = pg.preprocessor(renormalize=True, assume_immutability=True)
    # Tune only the decay factor of a 10-term polynomial filter on numpy.
    self.ranker = pg.ParameterTuner(
        lambda par: pg.GenericGraphFilter([par[0] ** i for i in range(10)],
                                          preprocessor=adjacency,
                                          error_type="iters", max_iters=10),
        max_vals=[1], min_vals=[0.5],
        verbose=False, measure=pg.Mabs, deviation_tol=0.01,
        tuning_backend="numpy")
def overlapping_community_detection(graph, known_members, top=None):
    """Expand a seed community into either the `top` highest-scored new
    nodes, or all nodes above a conductance-optimized score threshold."""
    # Small seed sets warrant tuning; larger ones use a fixed PageRank.
    if len(known_members) < 50:
        graph_filter = pg.PageRank(0.9)
    else:
        graph_filter = pg.ParameterTuner().tune(graph, known_members)
    seeds = pg.to_signal(graph, {v: 1 for v in known_members})
    ranks = seeds >> pg.Sweep(graph_filter) >> pg.Normalize("range")
    if top is not None:
        # Set known member scores to zero, then return the requested number
        # of top predictions.
        ranks = ranks * (1 - pg.to_signal(graph, {v: 1 for v in known_members}))
        return sorted(list(graph), key=lambda node: -ranks[node])[:top]
    # Pick the score threshold that minimizes conductance of the induced set.
    threshold = pg.optimize(
        max_vals=[1],
        loss=lambda p: pg.Conductance(graph)(pg.Threshold(p[0]).transform(ranks)))[0]
    known_members = set(known_members)
    return [v for v in graph if ranks[v] > threshold and v not in known_members]
def test_autotune_citations():
    """Tuners and selectors must report citations distinct from a plain
    generic graph filter's."""
    for tuner in (pg.ParameterTuner(), pg.HopTuner(), pg.AlgorithmSelection()):
        assert tuner.cite() != pg.GenericGraphFilter().cite()
import pygrank as pg

datasets = ["EUCore", "Amazon"]
pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
algs = {
    "ppr.85": pg.PageRank(.85, preprocessor=pre, tol=1.E-9, max_iters=1000),
    "ppr.99": pg.PageRank(.99, preprocessor=pre, tol=1.E-9, max_iters=1000),
    "hk3": pg.HeatKernel(3, preprocessor=pre, tol=1.E-9, max_iters=1000),
    "hk5": pg.HeatKernel(5, preprocessor=pre, tol=1.E-9, max_iters=1000),
}
# Merge in sweep-postprocessed variants of every base filter.
algs = algs | pg.create_variations(algs, {"+Sweep": pg.Sweep})
loader = pg.load_datasets_one_community(datasets)

# Tuned and automatically-selected competitors.
algs["tuned"] = pg.ParameterTuner(preprocessor=pre, tol=1.E-9, max_iters=1000)
algs["selected"] = pg.AlgorithmSelection(
    pg.create_demo_filters(preprocessor=pre, tol=1.E-9, max_iters=1000).values())
algs["tuned+Sweep"] = pg.ParameterTuner(
    ranker_generator=lambda params: pg.Sweep(pg.GenericGraphFilter(
        params, preprocessor=pre, tol=1.E-9, max_iters=1000)))

for alg in algs.values():
    print(alg.cite())  # prints a list of algorithm citations

# LaTeX-friendly benchmark table.
pg.benchmark_print(pg.benchmark(algs, loader, pg.AUC, fraction_of_training=.5),
                   delimiter=" & ", end_line="\\\\")
import pygrank as pg

datasets = ["friendster"]
pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")  # common preprocessor
algs = {
    "ppr.85": pg.PageRank(.85, preprocessor=pre),
    "ppr.99": pg.PageRank(.99, preprocessor=pre, max_iters=1000),
    "hk3": pg.HeatKernel(3, preprocessor=pre),
    "hk5": pg.HeatKernel(5, preprocessor=pre),
    "tuned": pg.ParameterTuner(preprocessor=pre),
}
loader = pg.load_datasets_one_community(datasets)
pg.benchmark_print(pg.benchmark(algs, loader, pg.AUC, fraction_of_training=.5))