def evaluate(graph, algorithm):
    tprs = list()
    ppvs = list()
    f1s = list()
    aucs = list()
    for node in list(graph):
        neighbors = list(graph.neighbors(node))
        if len(neighbors) < 10:
            continue
        training = pg.to_signal(graph, {node: 1})
        test = pg.to_signal(graph, {neighbor: 1 for neighbor in neighbors})
        for neighbor in random.sample(neighbors, 1):
            assert graph.has_edge(node, neighbor)
            graph.remove_edge(node, neighbor)
            assert not graph.has_edge(node, neighbor)
            assert not graph.has_edge(neighbor, node)
        result = (training >> algorithm) * (1 - training)
        aucs.append(pg.AUC(test, exclude=training)(result))
        top = result >> pg.Top(10) >> pg.Threshold()
        prec = pg.PPV(test, exclude=training)(top)
        rec = pg.TPR(test, exclude=training)(top)
        ppvs.append(prec)
        tprs.append(rec)
        f1s.append(pg.safe_div(2 * prec * rec, prec + rec))
        # restore the edges removed above by iterating over the saved neighbor list
        # (iterating graph.neighbors(node) here would miss the removed neighbor)
        for neighbor in neighbors:
            if not graph.has_edge(node, neighbor):
                graph.add_edge(node, neighbor)
        print(
            f"\r{algorithm.cite()}\t AUC {sum(aucs) / len(aucs):.3f}\t"
            f" f1 {sum(f1s) / len(f1s):.3f}\t prec {sum(ppvs) / len(ppvs):.3f}\t"
            f" rec {sum(tprs) / len(tprs):.3f}\t",
            end="")
    print()
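# Minimal usage sketch for the evaluate helper above (illustrative, not part of the test
# suite): it assumes pygrank is imported as pg, that random is imported, and that the
# chosen dataset name is available through pg.load_datasets_graph, as in the snippets below.
if __name__ == "__main__":
    example_graph = next(pg.load_datasets_graph(["graph9"]))
    evaluate(example_graph, pg.PageRank(max_iters=1000))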
def test_signal_direct_operations():
    for _ in supported_backends():
        graph = nx.DiGraph([(1, 2), (2, 3)])
        signal = pg.to_signal(graph, [1., 2., 3.])
        assert pg.sum(signal) == 6
        assert pg.sum(signal + 1) == 9
        assert pg.sum(1 + signal) == 9
        assert pg.sum(signal ** 2) == 14
        assert pg.sum(signal - pg.to_signal(graph, [1, 2, 2])) == 1
        assert pg.sum(-1 + signal) == 3
        assert pg.sum(signal / pg.to_signal(graph, [1., 2., 3.])) == 3
        assert pg.sum(3 ** signal) == 3 + 9 + 27
        signal **= 2
        assert pg.sum(signal) == 14
        signal.np = pg.to_signal(graph, [4, 4, 4])
        assert pg.sum(signal) == 12
        assert pg.sum(+signal) == 12
        assert pg.sum(-signal) == -12
        assert pg.sum(-signal / 2) == -6
        # assert pg.sum(-signal // 2) == -6
        assert pg.sum(2 / signal) == 1.5
        # assert pg.sum(2 // signal) == 0
        signal += 1
        assert pg.sum(signal) == 15
        signal -= 1
        assert pg.sum(signal) == 12
        signal /= 2
        assert pg.sum(signal) == 6
        signal /= 2  # //= 2
        assert pg.sum(signal) == 3
        signal *= 4
        assert pg.sum(signal) == 12
        with pytest.raises(Exception):
            signal + pg.to_signal(graph.copy(), [1., 2., 3.])
def test_fair_heuristics():
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "FairO": lambda G, p, s: pg.Normalize(pg.AdHocFairness(H, method="O")).rank(G, sensitive=s),
        "FairB": lambda G, p, s: pg.Normalize()(pg.AdHocFairness("B").transform(H.rank(G, p), sensitive=s)),
        "LFPRN": lambda G, p, s: pg.Normalize()(pg.LFPR().rank(G, p, sensitive=s)),
        "LFPRP": lambda G, p, s: pg.Normalize()(pg.LFPR(redistributor="original").rank(G, p, sensitive=s)),
        "FairWalk": lambda G, p, s: pg.FairWalk(H).rank(G, p, sensitive=s)
    }
    import networkx as nx
    # TODO: networkx needed due to edge weighting by some algorithms
    _, graph, groups = next(pg.load_datasets_multiple_communities(["bigraph"], graph_api=nx))
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    for name, algorithm in algorithms.items():
        ranks = algorithm(graph, labels, sensitive)
        if name == "FairWalk":
            # TODO: check why FairWalk fails by that much and increase the limit.
            assert pg.pRule(sensitive)(ranks) > 0.6
        else:
            assert pg.pRule(sensitive)(ranks) > 0.98
    sensitive = 1 - sensitive.np
    for name, algorithm in algorithms.items():
        ranks = algorithm(graph, labels, sensitive)
        if name == "FairWalk":
            assert pg.pRule(sensitive)(ranks) > 0.6
        else:
            assert pg.pRule(sensitive)(ranks) > 0.98
def test_seed_oversampling():
    _, graph, group = next(pg.load_datasets_one_community(["graph9"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=2)
        training, evaluation = pg.to_signal(graph, {v: 1 for v in training}), \
            pg.to_signal(graph, {v: 1 for v in evaluation})
        for measure in [pg.NDCG, pg.AUC]:
            ranks = pg.PageRank(0.9, max_iters=1000).rank(graph, training)
            base_result = measure(evaluation, training).evaluate(ranks)
            ranks = pg.SeedOversampling(pg.PageRank(0.9, max_iters=1000)).rank(graph, training)
            so_result = measure(evaluation, training).evaluate(ranks)
            bso_result = measure(evaluation, training).evaluate(
                pg.BoostedSeedOversampling(pg.PageRank(0.9, max_iters=1000)).rank(graph, training))
            assert float(base_result) <= float(so_result)
            assert float(so_result) <= float(bso_result)
        pg.SeedOversampling(pg.PageRank(0.99, max_iters=1000), "top").rank(graph, training)
        pg.SeedOversampling(pg.PageRank(0.99, max_iters=1000), "neighbors").rank(graph, training)
        pg.BoostedSeedOversampling(pg.PageRank(max_iters=1000), 'naive',
                                   oversample_from_iteration='original').rank(graph, {"A": 1})
def test_fair_personalizer():
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "FairPers": lambda G, p, s: pg.Normalize(
            pg.FairPersonalizer(H, error_type=pg.Mabs, max_residual=0)).rank(G, p, sensitive=s),
        "FairPers-C": lambda G, p, s: pg.Normalize(
            pg.FairPersonalizer(H, .80, pRule_weight=10, error_type=pg.Mabs, max_residual=0)).rank(G, p, sensitive=s),
        "FairPersSkew": lambda G, p, s: pg.Normalize(
            pg.FairPersonalizer(H, error_skewing=True, max_residual=0)).rank(G, p, sensitive=s),
        "FairPersSkew-C": lambda G, p, s: pg.Normalize(
            pg.FairPersonalizer(H, .80, error_skewing=True, pRule_weight=10, max_residual=0)).rank(G, p, sensitive=s),
    }
    _, graph, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    for algorithm in algorithms.values():
        ranks = algorithm(graph, labels, sensitive)
        # allow a leeway for generalization capabilities compared to 80%
        assert pg.pRule(sensitive)(ranks) > 0.79
def test_fair_heuristics():
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "FairO": lambda G, p, s: pg.Normalize(pg.AdHocFairness(H, method="O")).rank(G, sensitive=s),
        "FairB": lambda G, p, s: pg.Normalize()(pg.AdHocFairness("B").transform(H.rank(G, p), sensitive=s)),
        "FairWalk": lambda G, p, s: pg.FairWalk(H).rank(G, p, sensitive=s)
    }
    _, graph, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    for algorithm in algorithms.values():
        ranks = algorithm(graph, labels, sensitive)
        # TODO: check why FairWalk fails by that much and increase the limit.
        assert pg.pRule(sensitive)(ranks) > 0.6
    sensitive = 1 - sensitive.np
    for algorithm in algorithms.values():
        ranks = algorithm(graph, labels, sensitive)
        assert pg.pRule(sensitive)(ranks) > 0.6
def test_stream_diff():
    graph = next(pg.load_datasets_graph(["graph9"]))
    for _ in supported_backends():
        ranks1 = pg.GenericGraphFilter([0, 0, 1], max_iters=4, error_type="iters") \
            | pg.to_signal(graph, {"A": 1})
        ranks2 = pg.GenericGraphFilter([1, 1, 1], tol=None) & ~pg.GenericGraphFilter([1, 1], tol=None) \
            | pg.to_signal(graph, {"A": 1})
        assert pg.Mabs(ranks1)(ranks2) < pg.epsilon()
def test_invalid_fairness_arguments():
    _, graph, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    with pytest.raises(Exception):
        # this tests that a deprecated way of applying FairWalk actually raises an exception
        pg.AdHocFairness(H, method="FairWalk").rank(graph, labels, sensitive=sensitive)
    with pytest.raises(Exception):
        pg.FairPersonalizer(H, parity_type="universal").rank(graph, labels, sensitive=sensitive)
    with pytest.raises(Exception):
        pg.FairWalk(None).transform(H.rank(graph, labels), sensitive=sensitive)
def test_strange_input_types():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    training, test = pg.split(group)
    for _ in supported_backends():
        scores = pg.PageRank()(graph, {v: 1 for v in training})
        ndcg = pg.NDCG(pg.to_signal(scores, {v: 1 for v in test}), k=3)({v: scores[v] for v in scores})
        ndcg_biased = pg.NDCG(pg.to_signal(scores, {v: 1 for v in test}), k=3)({v: scores[v] for v in test})
        assert ndcg < ndcg_biased
def test_lowpass_tuning():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.1)
    auc1 = pg.AUC(evaluation, exclude=training)(
        pg.ParameterTuner(lambda params: pg.GenericGraphFilter(params)).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(
        pg.ParameterTuner(lambda params: pg.LowPassRecursiveGraphFilter(params)).rank(training))
    assert auc2 > auc1 * 0.8
def test_hoptuner_autoregression():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.01)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(measure=pg.AUC).rank(training))
    auc3 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(measure=pg.AUC, autoregression=5).rank(training))
    assert auc3 > auc1 * 0.9
def test_hoptuner_explicit_algorithm():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(
        pg.HopTuner(lambda params: pg.GenericGraphFilter(params, krylov_dims=10),
                    basis="arnoldi", measure=pg.AUC).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(
        pg.HopTuner(basis="arnoldi", krylov_dims=10, measure=pg.AUC).rank(training))
    assert abs(auc1 - auc2) < 0.005
def test_sweep_streaming():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.1)
        auc1 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            (pg.PageRank() >> pg.Sweep()).rank(graph, {v: 1 for v in training}))
        auc2 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.PageRank().rank(graph, {v: 1 for v in training}))
        auc3 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.PageRank() >> pg.Transformer(pg.log) >> pg.LinearSweep()
            | pg.to_signal(graph, {v: 1 for v in training}))
        assert auc1 > auc2
        assert abs(auc1 - auc3) < pg.epsilon()
        with pytest.raises(Exception):
            pg.Sweep() << "a"
def rank(self, graph, personalization, sensitive, *args, **kwargs):
    personalization = pg.to_signal(graph, personalization)
    # if self.pretrainer is not None:
    #     pretrain_tuner = Tensortune(self.ranker, model=self.model())
    #     pretrain_tuner.train_model(graph, personalization, sensitive, *args, **kwargs)
    return self.train_model(graph, personalization, sensitive, *args, **kwargs)
def rank(self, graph, personalization, sensitive, *args, **kwargs):
    original_ranks = self.ranker(graph, personalization, *args, sensitive=sensitive, **kwargs)
    base_ranks = original_ranks if self.ranker == self.base_ranker \
        else self.base_ranker(graph, personalization, *args, **kwargs)
    training_objective = pg.AM() \
        .add(pg.L2(base_ranks), weight=-1.) \
        .add(pg.pRule(tf.cast(sensitive.np, tf.float32)), weight=10., max_val=0.8)
    with pg.Backend("tensorflow"):
        ranks_var = tf.Variable(pg.to_array(original_ranks.np))
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
        best_loss = float('inf')
        best_ranks = None
        for epoch in range(2000):
            with tf.GradientTape() as tape:
                ranks = pg.to_signal(original_ranks, ranks_var)
                loss = -training_objective(ranks)  # + 1.E-5*tf.reduce_sum(ranks_var*ranks_var)
            grads = tape.gradient(loss, [ranks_var])
            optimizer.apply_gradients(zip(grads, [ranks_var]))
            validation_loss = loss
            if validation_loss < best_loss:
                patience = 100
                best_ranks = ranks
                best_loss = validation_loss
            patience -= 1
            if patience == 0:
                break
    return best_ranks
def _transform(self, ranks: pg.GraphSignal, **kwargs):
    if ranks.graph not in self.known_ranks or not self.assume_immutability:
        with pg.Backend("numpy"):
            A = pg.preprocessor(normalization=self.normalization)(ranks.graph)
            D = pg.degrees(pg.preprocessor(normalization="none")(ranks.graph))
            s = pg.sum(D) ** 0.5 / 2 if self.sparsity is None else self.sparsity
            D = (D / pg.max(D)) ** self.beta
            S = scipy.sparse.random(
                self.dims, A.shape[0],
                density=1. / s,
                data_rvs=lambda l: np.random.choice([-1, 1], size=l),
                format="csc")
            S = S @ scipy.sparse.spdiags(D, 0, *A.shape)
            self.embeddigns[ranks.graph] = pg.scipy_sparse_to_backend(S.T)
            # we know that the first term is zero and avoid direct embedding comparison
            self.known_ranks[ranks.graph] = []
            for _ in range(len(self.weights)):
                S = S @ A
                self.known_ranks[ranks.graph].append(pg.scipy_sparse_to_backend(S))
    ret = 0
    on = pg.conv(ranks.np, self.embeddigns[ranks.graph])
    for weight, S in zip(self.weights, self.known_ranks[ranks.graph]):
        uv = pg.conv(on, S)
        ret = ret + weight * uv
    return pg.to_signal(ranks, ret)
def test_norm_maintain():
    # TODO: investigate that 2.5*epsilon is truly something to be expected
    graph = next(pg.load_datasets_graph(["graph5"]))
    for _ in supported_backends():
        prior = pg.to_signal(graph, {"A": 2})
        posterior = pg.MabsMaintain(pg.Normalize(pg.PageRank(), "range")).rank(prior)
        assert abs(pg.sum(pg.abs(posterior.np)) - 2) < 2.5 * pg.epsilon()
def test_autotune():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.PageRank().rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(pg.HeatKernel().rank(training))
    auc3 = pg.AUC(evaluation, exclude=training)(pg.ParameterTuner(optimization_dict=dict()).rank(training))
    assert min(auc1, auc2) <= auc3 and max(auc1, auc2) * 0.9 <= auc3
def test_autotune_manual():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.PageRank().rank(training))
    alg2 = pg.ParameterTuner(lambda params: pg.PageRank(params[0]),
                             max_vals=[0.99], min_vals=[0.5]).tune(training)
    auc2 = pg.AUC(evaluation, exclude=training)(alg2.rank(training))
    assert auc1 <= auc2
def test_autotune_methods():
    import numpy as np
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}))
    aucs = [pg.AUC(evaluation, exclude=training)(ranker.rank(training))
            for ranker in pg.create_demo_filters().values()]
    auc2 = pg.AUC(evaluation, exclude=training)(pg.AlgorithmSelection().rank(training))
    assert max(aucs) - np.std(aucs) <= auc2
def test_chebyshev():
    # do not test with tensorflow, as it can be too slow
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    training, evaluation = pg.split(pg.to_signal(graph, {v: 1 for v in group}))
    tuned_auc = pg.AUC(evaluation, training).evaluate(pg.ParameterTuner().rank(graph, training))
    tuned_chebyshev_auc = pg.AUC(evaluation, training).evaluate(
        pg.ParameterTuner(coefficient_type="chebyshev").rank(graph, training))
    assert (tuned_auc - tuned_chebyshev_auc) < 0.1
def test_subgraph():
    graph = next(pg.load_datasets_graph(["graph9"]))
    signal1 = pg.to_signal(graph, {"A": 1, "B": 1, "C": 1, "D": 1, "E": 0.5})
    assert "L" in signal1
    signal2 = pg.Subgraph().rank(signal1)
    assert signal2["E"] == 0.5
    assert "L" not in signal2
    signal3 = signal2 >> pg.Supergraph()
    assert "L" in signal3
def test_hoptuner_arnoldi_backends():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(
        pg.HopTuner(basis="arnoldi", measure=pg.AUC).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(
        pg.HopTuner(basis="arnoldi", measure=pg.AUC, tuning_backend="pytorch").rank(training))
    auc3 = pg.AUC(evaluation, exclude=training)(
        pg.HopTuner(basis="arnoldi", measure=pg.AUC, tuning_backend="tensorflow").rank(training))
    assert auc1 == auc2
    assert auc1 == auc3
def test_preprocessor_types():
    def test_graph():
        return next(pg.load_datasets_graph(["graph5"]))

    for _ in supported_backends():
        from random import random
        graph = test_graph()
        signal = pg.to_signal(graph, {v: random() for v in graph})
        laplacian = pg.preprocessor(normalization="laplacian")(graph)
        symmetric = pg.preprocessor(normalization="symmetric")(graph)
        assert pg.abs(pg.sum(pg.conv(signal, laplacian) + pg.conv(signal, symmetric) - signal)) <= pg.epsilon()
def test_seed_top():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=2)
        original_training = set(training)
        from random import random, seed
        seed(0)
        training, evaluation = pg.to_signal(graph, {v: 1 for v in graph
                                                    if v in original_training or random() < 0.5}), \
            pg.to_signal(graph, {v: 1 for v in evaluation})
        for measure in [pg.AUC, pg.NDCG]:
            # ranks = pg.PageRank(0.9, max_iters=1000).rank(graph, training)
            # base_result = measure(evaluation, list(original_training)).evaluate(ranks)
            ranks = pg.Top(pg.Sweep(pg.PageRank(0.9, max_iters=1000)), 0.9).rank(graph, training)
            undersampled_result1 = measure(evaluation, list(original_training)).evaluate(ranks)
            ranks = pg.Top(2, pg.Sweep(pg.PageRank(0.9, max_iters=1000))).rank(graph, training)
            undersampled_result2 = measure(evaluation, list(original_training)).evaluate(ranks)
def test_automatic_graph_casting():
    graph = next(pg.load_datasets_graph(["graph9"]))
    for _ in supported_backends():
        signal = pg.to_signal(graph, {"A": 1})
        test_result1 = pg.PageRank(normalization='col').rank(signal, signal)
        test_result2 = pg.PageRank(normalization='col').rank(personalization=signal)
        assert pg.Mabs(test_result1)(test_result2) < pg.epsilon()
        with pytest.raises(Exception):
            pg.PageRank(normalization='col').rank(personalization={"A": 1})
        with pytest.raises(Exception):
            pg.PageRank(normalization='col').rank(graph.copy(), signal)
def test_signal_np_auto_conversion():
    import tensorflow as tf
    import numpy as np
    graph = nx.DiGraph([(1, 2), (2, 3)])
    signal = pg.to_signal(graph, tf.convert_to_tensor([1., 2., 3.]))
    assert isinstance(signal.np, np.ndarray)
    with pg.Backend("tensorflow"):
        assert pg.backend_name() == "tensorflow"
        assert not isinstance(signal.np, np.ndarray)
    assert pg.backend_name() == "numpy"
    assert isinstance(signal.np, np.ndarray)
def test_autotune_backends():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    for tuner in [pg.HopTuner, pg.AlgorithmSelection, pg.ParameterTuner]:
        auc3 = pg.AUC(evaluation, exclude=training)(
            tuner(measure=pg.KLDivergence, tuning_backend="pytorch").rank(training))
        auc2 = pg.AUC(evaluation, exclude=training)(
            tuner(measure=pg.KLDivergence, tuning_backend="tensorflow").rank(training))
        auc1 = pg.AUC(evaluation, exclude=training)(tuner(measure=pg.KLDivergence).rank(training))
        # TODO: maybe fix KLDivergence implementation to not be affected by backend.epsilon()
        assert abs(auc1 - auc2) < 0.005  # different results due to different backend.epsilon()
        assert abs(auc1 - auc3) < 0.005
def test_hoptuner_arnoldi():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(measure=pg.AUC).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(basis="arnoldi", measure=pg.AUC).rank(training))
    assert abs(auc1 - auc2) < 0.005
def test_stream():
    graph = next(pg.load_datasets_graph(["graph9"]))
    for _ in supported_backends():
        ranks1 = pg.Normalize(
            pg.PageRank(0.85, tol=pg.epsilon(), max_iters=1000, use_quotient=False)).rank(graph, {"A": 1})
        ranks2 = (pg.to_signal(graph, {"A": 1})
                  >> pg.PageRank(0.85, tol=pg.epsilon(), max_iters=1000) + pg.Tautology()
                  >> pg.Normalize())
        assert pg.Mabs(ranks1)(ranks2) < pg.epsilon()