def _transform(self, ranks: pg.GraphSignal, **kwargs):
    """Approximately apply this filter through sparse random-projection embeddings.

    On the first encounter of a graph (or on every call when
    ``assume_immutability`` is disabled) the method constructs, in the numpy
    backend, a random sparse +-1 projection of the degree-reweighted graph and
    caches the projections of successive adjacency powers. Afterwards the
    signal is scored by combining those cached projections with the filter's
    ``self.weights``.

    Args:
        ranks: The graph signal to transform.
        **kwargs: Ignored; accepted for interface compatibility with other transforms.

    Returns:
        A graph signal over ``ranks.graph`` holding the approximated filter output.
    """
    if ranks.graph not in self.known_ranks or not self.assume_immutability:
        # (Re)build cached quantities; construction always happens on numpy
        # regardless of the currently active backend.
        with pg.Backend("numpy"):
            A = pg.preprocessor(normalization=self.normalization)(ranks.graph)
            D = pg.degrees(pg.preprocessor(normalization="none")(ranks.graph))
            # Default sparsity heuristic: sqrt(total degree) / 2 -- TODO confirm intent.
            s = pg.sum(D)**0.5 / 2 if self.sparsity is None else self.sparsity
            D = (D / pg.max(D))**self.beta  # degree re-weighting, normalized to [0, 1]
            # Random sparse projection matrix (dims x num_nodes) with +-1 entries.
            S = scipy.sparse.random(self.dims, A.shape[0], density=1. / s,
                                    data_rvs=lambda l: np.random.choice([-1, 1], size=l),
                                    format="csc")
            S = S @ scipy.sparse.spdiags(D, 0, *A.shape)
            # NOTE(review): attribute is spelled "embeddigns" (sic); kept as-is
            # because renaming would desync it from wherever it is initialized.
            self.embeddigns[ranks.graph] = pg.scipy_sparse_to_backend(S.T)
            self.known_ranks[ranks.graph] = []  # we know that the first term is zero and avoid direct embedding comparison
            for _ in range(len(self.weights)):
                S = S @ A  # project the next adjacency power
                self.known_ranks[ranks.graph].append(pg.scipy_sparse_to_backend(S))
    # Weighted combination of the projected signal with each cached power.
    ret = 0
    on = pg.conv(ranks.np, self.embeddigns[ranks.graph])
    for weight, S in zip(self.weights, self.known_ranks[ranks.graph]):
        uv = pg.conv(on, S)
        ret = ret + weight * uv
    return pg.to_signal(ranks, ret)
def test_preprocessor():
    """Check preprocessor caching semantics: recomputation happens when
    immutability is off, switched off midway, or the hash is cleared."""
    def fresh_graph():
        return next(pg.load_datasets_graph(["graph5"]))

    for _ in supported_backends():
        graph = fresh_graph()
        with pytest.raises(Exception):
            # An unknown normalization scheme must be rejected.
            bad = pg.preprocessor(normalization="unknown", assume_immutability=True)
            bad(graph)

        # Without immutability every invocation recomputes from scratch.
        preproc = pg.preprocessor(normalization="col", assume_immutability=False)
        graph = fresh_graph()
        first = preproc(graph)
        second = preproc(graph)
        assert first is not second

        # Immutability may be switched off midway, disabling the cache.
        preproc = pg.MethodHasher(pg.preprocessor, assume_immutability=True)
        graph = fresh_graph()
        first = preproc(graph)
        preproc.assume_immutability = False
        second = preproc(graph)
        assert first is not second

        # Explicitly clearing hashed results also forces recomputation.
        preproc = pg.preprocessor(normalization="col", assume_immutability=True)
        graph = fresh_graph()
        first = preproc(graph)
        preproc.clear_hashed()
        second = preproc(graph)
        assert first is not second
def test_preprocessor_types():
    """Check that laplacian and symmetric normalizations are complementary:
    their convolutions with the same signal sum back to the signal itself."""
    def fresh_graph():
        return next(pg.load_datasets_graph(["graph5"]))

    for _ in supported_backends():
        from random import random
        graph = fresh_graph()
        signal = pg.to_signal(graph, {node: random() for node in graph})
        lap = pg.preprocessor(normalization="laplacian")(graph)
        sym = pg.preprocessor(normalization="symmetric")(graph)
        # L x + A_sym x should reconstruct x (L = I - A_sym).
        residual = pg.conv(signal, lap) + pg.conv(signal, sym) - signal
        assert pg.abs(pg.sum(residual)) <= pg.epsilon()
def test_autorefs():
    """
    Check that distinct base algorithms produce distinct citations, that every
    citation references at least one publication, and that re-wrapping the same
    base algorithm reproduces the same citation (hence the -4 at the end).
    """
    shared_pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    rankers = {
        "ppr.85": pg.PageRank(.85, preprocessor=shared_pre, tol=1.E-9, max_iters=1000),
        "ppr.99": pg.PageRank(.99, preprocessor=shared_pre, tol=1.E-9, max_iters=1000),
        "hk3": pg.HeatKernel(3, preprocessor=shared_pre, tol=1.E-9, max_iters=1000),
        "hk5": pg.HeatKernel(5, preprocessor=shared_pre, tol=1.E-9, max_iters=1000),
        "hk5'": pg.HeatKernel(5, preprocessor=shared_pre, tol=1.E-9, max_iters=1000),
    }
    rankers = rankers | pg.create_variations(
        rankers, {
            "+Sweep": pg.Sweep,
            "+SO": pg.SeedOversampling,
            "+BSO": pg.BoostedSeedOversampling
        })
    unique_citations = set()
    for ranker in rankers.values():
        text = ranker.cite()
        assert "\\cite{" in text  # every citation must reference a publication
        unique_citations.add(text)
    # hk5 and hk5' duplicate each other across the base set and three variations.
    assert len(unique_citations) == len(rankers) - 4
def test_all_communities_benchmarks():
    """Smoke-test the all-communities benchmark pipeline on a small dataset,
    covering supervised (AUC), fairness-aware (pRule) and disparate-mistreatment
    evaluation of several filters plus automatic algorithm selection."""
    datasets = ["bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    tol = 1.E-9
    optimization = pg.SelfClearDict()
    # Base filters share the same preprocessor; heat kernels also share an
    # optimization dictionary to reuse intermediate computations.
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    }
    tuned = {"selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8)}
    # Pass 1: supervised AUC benchmark.
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.AUC,
                                    fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
    # Pass 2: unsupervised Modularity with pRule as the sensitive-attribute measure.
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=pg.pRule,
                                    fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
    # Disparate mistreatment: average the TPR and TNR disparities between the
    # sensitive and non-sensitive groups, excluding training seeds from each.
    mistreatment = lambda known_scores, sensitive_signal, exclude: \
        pg.AM([pg.Disparity([pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))]),
               pg.Disparity([pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))])])
    # Pass 3: Modularity with the custom mistreatment measure.
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=mistreatment,
                                    fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ",
                       end_line="\\\\")
def test_appnp_tf():
    """Train an APPNP-style GNN in the tensorflow backend: a Keras MLP whose
    predictions are propagated by an autotuned pygrank graph filter."""
    from tensorflow.keras.layers import Dropout, Dense
    from tensorflow.keras.regularizers import L2

    class APPNP(tf.keras.Sequential):
        def __init__(self, num_inputs, num_outputs, hidden=64):
            # Two-layer MLP with dropout; propagation is delegated to the ranker.
            super().__init__([
                Dropout(0.5, input_shape=(num_inputs,)),
                Dense(hidden, activation="relu", kernel_regularizer=L2(1.E-5)),
                Dropout(0.5),
                Dense(num_outputs, activation="relu")])
            # Autotuned polynomial graph filter; tuning runs on numpy for speed.
            self.ranker = pg.ParameterTuner(
                lambda par: pg.GenericGraphFilter([par[0] ** i for i in range(int(10))],
                                                  error_type="iters", max_iters=int(10)),
                max_vals=[0.95], min_vals=[0.5], verbose=False,
                measure=pg.Mabs, deviation_tol=0.1, tuning_backend="numpy")

        def call(self, features, graph, training=False):
            predict = super().call(features, training=training)
            # Graph dropout is applied only while training.
            propagate = self.ranker.propagate(graph, predict, graph_dropout=0.5 if training else 0)
            return tf.nn.softmax(propagate, axis=1)

    graph, features, labels = pg.load_feature_dataset('synthfeats')
    # 60/20/20 split: 80% train, then carve a quarter of that out as validation.
    training, test = pg.split(list(range(len(graph))), 0.8)
    training, validation = pg.split(training, 1 - 0.2 / 0.8)
    model = APPNP(features.shape[1], labels.shape[1])
    with pg.Backend('tensorflow'):  # pygrank computations in tensorflow backend
        graph = pg.preprocessor(renormalize=True, cors=True)(graph)  # cors = use in many backends
        pg.gnn_train(model, features, graph, labels, training, validation,
                     optimizer=tf.optimizers.Adam(learning_rate=0.01), verbose=True, epochs=50)
        assert float(pg.gnn_accuracy(labels, model(features, graph), test)) == 1.  # dataset is super-easy to predict
def test_one_community_benchmarks():
    """Run the one-community AUC benchmark and print rank-averaged results."""
    pg.load_backend("numpy")
    datasets = ["graph9", "bigraph"]
    shared_pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    rankers = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=shared_pre, max_iters=10000, tol=1.E-9),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=shared_pre, max_iters=10000, tol=1.E-9),
        "hk3": pg.HeatKernel(t=3, preprocessor=shared_pre, max_iters=10000, tol=1.E-9),
        "hk5": pg.HeatKernel(t=5, preprocessor=shared_pre, max_iters=10000, tol=1.E-9),
        "tuned": pg.ParameterTuner(preprocessor=shared_pre, max_iters=10000, tol=1.E-9),
    }
    # algorithms = benchmark.create_variations(algorithms, {"": pg.Tautology, "+SO": pg.SeedOversampling})
    # loader = pg.load_datasets_one_community(datasets)
    # pg.benchmark(algorithms, loader, "time", verbose=True)
    loader = pg.load_datasets_one_community(datasets)
    results = pg.benchmark(rankers, loader, pg.AUC, fraction_of_training=.8)
    ranked = pg.benchmark_ranks(results)
    averaged = pg.benchmark_average(ranked)
    pg.benchmark_print(averaged)
def test_multigroup_benchmarks():
    """Smoke-test the multiple-communities benchmark pipeline: multi-supervised
    AUC evaluation and fairness-aware (pRule) Modularity for several filters
    plus automatic algorithm selection."""
    datasets = ["bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    tol = 1.E-9
    optimization = pg.SelfClearDict()
    # Base filters share the same preprocessor; heat kernels also share an
    # optimization dictionary to reuse intermediate computations.
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    }
    tuned = {
        "selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8)
    }
    # Pass 1: AUC aggregated over all communities of each dataset.
    loader = pg.load_datasets_multiple_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(
        algorithms | tuned, loader,
        lambda ground_truth, exclude: pg.MultiSupervised(pg.AUC, ground_truth, exclude),
        fraction_of_training=.8, seed=list(range(1))),
        decimals=3, delimiter=" & ", end_line="\\\\")
    # Pass 2: unsupervised Modularity with pRule as the sensitive-attribute measure.
    loader = pg.load_datasets_multiple_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=pg.pRule,
                                    fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
def test_filter_stream():
    """Verify that the streaming operator syntax (+, >>, |) builds the same
    normalized pagerank as the explicit object-oriented construction."""
    graph = next(pg.load_datasets_graph(["graph9"]))
    for _ in supported_backends():
        tol = max(1.E-9, pg.epsilon())
        expected = pg.Normalize(
            pg.PageRank(normalization='symmetric', tol=tol, use_quotient=True)).rank(graph)
        # Operator precedence mirrors the original chain: `+` binds tighter
        # than `>>`, which binds tighter than `|`.
        pipeline = pg.PageRank(tol=tol) \
            + pg.preprocessor(normalization='symmetric') \
            + pg.Normalize("sum")
        pipeline = pipeline >> pg.Normalize()
        streamed = pipeline | pg.to_signal(graph, {node: 1 for node in graph})
        assert pg.Mabs(expected)(streamed) < pg.epsilon()
def test_optimization_dict():
    """Check that supplying an optimization dictionary caches 20 intermediate
    results and makes repeated tuning measurably faster."""
    pg.load_backend("numpy")
    from timeit import default_timer as now
    graph = next(pg.load_datasets_graph(["bigraph"]))
    personalization = {str(i): 1 for i in range(200)}
    preprocessor = pg.preprocessor(assume_immutability=True)
    preprocessor(graph)  # warm the preprocessor cache before timing

    start = now()
    for _ in range(10):
        pg.ParameterTuner(preprocessor=preprocessor, tol=1.E-9).rank(graph, personalization)
    baseline_time = now() - start

    cache = dict()
    start = now()
    for _ in range(10):
        pg.ParameterTuner(optimization_dict=cache,
                          preprocessor=preprocessor, tol=1.E-9).rank(graph, personalization)
    cached_time = now() - start

    assert len(cache) == 20
    assert baseline_time > cached_time
def __init__(self, num_inputs, num_outputs, hidden=64):
    """Build a dropout-regularized two-layer Keras MLP and attach an autotuned
    pygrank propagation filter.

    Args:
        num_inputs: Number of input features per node.
        num_outputs: Number of output classes.
        hidden: Width of the hidden dense layer (default 64).
    """
    # NOTE(review): Dropout/Dense/L2 come from the enclosing module scope
    # (presumably tensorflow.keras) -- not visible in this chunk.
    super().__init__([
        Dropout(0.5, input_shape=(num_inputs, )),
        Dense(hidden, activation="relu", kernel_regularizer=L2(0.005)),
        Dropout(0.5),
        Dense(num_outputs)
    ])
    pre = pg.preprocessor(renormalize=True, assume_immutability=True)
    # Autotuned 10-term polynomial graph filter; tuning runs on numpy for speed.
    self.ranker = pg.ParameterTuner(lambda par: pg.GenericGraphFilter(
        [par[0]**i for i in range(int(10))], preprocessor=pre,
        error_type="iters", max_iters=10),
        max_vals=[1], min_vals=[0.5], verbose=False,
        measure=pg.Mabs, deviation_tol=0.01, tuning_backend="numpy")
def test_appnp_torch():
    """Train an APPNP-style GNN in the pytorch backend: a two-layer torch MLP
    whose predictions are propagated by an autotuned pygrank graph filter."""
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    # 60/20/20 split: 80% train, then carve a quarter of that out as validation.
    training, test = pg.split(list(range(len(graph))), 0.8)
    training, validation = pg.split(training, 1 - 0.2 / 0.8)

    class AutotuneAPPNP(torch.nn.Module):
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__()
            self.layer1 = torch.nn.Linear(num_inputs, hidden)
            self.layer2 = torch.nn.Linear(hidden, num_outputs)
            self.activation = torch.nn.ReLU()
            self.dropout = torch.nn.Dropout(0.5)
            self.num_outputs = num_outputs
            # Autotuned polynomial graph filter; tuning runs on numpy for speed.
            self.ranker = pg.ParameterTuner(
                lambda par: pg.GenericGraphFilter([par[0] ** i for i in range(int(10))],
                                                  error_type="iters", max_iters=int(10)),
                max_vals=[0.95], min_vals=[0.5], verbose=False,
                measure=pg.Mabs, deviation_tol=0.1, tuning_backend="numpy")

        def forward(self, features, graph, training=False):
            predict = self.dropout(torch.FloatTensor(features))
            predict = self.dropout(self.activation(self.layer1(predict)))
            predict = self.activation(self.layer2(predict))
            # Graph dropout is applied only while training.
            predict = self.ranker.propagate(graph, predict, graph_dropout=0.5 if training else 0)
            ret = torch.nn.functional.softmax(predict, dim=1)
            # Manual L2 regularization on the first layer, exposed via self.loss
            # -- presumably consumed by pg.gnn_train; verify against trainer.
            self.loss = 0
            for param in self.layer1.parameters():
                self.loss = self.loss + 1E-5*torch.norm(param)
            return ret

    def init_weights(m):
        # Xavier initialization for linear layers, small positive bias.
        if isinstance(m, torch.nn.Linear):
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.01)

    model = AutotuneAPPNP(features.shape[1], labels.shape[1])
    graph = pg.preprocessor(renormalize=True, cors=True)(graph)
    model.apply(init_weights)
    with pg.Backend('pytorch'):  # pygrank computations in pytorch backend
        pg.gnn_train(model, features, graph, labels, training, validation, epochs=50)
def test_krylov_space():
    """Verify Krylov-space decomposition: repeated multiplication in the small
    Krylov space tracks repeated graph convolution within the error bound."""
    graph = next(pg.load_datasets_graph(["bigraph"]))
    nodes = list(graph)
    for _ in supported_backends():
        personalization = pg.to_signal(graph, {nodes[0]: 1, nodes[1]: 1})
        M = pg.preprocessor(normalization="symmetric")(graph)
        krylov_dims = 5
        krylov_result = pg.eye(int(krylov_dims))  # identity in Krylov coordinates
        krylov_base, H = pg.krylov_base(M, personalization.np, int(krylov_dims))
        error_bound = pg.krylov_error_bound(krylov_base, H, M, personalization.np)
        # Projecting the zero element back must yield an all-zero signal.
        assert pg.sum(pg.krylov2original(0, H, krylov_dims)) == 0
        assert error_bound < 0.01
        for _ in range(100):
            # Advance one power in Krylov coordinates and in the original space.
            krylov_result = krylov_result @ H
            personalization.np = pg.conv(personalization.np, M)
            # print(pg.Mabs(personalization.np)(pg.krylov2original(krylov_base, krylov_result, int(krylov_dims))))
            assert pg.Mabs(personalization.np)(pg.krylov2original(
                krylov_base, krylov_result, int(krylov_dims))) <= error_bound
            assert pg.krylov2original(
                krylov_base, krylov_result, int(krylov_dims)).shape == personalization.np.shape
import pygrank as pg #datasets = ["acm", "amazon", "ant", "citeseer","dblp","facebook0","facebook686","log4j","maven","pubmed","squirel", "twitter"] datasets = [ "facebook0", "facebook686", "log4j", "ant", "eucore", "citeseer", "dblp" ] seed_fractions = [0.3, 0.5] pre = pg.preprocessor(assume_immutability=True, normalization="symmetric") filters = { "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=1.E-6), "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=1.E-6), "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=1.E-6), "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=1.E-6), } filters = pg.create_variations(filters, {"": pg.Tautology, "+Sweep": pg.Sweep}) for name, filter in filters.items(): print("=====", name, "=====") algorithms = { "None": filter, "Mult": pg.AdHocFairness(filter, "B"), "LFPRO": pg.AdHocFairness(filter, "O"), #"FBuck-C": pg.FairPersonalizer(filter, .8, pRule_weight=10, max_residual=1, error_type=pg.Mabs, parameter_buckets=0),