def test_appnp_tf_autotune():  # distinct name from test_appnp_tf below to avoid shadowing
    from tensorflow.keras.layers import Dropout, Dense
    from tensorflow.keras.regularizers import L2

    class APPNP(tf.keras.Sequential):
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__([
                Dropout(0.5, input_shape=(num_inputs,)),
                Dense(hidden, activation="relu", kernel_regularizer=L2(1.E-5)),
                Dropout(0.5),
                Dense(num_outputs, activation="relu")])
            self.ranker = pg.ParameterTuner(
                lambda par: pg.GenericGraphFilter([par[0] ** i for i in range(int(10))],
                                                  error_type="iters", max_iters=int(10)),
                max_vals=[0.95], min_vals=[0.5], verbose=False,
                measure=pg.Mabs, deviation_tol=0.1, tuning_backend="numpy")

        def call(self, features, graph, training=False):
            predict = super().call(features, training=training)
            propagate = self.ranker.propagate(graph, predict, graph_dropout=0.5 if training else 0)
            return tf.nn.softmax(propagate, axis=1)

    graph, features, labels = pg.load_feature_dataset('synthfeats')
    training, test = pg.split(list(range(len(graph))), 0.8)
    training, validation = pg.split(training, 1 - 0.2 / 0.8)
    model = APPNP(features.shape[1], labels.shape[1])
    with pg.Backend('tensorflow'):  # pygrank computations in the tensorflow backend
        graph = pg.preprocessor(renormalize=True, cors=True)(graph)  # cors = use in many backends
        pg.gnn_train(model, features, graph, labels, training, validation,
                     optimizer=tf.optimizers.Adam(learning_rate=0.01), verbose=True, epochs=50)
        assert float(pg.gnn_accuracy(labels, model(features, graph), test)) == 1.  # dataset is super-easy to predict

def test_gnn_errors():
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    training, test = pg.split(list(range(len(graph))), 0.8)
    training, validation = pg.split(training, 1 - 0.2 / 0.8)
    from tensorflow.keras.layers import Dropout, Dense
    from tensorflow.keras.regularizers import L2

    class APPNP(tf.keras.Sequential):
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__([
                Dropout(0.5, input_shape=(num_inputs,)),
                Dense(hidden, activation="relu", kernel_regularizer=L2(1.E-5)),
                Dropout(0.5),
                Dense(num_outputs, activation="relu")])
            self.ranker = pg.PageRank(0.9, renormalize=True, assume_immutability=True,
                                      use_quotient=False, error_type="iters", max_iters=10)  # 10 iterations

        def call(self, features, graph, training=False):
            predict = super().call(features, training=training)
            propagate = self.ranker.propagate(graph, predict, graph_dropout=0.5 if training else 0)
            return tf.nn.softmax(propagate, axis=1)

    model = APPNP(features.shape[1], labels.shape[1])
    with pytest.raises(Exception):
        pg.gnn_train(model, graph, features, labels, training, validation, test=test, epochs=2)
    pg.load_backend('tensorflow')
    pg.gnn_train(model, features, graph, labels, training, validation, test=test, epochs=300, patience=2)
    predictions = model(features, graph)
    pg.load_backend('numpy')
    with pytest.raises(Exception):
        pg.gnn_accuracy(labels, predictions, test)

def test_appnp_torch():
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    training, test = pg.split(list(range(len(graph))), 0.8)
    training, validation = pg.split(training, 1 - 0.2 / 0.8)

    class AutotuneAPPNP(torch.nn.Module):
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__()
            self.layer1 = torch.nn.Linear(num_inputs, hidden)
            self.layer2 = torch.nn.Linear(hidden, num_outputs)
            self.activation = torch.nn.ReLU()
            self.dropout = torch.nn.Dropout(0.5)
            self.num_outputs = num_outputs
            self.ranker = pg.PageRank(0.9, renormalize=True, assume_immutability=True,
                                      error_type="iters", max_iters=10)

        def forward(self, inputs, training=False):
            graph, features = inputs
            predict = self.dropout(torch.FloatTensor(features))
            predict = self.dropout(self.activation(self.layer1(predict)))
            predict = self.activation(self.layer2(predict))
            predict = self.ranker.propagate(graph, predict, graph_dropout=0.5 if training else 0)
            ret = torch.nn.functional.softmax(predict, dim=1)
            self.loss = 0
            for param in self.layer1.parameters():
                self.loss = self.loss + 1E-5 * torch.norm(param)
            return ret

    def init_weights(m):
        if isinstance(m, torch.nn.Linear):
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.01)

    pg.load_backend('pytorch')
    model = AutotuneAPPNP(features.shape[1], labels.shape[1])
    model.apply(init_weights)
    pg.gnn_train(model, graph, features, labels, training, validation,
                 epochs=50, patience=2)  # TODO: higher numbers fail only on github actions - for local tests it is fine
    assert float(pg.gnn_accuracy(labels, model([graph, features]), test)) >= 0.2
    pg.load_backend('numpy')

def test_sweep_streaming():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.1)
        auc1 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            (pg.PageRank() >> pg.Sweep()).rank(graph, {v: 1 for v in training}))
        auc2 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.PageRank().rank(graph, {v: 1 for v in training}))
        auc3 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.PageRank() >> pg.Transformer(pg.log) >> pg.LinearSweep()
            | pg.to_signal(graph, {v: 1 for v in training}))
        assert auc1 > auc2
        assert abs(auc1 - auc3) < pg.epsilon()
        with pytest.raises(Exception):
            pg.Sweep() << "a"

def test_seed_oversampling():
    _, graph, group = next(pg.load_datasets_one_community(["graph9"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=2)
        training, evaluation = pg.to_signal(graph, {v: 1 for v in training}), \
            pg.to_signal(graph, {v: 1 for v in evaluation})
        for measure in [pg.NDCG, pg.AUC]:
            ranks = pg.PageRank(0.9, max_iters=1000).rank(graph, training)
            base_result = measure(evaluation, training).evaluate(ranks)
            ranks = pg.SeedOversampling(pg.PageRank(0.9, max_iters=1000)).rank(graph, training)
            so_result = measure(evaluation, training).evaluate(ranks)
            bso_result = measure(evaluation, training).evaluate(
                pg.BoostedSeedOversampling(pg.PageRank(0.9, max_iters=1000)).rank(graph, training))
            assert float(base_result) <= float(so_result)
            assert float(so_result) <= float(bso_result)
        pg.SeedOversampling(pg.PageRank(0.99, max_iters=1000), "top").rank(graph, training)
        pg.SeedOversampling(pg.PageRank(0.99, max_iters=1000), "neighbors").rank(graph, training)
        pg.BoostedSeedOversampling(pg.PageRank(max_iters=1000), 'naive',
                                   oversample_from_iteration='original').rank(graph, {"A": 1})

def test_algorithm_selection():
    for _ in supported_backends():
        _, graph, communities = next(pg.load_datasets_multiple_communities(["bigraph"], max_group_number=3))
        train, test = pg.split(communities, 0.05)  # 5% of community members are known
        algorithms = pg.create_variations(pg.create_demo_filters(), pg.Normalize)
        supervised_algorithm = pg.AlgorithmSelection(algorithms.values(), measure=pg.AUC, tuning_backend="numpy")
        print(supervised_algorithm.cite())
        modularity_algorithm = pg.AlgorithmSelection(algorithms.values(), fraction_of_training=1,
                                                     measure=pg.Modularity().as_supervised_method(),
                                                     tuning_backend="numpy")
        supervised_aucs = list()
        modularity_aucs = list()
        for seeds, members in zip(train.values(), test.values()):
            measure = pg.AUC(members, exclude=seeds)
            supervised_aucs.append(measure(supervised_algorithm(graph, seeds)))
            modularity_aucs.append(measure(modularity_algorithm(graph, seeds)))
        assert abs(sum(supervised_aucs) / len(supervised_aucs)
                   - sum(modularity_aucs) / len(modularity_aucs)) < 0.05

def test_lowpass_tuning():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.1)
    auc1 = pg.AUC(evaluation, exclude=training)(
        pg.ParameterTuner(lambda params: pg.GenericGraphFilter(params)).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(
        pg.ParameterTuner(lambda params: pg.LowPassRecursiveGraphFilter(params)).rank(training))
    assert auc2 > auc1 * 0.8

def test_hoptuner_autoregression():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.01)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(measure=pg.AUC).rank(training))
    auc3 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(measure=pg.AUC, autoregression=5).rank(training))
    assert auc3 > auc1 * 0.9

def test_hoptuner_explicit_algorithm():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(
        pg.HopTuner(lambda params: pg.GenericGraphFilter(params, krylov_dims=10),
                    basis="arnoldi", measure=pg.AUC).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(
        pg.HopTuner(basis="arnoldi", krylov_dims=10, measure=pg.AUC).rank(training))
    assert abs(auc1 - auc2) < 0.005

def test_autotune_methods():
    import numpy as np
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}))
    aucs = [pg.AUC(evaluation, exclude=training)(ranker.rank(training))
            for ranker in pg.create_demo_filters().values()]
    auc2 = pg.AUC(evaluation, exclude=training)(pg.AlgorithmSelection().rank(training))
    assert max(aucs) - np.std(aucs) <= auc2

def test_autotune_manual():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.PageRank().rank(training))
    alg2 = pg.ParameterTuner(lambda params: pg.PageRank(params[0]),
                             max_vals=[0.99], min_vals=[0.5]).tune(training)
    auc2 = pg.AUC(evaluation, exclude=training)(alg2.rank(training))
    assert auc1 <= auc2

def test_autotune():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.PageRank().rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(pg.HeatKernel().rank(training))
    auc3 = pg.AUC(evaluation, exclude=training)(pg.ParameterTuner(optimization_dict=dict()).rank(training))
    assert min(auc1, auc2) <= auc3 and max(auc1, auc2) * 0.9 <= auc3

def test_hoptuner_arnoldi_backends():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(basis="arnoldi", measure=pg.AUC).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(
        pg.HopTuner(basis="arnoldi", measure=pg.AUC, tuning_backend="pytorch").rank(training))
    auc3 = pg.AUC(evaluation, exclude=training)(
        pg.HopTuner(basis="arnoldi", measure=pg.AUC, tuning_backend="tensorflow").rank(training))
    assert auc1 == auc2
    assert auc1 == auc3

def test_chebyshev():
    # do not test with tensorflow, as it can be too slow
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    training, evaluation = pg.split(pg.to_signal(graph, {v: 1 for v in group}))
    tuned_auc = pg.AUC(evaluation, training).evaluate(pg.ParameterTuner().rank(graph, training))
    tuned_chebyshev_auc = pg.AUC(evaluation, training).evaluate(
        pg.ParameterTuner(coefficient_type="chebyshev").rank(graph, training))
    assert (tuned_auc - tuned_chebyshev_auc) < 0.1

def test_appnp_tf():
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    training, test = pg.split(list(range(len(graph))), 0.8)
    training, validation = pg.split(training, 1 - 0.2 / 0.8)

    class APPNP(tf.keras.Sequential):
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__([
                tf.keras.layers.Dropout(0.5, input_shape=(num_inputs,)),
                tf.keras.layers.Dense(hidden, activation=tf.nn.relu,
                                      kernel_regularizer=tf.keras.regularizers.L2(1.E-5)),
                tf.keras.layers.Dropout(0.5),
                tf.keras.layers.Dense(num_outputs, activation=tf.nn.relu),
            ])
            self.ranker = pg.PageRank(0.9, renormalize=True, assume_immutability=True,
                                      error_type="iters", max_iters=10)
            self.input_spec = None  # prevents some versions of tensorflow from checking call inputs

        def call(self, inputs, training=False):
            graph, features = inputs
            predict = super().call(features, training=training)
            predict = self.ranker.propagate(graph, predict, graph_dropout=0.5 if training else 0)
            return tf.nn.softmax(predict, axis=1)

    pg.load_backend('tensorflow')
    model = APPNP(features.shape[1], labels.shape[1])
    pg.gnn_train(model, graph, features, labels, training, validation, test=test, epochs=50)
    assert float(pg.gnn_accuracy(labels, model([graph, features]), test)) >= 0.5
    pg.load_backend('numpy')

def test_autotune_backends():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    for tuner in [pg.HopTuner, pg.AlgorithmSelection, pg.ParameterTuner]:
        auc3 = pg.AUC(evaluation, exclude=training)(
            tuner(measure=pg.KLDivergence, tuning_backend="pytorch").rank(training))
        auc2 = pg.AUC(evaluation, exclude=training)(
            tuner(measure=pg.KLDivergence, tuning_backend="tensorflow").rank(training))
        auc1 = pg.AUC(evaluation, exclude=training)(tuner(measure=pg.KLDivergence).rank(training))
        # TODO: maybe fix KLDivergence implementation to not be affected by backend.epsilon()
        assert abs(auc1 - auc2) < 0.005  # different results due to different backend.epsilon()
        assert abs(auc1 - auc3) < 0.005

def test_hoptuner_arnoldi():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(measure=pg.AUC).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(basis="arnoldi", measure=pg.AUC).rank(training))
    assert abs(auc1 - auc2) < 0.005

def test_appnp_torch_autotune():  # distinct name from test_appnp_torch above to avoid shadowing
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    training, test = pg.split(list(range(len(graph))), 0.8)
    training, validation = pg.split(training, 1 - 0.2 / 0.8)

    class AutotuneAPPNP(torch.nn.Module):
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__()
            self.layer1 = torch.nn.Linear(num_inputs, hidden)
            self.layer2 = torch.nn.Linear(hidden, num_outputs)
            self.activation = torch.nn.ReLU()
            self.dropout = torch.nn.Dropout(0.5)
            self.num_outputs = num_outputs
            self.ranker = pg.ParameterTuner(
                lambda par: pg.GenericGraphFilter([par[0] ** i for i in range(int(10))],
                                                  error_type="iters", max_iters=int(10)),
                max_vals=[0.95], min_vals=[0.5], verbose=False,
                measure=pg.Mabs, deviation_tol=0.1, tuning_backend="numpy")

        def forward(self, features, graph, training=False):
            predict = self.dropout(torch.FloatTensor(features))
            predict = self.dropout(self.activation(self.layer1(predict)))
            predict = self.activation(self.layer2(predict))
            predict = self.ranker.propagate(graph, predict, graph_dropout=0.5 if training else 0)
            ret = torch.nn.functional.softmax(predict, dim=1)
            self.loss = 0
            for param in self.layer1.parameters():
                self.loss = self.loss + 1E-5 * torch.norm(param)
            return ret

    def init_weights(m):
        if isinstance(m, torch.nn.Linear):
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.01)

    model = AutotuneAPPNP(features.shape[1], labels.shape[1])
    graph = pg.preprocessor(renormalize=True, cors=True)(graph)
    model.apply(init_weights)
    with pg.Backend('pytorch'):
        pg.gnn_train(model, features, graph, labels, training, validation, epochs=50)

def test_threshold_sweep():  # distinct name from test_threshold below to avoid shadowing
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.5)
        cond1 = pg.Conductance().evaluate(
            pg.Threshold(pg.Sweep(pg.PageRank())).rank(graph, {v: 1 for v in training}))
        cond2 = pg.Conductance().evaluate(
            pg.Threshold("gap").transform(
                pg.PageRank().rank(graph, {v: 1 for v in training})))  # try all api types
        assert cond1 <= cond2

def test_strange_input_types():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    training, test = pg.split(group)
    for _ in supported_backends():
        scores = pg.PageRank()(graph, {v: 1 for v in training})
        ndcg = pg.NDCG(pg.to_signal(scores, {v: 1 for v in test}), k=3)({v: scores[v] for v in scores})
        ndcg_biased = pg.NDCG(pg.to_signal(scores, {v: 1 for v in test}), k=3)({v: scores[v] for v in test})
        assert ndcg < ndcg_biased

def test_split():
    data = {"community1": ["A", "B", "C", "D"],
            "community2": ["B", "E", "F", "G", "H", "I"]}
    training, test = pg.split(data, 1)
    assert training == test
    training, test = pg.split(data, 0.5)
    assert len(training["community2"]) == 3
    assert len(training["community1"]) == 2
    assert len(test["community2"]) == 3
    assert len(set(training["community1"]) - set(test["community1"])) == len(training["community1"])
    assert len(set(training["community2"]) - set(test["community2"])) == len(training["community2"])
    training, test = pg.split(data, 2)
    assert len(training["community2"]) == 2
    assert len(test["community1"]) == 2
    training, test = pg.split(data["community1"], 0.75)
    assert len(training) == 3
    assert len(test) == 1
    training, test = pg.split(set(data["community1"]), 0.75)
    assert len(training) == 3
    assert len(test) == 1

def test_auc_ndcg_compliance():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    training, test = pg.split(group, 0.5)
    for _ in supported_backends():
        scores1 = pg.PageRank()(graph, training)
        scores2 = pg.HeatKernel()(graph, training)
        AUC1 = pg.AUC(test, exclude=training)(scores1)
        AUC2 = pg.AUC(test, exclude=training)(scores2)
        NDCG1 = float(pg.NDCG(test, exclude=training)(scores1))
        NDCG2 = float(pg.NDCG(test, exclude=training)(scores2))
        assert (AUC1 < AUC2) == (NDCG1 < NDCG2)
        with pytest.raises(Exception):
            pg.AUC(test, exclude=test, k=len(graph) + 1)(scores2)
        with pytest.raises(Exception):
            pg.NDCG(test, exclude=training, k=len(graph) + 1)(scores2)

def test_sweep():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.1)
        auc1 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.Sweep(pg.PageRank()).rank(graph, {v: 1 for v in training}))
        auc2 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.PageRank().rank(graph, {v: 1 for v in training}))
        assert auc1 > auc2

def test_seed_top():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=2)
        original_training = set(training)
        from random import random, seed
        seed(0)
        training, evaluation = pg.to_signal(graph, {v: 1 for v in graph
                                                    if v in original_training or random() < 0.5}), \
            pg.to_signal(graph, {v: 1 for v in evaluation})
        for measure in [pg.AUC, pg.NDCG]:
            # ranks = pg.PageRank(0.9, max_iters=1000).rank(graph, training)
            # base_result = measure(evaluation, list(original_training)).evaluate(ranks)
            ranks = pg.Top(pg.Sweep(pg.PageRank(0.9, max_iters=1000)), 0.9).rank(graph, training)
            undersampled_result1 = measure(evaluation, list(original_training)).evaluate(ranks)
            ranks = pg.Top(2, pg.Sweep(pg.PageRank(0.9, max_iters=1000))).rank(graph, training)
            undersampled_result2 = measure(evaluation, list(original_training)).evaluate(ranks)

def test_threshold():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.5)
        algorithm = pg.PageRank()
        cond1 = pg.Conductance().evaluate(
            pg.Threshold(pg.Sweep(algorithm), "gap").rank(graph, {v: 1 for v in training}))
        cond2 = pg.Conductance().evaluate(
            pg.Threshold(0.3).transform(
                algorithm.rank(graph, {v: 1 for v in training})))  # try all api types
        cond3 = pg.Conductance().evaluate(
            pg.Threshold(1).transform(
                algorithm.rank(graph, {v: 1 for v in training})))  # should yield infinite conductance
        # TODO: find an algorithm other than gap to outperform 0.2 threshold too
        assert cond1 <= cond2
        assert cond2 <= cond3

def test_fair_personalizer_mistreatment():
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "Base": lambda G, p, s: H.rank(G, p),
        "FairPersMistreat": pg.Normalize(pg.FairPersonalizer(H, parity_type="mistreatment", pRule_weight=10)),
        "FairPersTPR": pg.Normalize(pg.FairPersonalizer(H, parity_type="TPR", pRule_weight=10)),
        "FairPersTNR": pg.Normalize(pg.FairPersonalizer(H, parity_type="TNR", pRule_weight=-1))
        # TNR optimization increases mistreatment for this example
    }
    mistreatment = lambda known_scores, sensitive_signal, exclude: \
        pg.AM([pg.Disparity([pg.TPR(known_scores, exclude=1 - (1 - exclude) * sensitive_signal),
                             pg.TPR(known_scores, exclude=1 - (1 - exclude) * (1 - sensitive_signal))]),
               pg.Disparity([pg.TNR(known_scores, exclude=1 - (1 - exclude) * sensitive_signal),
                             pg.TNR(known_scores, exclude=1 - (1 - exclude) * (1 - sensitive_signal))])])
    _, graph, groups = next(pg.load_datasets_multiple_communities(["synthfeats"]))
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    train, test = pg.split(labels)
    # TODO: maybe try to check for greater improvement
    base_mistreatment = mistreatment(test, sensitive, train)(algorithms["Base"](graph, train, sensitive))
    for algorithm in algorithms.values():
        if algorithm != algorithms["Base"]:
            print(algorithm.cite())
            assert base_mistreatment >= mistreatment(test, sensitive, train)(algorithm(graph, train, sensitive))

# assumes graph, features and groups were loaded earlier in the script
# (the preceding lines of this snippet were cut off)
nodes = list(graph)
# graph = nx.Graph()
for node in nodes:
    graph.add_node(node)
wordnames = {i: "w" + str(i) for i in range(features.shape[1])}
for i, node in enumerate(nodes):
    for j in range(features.shape[1]):
        if features[i, j] != 0:
            graph.add_edge(node, wordnames[j], weight=features[i, j])
group_lists = list(groups.values())
groups = [pg.to_signal(graph, group) for group in groups.values()]
accs = list()
for seed in range(100):
    ranker = pg.PageRank(0.85, renormalize=True, assume_immutability=True,
                         use_quotient=False, error_type="iters", max_iters=10)  # 10 iterations
    # ranker = pg.LowPassRecursiveGraphFilter([1 - .9 / (pg.log(i + 1) + 1) for i in range(10)],
    #                                         renormalize=True, assume_immutability=True, tol=None)
    training, test = pg.split(nodes, 0.8, seed=seed)
    training = set(training)
    ranks_set = [ranker(graph, {node: 1 for node in group if node in training}) for group in group_lists]
    options = list(range(len(ranks_set)))
    found_set = [list() for _ in training]
    tp = 0
    for v in test:
        if max(options, key=lambda i: ranks_set[i][v]) == max(options, key=lambda i: groups[i][v]):
            tp += 1
    accs.append(tp / len(test))
    print(sum(accs) / len(accs))  # running average accuracy over seeds so far

def overlapping_community_detection(graph, known_members, top=None):
    # graph_filter is an assumption: the snippet's opening lines (including the
    # filter definition) were cut off; any base filter, e.g. pg.PageRank(), fits here
    graph_filter = pg.PageRank()
    ranks = pg.to_signal(graph, {v: 1 for v in known_members}) \
        >> pg.Sweep(graph_filter) >> pg.Normalize("range")
    if top is not None:
        ranks = ranks * (1 - pg.to_signal(graph, {v: 1 for v in known_members}))  # set known member scores to zero
        return sorted(list(graph), key=lambda node: -ranks[node])[:top]  # return specific number of top predictions
    threshold = pg.optimize(max_vals=[1],
                            loss=lambda p: pg.Conductance(graph)(pg.Threshold(p[0]).transform(ranks)))[0]
    known_members = set(known_members)
    return [v for v in graph if ranks[v] > threshold and v not in known_members]


_, graph, group = next(pg.load_datasets_one_community(["citeseer"]))
print(len(group))
train, test = pg.split(group, 0.1)
found = overlapping_community_detection(graph, train)
# node-based evaluation (we work on the returned list of nodes instead of graph signals)
test = set(test)
TP = len([v for v in found if v in test])
print("Precision", TP / len(found))
print("Recall", TP / len(test))
print("Match size", len(found) / len(test))

import pygrank as pg


def community_detection(graph, known_members_set):
    ranks_set = [pg.ParameterTuner()(graph, known_members) for known_members in known_members_set]
    options = list(range(len(ranks_set)))
    found_set = [list() for _ in known_members_set]
    for v in graph:
        found_set[max(options, key=lambda i: ranks_set[i][v])].append(v)
    return found_set


_, graph, groups = next(pg.load_datasets_multiple_communities(["citeseer"]))
train_set, test_set = pg.split(groups, 0.5)
train_set = train_set.values()
test_set = test_set.values()
found_set = community_detection(graph, train_set)
precisions = list()
recalls = list()
for found, train, test in zip(found_set, train_set, test_set):
    train, test = set(train), set(test)
    new_nodes = [v for v in found if v not in train]
    TP = len([v for v in new_nodes if v in test])
    precisions.append(TP / len(new_nodes) if new_nodes else 0)
    recalls.append(TP / len(test))
print("Avg. precision", sum(precisions) / len(precisions))
print("Avg. recall", sum(recalls) / len(recalls))

    # (the APPNP class definition preceding this call method was cut off; it is
    # assumed to set self.ranker, self.propagate_on_training and self.graph_dropout)
    def call(self, inputs, training=False):
        graph, features = inputs
        predict = super().call(features, training=training)
        if not training or self.propagate_on_training:
            predict = self.ranker.propagate(graph, predict,
                                            graph_dropout=self.graph_dropout if training else 0)
        return tf.nn.softmax(predict, axis=1)


pg.load_backend('numpy')
graph, features, labels = pg.load_feature_dataset('cora')
for seed in range(10):
    training, test = pg.split(list(range(len(graph))), 0.8, seed=seed)
    training, validation = pg.split(training, 1 - 0.2 / 0.8, seed=seed)
    architectures = {
        "APPNP": APPNP(features.shape[1], labels.shape[1], alpha=0.9),
        # "LAPPNP": APPNP(features.shape[1], labels.shape[1], alpha=tf.Variable([0.85])),
        "APFNP": APPNP(features.shape[1], labels.shape[1], alpha="estimated")
    }
    pg.load_backend('tensorflow')
    accs = dict()
    for architecture, model in architectures.items():
        pg.gnn_train(model, graph, features, labels, training,