def test_gnn_errors():
    """Check that GNN training/accuracy helpers fail outside a tensor backend."""
    from tensorflow.keras.layers import Dropout, Dense
    from tensorflow.keras.regularizers import L2

    graph, features, labels = pg.load_feature_dataset('synthfeats')
    train_nodes, test_nodes = pg.split(list(range(len(graph))), 0.8)
    train_nodes, valid_nodes = pg.split(train_nodes, 1 - 0.2 / 0.8)

    class APPNP(tf.keras.Sequential):
        # Two-layer MLP whose class predictions are smoothed by personalized PageRank.
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__([
                Dropout(0.5, input_shape=(num_inputs,)),
                Dense(hidden, activation="relu", kernel_regularizer=L2(1.E-5)),
                Dropout(0.5),
                Dense(num_outputs, activation="relu")])
            self.ranker = pg.PageRank(
                0.9, renormalize=True, assume_immutability=True,
                use_quotient=False, error_type="iters", max_iters=10)  # 10 iterations

        def call(self, features, graph, training=False):
            predict = super().call(features, training=training)
            edge_dropout = 0.5 if training else 0
            propagate = self.ranker.propagate(graph, predict, graph_dropout=edge_dropout)
            return tf.nn.softmax(propagate, axis=1)

    model = APPNP(features.shape[1], labels.shape[1])
    # NOTE(review): this call is expected to raise — presumably because the numpy
    # backend is still active (and the positional order differs from the call
    # below); confirm against pg.gnn_train's contract.
    with pytest.raises(Exception):
        pg.gnn_train(model, graph, features, labels, train_nodes, valid_nodes,
                     test=test_nodes, epochs=2)
    pg.load_backend('tensorflow')
    pg.gnn_train(model, features, graph, labels, train_nodes, valid_nodes,
                 test=test_nodes, epochs=300, patience=2)
    predictions = model(features, graph)
    # Reverting to numpy makes accuracy computation on tensorflow outputs fail.
    pg.load_backend('numpy')
    with pytest.raises(Exception):
        pg.gnn_accuracy(labels, predictions, test_nodes)
def test_appnp_tf():
    """Train an APPNP with an autotuned graph filter; the synthetic dataset
    should be predicted perfectly."""
    from tensorflow.keras.layers import Dropout, Dense
    from tensorflow.keras.regularizers import L2

    class APPNP(tf.keras.Sequential):
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__([
                Dropout(0.5, input_shape=(num_inputs,)),
                Dense(hidden, activation="relu", kernel_regularizer=L2(1.E-5)),
                Dropout(0.5),
                Dense(num_outputs, activation="relu")])
            # Tune a 10-term polynomial filter; the tuner itself runs on numpy.
            self.ranker = pg.ParameterTuner(
                lambda par: pg.GenericGraphFilter([par[0] ** i for i in range(int(10))],
                                                  error_type="iters", max_iters=int(10)),
                max_vals=[0.95], min_vals=[0.5], verbose=False,
                measure=pg.Mabs, deviation_tol=0.1, tuning_backend="numpy")

        def call(self, features, graph, training=False):
            predict = super().call(features, training=training)
            edge_dropout = 0.5 if training else 0
            propagate = self.ranker.propagate(graph, predict, graph_dropout=edge_dropout)
            return tf.nn.softmax(propagate, axis=1)

    graph, features, labels = pg.load_feature_dataset('synthfeats')
    train_nodes, test_nodes = pg.split(list(range(len(graph))), 0.8)
    train_nodes, valid_nodes = pg.split(train_nodes, 1 - 0.2 / 0.8)
    model = APPNP(features.shape[1], labels.shape[1])
    with pg.Backend('tensorflow'):  # pygrank computations in tensorflow backend
        graph = pg.preprocessor(renormalize=True, cors=True)(graph)  # cors = use in many backends
        pg.gnn_train(model, features, graph, labels, train_nodes, valid_nodes,
                     optimizer=tf.optimizers.Adam(learning_rate=0.01),
                     verbose=True, epochs=50)
        assert float(pg.gnn_accuracy(labels, model(features, graph), test_nodes)) == 1.  # dataset is super-easy to predict
def test_appnp_torch():
    """Train a torch APPNP end-to-end in the pytorch backend and check accuracy."""
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    train_nodes, test_nodes = pg.split(list(range(len(graph))), 0.8)
    train_nodes, valid_nodes = pg.split(train_nodes, 1 - 0.2 / 0.8)

    class AutotuneAPPNP(torch.nn.Module):
        # MLP + fixed-iteration PageRank propagation; inputs arrive as (graph, features).
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__()
            self.layer1 = torch.nn.Linear(num_inputs, hidden)
            self.layer2 = torch.nn.Linear(hidden, num_outputs)
            self.activation = torch.nn.ReLU()
            self.dropout = torch.nn.Dropout(0.5)
            self.num_outputs = num_outputs
            self.ranker = pg.PageRank(0.9, renormalize=True, assume_immutability=True,
                                      error_type="iters", max_iters=10)

        def forward(self, inputs, training=False):
            graph, features = inputs
            predict = self.dropout(torch.FloatTensor(features))
            predict = self.dropout(self.activation(self.layer1(predict)))
            predict = self.activation(self.layer2(predict))
            predict = self.ranker.propagate(graph, predict,
                                            graph_dropout=0.5 if training else 0)
            ret = torch.nn.functional.softmax(predict, dim=1)
            # L2 regularization of the first layer, exposed for the trainer to read.
            self.loss = sum((1E-5 * torch.norm(param)
                             for param in self.layer1.parameters()), 0)
            return ret

    def init_weights(m):
        # Xavier init for linear layers; small positive bias.
        if isinstance(m, torch.nn.Linear):
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.01)

    pg.load_backend('pytorch')
    model = AutotuneAPPNP(features.shape[1], labels.shape[1])
    model.apply(init_weights)
    pg.gnn_train(model, graph, features, labels, train_nodes, valid_nodes,
                 epochs=50, patience=2)
    # TODO: higher numbers fail only on github actions - for local tests it is fine
    assert float(pg.gnn_accuracy(labels, model([graph, features]), test_nodes)) >= 0.2
    pg.load_backend('numpy')
def test_appnp_tf():
    """Train a keras APPNP (inputs packed as [graph, features]) and check accuracy."""
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    train_nodes, test_nodes = pg.split(list(range(len(graph))), 0.8)
    train_nodes, valid_nodes = pg.split(train_nodes, 1 - 0.2 / 0.8)

    class APPNP(tf.keras.Sequential):
        def __init__(self, num_inputs, num_outputs, hidden=64):
            layers = [
                tf.keras.layers.Dropout(0.5, input_shape=(num_inputs,)),
                tf.keras.layers.Dense(hidden, activation=tf.nn.relu,
                                      kernel_regularizer=tf.keras.regularizers.L2(1.E-5)),
                tf.keras.layers.Dropout(0.5),
                tf.keras.layers.Dense(num_outputs, activation=tf.nn.relu),
            ]
            super().__init__(layers)
            self.ranker = pg.PageRank(0.9, renormalize=True, assume_immutability=True,
                                      error_type="iters", max_iters=10)
            self.input_spec = None  # prevents some versions of tensorflow from checking call inputs

        def call(self, inputs, training=False):
            graph, features = inputs
            predict = super().call(features, training=training)
            predict = self.ranker.propagate(graph, predict,
                                            graph_dropout=0.5 if training else 0)
            return tf.nn.softmax(predict, axis=1)

    pg.load_backend('tensorflow')
    model = APPNP(features.shape[1], labels.shape[1])
    pg.gnn_train(model, graph, features, labels, train_nodes, valid_nodes,
                 test=test_nodes, epochs=50)
    assert float(pg.gnn_accuracy(labels, model([graph, features]), test_nodes)) >= 0.5
    pg.load_backend('numpy')
def test_appnp_torch():
    """Train a torch APPNP whose propagation filter is chosen by a parameter tuner."""
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    train_nodes, test_nodes = pg.split(list(range(len(graph))), 0.8)
    train_nodes, valid_nodes = pg.split(train_nodes, 1 - 0.2 / 0.8)

    class AutotuneAPPNP(torch.nn.Module):
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__()
            self.layer1 = torch.nn.Linear(num_inputs, hidden)
            self.layer2 = torch.nn.Linear(hidden, num_outputs)
            self.activation = torch.nn.ReLU()
            self.dropout = torch.nn.Dropout(0.5)
            self.num_outputs = num_outputs
            # Tune a 10-term polynomial filter; tuning runs on the numpy backend.
            self.ranker = pg.ParameterTuner(
                lambda par: pg.GenericGraphFilter([par[0] ** i for i in range(int(10))],
                                                  error_type="iters", max_iters=int(10)),
                max_vals=[0.95], min_vals=[0.5], verbose=False,
                measure=pg.Mabs, deviation_tol=0.1, tuning_backend="numpy")

        def forward(self, features, graph, training=False):
            predict = self.dropout(torch.FloatTensor(features))
            predict = self.dropout(self.activation(self.layer1(predict)))
            predict = self.activation(self.layer2(predict))
            predict = self.ranker.propagate(graph, predict,
                                            graph_dropout=0.5 if training else 0)
            ret = torch.nn.functional.softmax(predict, dim=1)
            # L2 penalty on the first layer, exposed for the trainer to read.
            self.loss = sum((1E-5 * torch.norm(param)
                             for param in self.layer1.parameters()), 0)
            return ret

    def init_weights(m):
        # Xavier init for linear layers; small positive bias.
        if isinstance(m, torch.nn.Linear):
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.01)

    model = AutotuneAPPNP(features.shape[1], labels.shape[1])
    graph = pg.preprocessor(renormalize=True, cors=True)(graph)
    model.apply(init_weights)
    with pg.Backend('pytorch'):
        pg.gnn_train(model, features, graph, labels, train_nodes, valid_nodes, epochs=50)
import pygrank as pg import networkx as nx graph, features, groups = pg.load_feature_dataset('citeseer', groups_no_labels=True, graph_api=nx) nodes = list(graph) # graph = nx.Graph() for node in nodes: graph.add_node(node) wordnames = {i: "w"+str(i) for i in range(features.shape[1])} for i, node in enumerate(nodes): for j in range(features.shape[1]): if features[i, j] != 0: graph.add_edge(node, wordnames[j], weight=features[i, j]) group_lists = list(groups.values()) groups = [pg.to_signal(graph, group) for group in groups.values()] accs = list() for seed in range(100): ranker = pg.PageRank(0.85, renormalize=True, assume_immutability=True, use_quotient=False, error_type="iters", max_iters=10) # 10 iterations #ranker = pg.LowPassRecursiveGraphFilter([1 - .9 / (pg.log(i + 1) + 1) for i in range(10)], renormalize=True, assume_immutability=True, tol=None) training, test = pg.split(nodes, 0.8, seed=seed) training = set(training) ranks_set = [ranker(graph, {node: 1 for node in group if node in training}) for group in group_lists] options = list(range(len(ranks_set))) found_set = [list() for _ in training] tp = 0 for v in test: if max(options, key=lambda i: ranks_set[i][v]) == max(options, key=lambda i: groups[i][v]):
self.propagate_on_training = propagate_on_training def call(self, inputs, training=False): graph, features = inputs predict = super().call(features, training=training) if not training or self.propagate_on_training: predict = self.ranker.propagate( graph, predict, graph_dropout=self.graph_dropout if training else 0) return tf.nn.softmax(predict, axis=1) pg.load_backend('numpy') graph, features, labels = pg.load_feature_dataset('cora') for seed in range(10): training, test = pg.split(list(range(len(graph))), 0.8, seed=seed) training, validation = pg.split(training, 1 - 0.2 / 0.8, seed=seed) architectures = { "APPNP": APPNP(features.shape[1], labels.shape[1], alpha=0.9), #"LAPPNP": APPNP(features.shape[1], labels.shape[1], alpha=tf.Variable([0.85])), "APFNP": APPNP(features.shape[1], labels.shape[1], alpha="estimated") } pg.load_backend('tensorflow') accs = dict() for architecture, model in architectures.items(): pg.gnn_train(model, graph, features,
assume_immutability=True, use_quotient=False, error_type="iters", max_iters=10) # 10 iterations def call(self, features, graph, training=False): # can call with tensor graph predict = super().call(features, training=training) propagate = self.ranker.propagate(graph, predict, graph_dropout=0.5 * training) return tf.nn.softmax(propagate, axis=1) graph, features, labels = pg.load_feature_dataset('citeseer') training, test = pg.split(list(range(len(graph))), 0.8, seed=5) # seeded split training, validation = pg.split(training, 1 - 0.2 / 0.8) model = APPNP(features.shape[1], labels.shape[1]) with pg.Backend('tensorflow'): # pygrank with tensorflow backend pg.gnn_train(model, features, graph, labels, training, validation, optimizer=tf.optimizers.Adam(learning_rate=0.01), verbose=True) print("Accuracy", pg.gnn_accuracy(labels, model(features, graph), test))
import pygrank as pg import tensorflow as tf graph, features, labels = pg.load_feature_dataset('synthfeats') training, test = pg.split(list(range(len(graph))), 0.8) training, validation = pg.split(training, 1-0.2/0.8) class APPNP: def __init__(self, num_inputs, num_outputs, hidden=64): self.mlp = tf.keras.Sequential([ tf.keras.layers.Dropout(0.5, input_shape=(num_inputs,)), tf.keras.layers.Dense(hidden, activation=tf.nn.relu), tf.keras.layers.Dropout(0.5), tf.keras.layers.Dense(num_outputs, activation=tf.nn.relu), ]) self.num_outputs = num_outputs self.trainable_variables = self.mlp.trainable_variables #self.ranker = pg.GenericGraphFilter(([0.9]+[0]*9)*5 # , error_type="iters", max_iters=50 # , renormalize=True, assume_immutability=True) #self.ranker = pg.GenericGraphFilter([0.9]*10, renormalize=True, assume_immutability=True, tol=1.E-16) self.ranker = pg.PageRank(0.9, renormalize=True, assume_immutability=True, error_type="iters", max_iters=10) """pre = pg.preprocessor(renormalize=True, assume_immutability=True) self.ranker = pg.ParameterTuner( lambda params: pg.GenericGraphFilter([params[0]] * int(params[1]), preprocessor=pre, tol=1.E-16), max_vals=[0.99, 20], min_vals=[0.5, 5], measure=pg.KLDivergence, deviation_tol=0.1, tuning_backend="numpy")""" def __call__(self, graph, features, training=False): predict = self.mlp(features, training=training)