Beispiel #1
0
def test_one_community_benchmarks():
    """Benchmark several ranking algorithms on one-community datasets.

    Prints rank-averaged AUC scores for personalized PageRank, heat kernels,
    and an automatically tuned filter, all sharing one preprocessed graph.
    """
    pg.load_backend("numpy")
    datasets = ["graph9", "bigraph"]
    # One shared symmetric normalization, cached across all algorithms.
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    convergence = dict(max_iters=10000, tol=1.E-9)
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, **convergence),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, **convergence),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, **convergence),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, **convergence),
        "tuned": pg.ParameterTuner(preprocessor=pre, **convergence),
    }
    # Alternative setups kept for reference:
    # algorithms = benchmark.create_variations(algorithms, {"": pg.Tautology, "+SO": pg.SeedOversampling})
    # loader = pg.load_datasets_one_community(datasets)
    # pg.benchmark(algorithms, loader, "time", verbose=True)

    loader = pg.load_datasets_one_community(datasets)
    results = pg.benchmark(algorithms, loader, pg.AUC, fraction_of_training=.8)
    pg.benchmark_print(pg.benchmark_average(pg.benchmark_ranks(results)))
Beispiel #2
0
def test_gnn_errors():
    """Exercise GNN training/evaluation under mismatched pygrank backends.

    Expects training to raise before the tensorflow backend is loaded, then
    trains successfully, and finally expects accuracy computation to raise
    once the numpy backend is restored while predictions are still tensors.
    """
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    # 80% training, then carve validation out of the training portion so the
    # final ratio is 60/20/20 train/validation/test.
    training, test = pg.split(list(range(len(graph))), 0.8)
    training, validation = pg.split(training, 1 - 0.2 / 0.8)

    from tensorflow.keras.layers import Dropout, Dense
    from tensorflow.keras.regularizers import L2

    class APPNP(tf.keras.Sequential):
        # Two-layer MLP whose predictions are smoothed by personalized PageRank.
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__([
                Dropout(0.5, input_shape=(num_inputs,)),
                Dense(hidden, activation="relu", kernel_regularizer=L2(1.E-5)),
                Dropout(0.5),
                Dense(num_outputs, activation="relu")])
            self.ranker = pg.PageRank(0.9, renormalize=True, assume_immutability=True,
                                      use_quotient=False, error_type="iters", max_iters=10)  # 10 iterations

        def call(self, features, graph, training=False):
            predict = super().call(features, training=training)
            # Graph (edge) dropout regularizes propagation only while training.
            propagate = self.ranker.propagate(graph, predict, graph_dropout=0.5 if training else 0)
            return tf.nn.softmax(propagate, axis=1)

    model = APPNP(features.shape[1], labels.shape[1])
    # Training before loading the tensorflow backend is expected to fail.
    with pytest.raises(Exception):
        pg.gnn_train(model, graph, features, labels, training, validation, test=test, epochs=2)
    pg.load_backend('tensorflow')
    # NOTE(review): argument order here is (features, graph) while the failing
    # call above used (graph, features) — confirm which order gnn_train expects.
    pg.gnn_train(model, features, graph, labels, training, validation, test=test, epochs=300, patience=2)
    predictions = model(features, graph)
    pg.load_backend('numpy')
    # Evaluating tensorflow predictions under the numpy backend should raise.
    with pytest.raises(Exception):
        pg.gnn_accuracy(labels, predictions, test)
Beispiel #3
0
def test_appnp_torch():
    """Train a torch APPNP variant via pygrank's pytorch backend and check accuracy."""
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    training, test = pg.split(list(range(len(graph))), 0.8)
    training, validation = pg.split(training, 1 - 0.2 / 0.8)

    class AutotuneAPPNP(torch.nn.Module):
        """Two-layer perceptron followed by 10-iteration PageRank propagation."""

        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__()
            self.layer1 = torch.nn.Linear(num_inputs, hidden)
            self.layer2 = torch.nn.Linear(hidden, num_outputs)
            self.activation = torch.nn.ReLU()
            self.dropout = torch.nn.Dropout(0.5)
            self.num_outputs = num_outputs
            self.ranker = pg.PageRank(
                0.9, renormalize=True, assume_immutability=True,
                error_type="iters", max_iters=10)

        def forward(self, inputs, training=False):
            graph, features = inputs
            hidden = self.dropout(torch.FloatTensor(features))
            hidden = self.dropout(self.activation(self.layer1(hidden)))
            hidden = self.activation(self.layer2(hidden))
            # Graph (edge) dropout regularizes propagation only while training.
            hidden = self.ranker.propagate(
                graph, hidden, graph_dropout=0.5 if training else 0)
            outcome = torch.nn.functional.softmax(hidden, dim=1)
            # L2 regularization restricted to the first layer's parameters.
            self.loss = sum(
                (1E-5 * torch.norm(weight) for weight in self.layer1.parameters()), 0)
            return outcome

    def init_weights(module):
        # Xavier weights with a small constant bias on every linear layer.
        if isinstance(module, torch.nn.Linear):
            torch.nn.init.xavier_uniform_(module.weight)
            module.bias.data.fill_(0.01)

    pg.load_backend('pytorch')
    model = AutotuneAPPNP(features.shape[1], labels.shape[1])
    model.apply(init_weights)
    pg.gnn_train(model, graph, features, labels,
                 training, validation, epochs=50, patience=2)
    # TODO: higher numbers fail only on github actions - for local tests it is fine
    assert float(pg.gnn_accuracy(labels, model([graph, features]), test)) >= 0.2
    pg.load_backend('numpy')
Beispiel #4
0
def test_backend_load():
    """Check that backends switch correctly and unknown names are rejected."""
    for name in ("tensorflow", "matvec", "numpy"):
        pg.load_backend(name)
        assert pg.backend_name() == name
    with pytest.raises(Exception):
        pg.load_backend("unknown")
    # A failed load must keep the previously active backend.
    assert pg.backend_name() == "numpy"
Beispiel #5
0
def test_optimization_dict():
    """Verify that sharing an optimization dict speeds up repeated tuning."""
    pg.load_backend("numpy")
    from timeit import default_timer as time
    graph = next(pg.load_datasets_graph(["bigraph"]))
    personalization = {str(node): 1 for node in range(200)}
    pre = pg.preprocessor(assume_immutability=True)
    pre(graph)  # warm the preprocessing cache before timing

    def timed_runs(**tuner_kwargs):
        # Time ten independent tuner runs with the given extra arguments.
        start = time()
        for _ in range(10):
            pg.ParameterTuner(preprocessor=pre, tol=1.E-9, **tuner_kwargs).rank(graph, personalization)
        return time() - start

    unoptimized = timed_runs()
    optimization = dict()
    optimized = timed_runs(optimization_dict=optimization)
    assert len(optimization) == 20
    assert unoptimized > optimized
Beispiel #6
0
def test_appnp_tf():
    """Train a keras APPNP model via pygrank's tensorflow backend and check accuracy."""
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    training, test = pg.split(list(range(len(graph))), 0.8)
    training, validation = pg.split(training, 1 - 0.2 / 0.8)

    class APPNP(tf.keras.Sequential):
        """Dropout-regularized MLP whose outputs are propagated by PageRank."""

        def __init__(self, num_inputs, num_outputs, hidden=64):
            layers = tf.keras.layers
            super().__init__([
                layers.Dropout(0.5, input_shape=(num_inputs, )),
                layers.Dense(hidden,
                             activation=tf.nn.relu,
                             kernel_regularizer=tf.keras.regularizers.L2(1.E-5)),
                layers.Dropout(0.5),
                layers.Dense(num_outputs, activation=tf.nn.relu),
            ])
            self.ranker = pg.PageRank(
                0.9, renormalize=True, assume_immutability=True,
                error_type="iters", max_iters=10)
            self.input_spec = None  # prevents some versions of tensorflow from checking call inputs

        def call(self, inputs, training=False):
            graph, features = inputs
            prediction = super().call(features, training=training)
            # Graph (edge) dropout regularizes propagation only while training.
            prediction = self.ranker.propagate(
                graph, prediction, graph_dropout=0.5 if training else 0)
            return tf.nn.softmax(prediction, axis=1)

    pg.load_backend('tensorflow')
    model = APPNP(features.shape[1], labels.shape[1])
    pg.gnn_train(model, graph, features, labels,
                 training, validation, test=test, epochs=50)
    assert float(pg.gnn_accuracy(labels, model([graph, features]), test)) >= 0.5
    pg.load_backend('numpy')
Beispiel #7
0
        self.num_outputs = num_outputs
        self.propagate_on_training = propagate_on_training

    def call(self, inputs, training=False):
        """Forward pass: neural prediction, optional propagation, softmax output."""
        graph, features = inputs
        prediction = super().call(features, training=training)
        # Propagation may be disabled during training via propagate_on_training.
        skip_propagation = training and not self.propagate_on_training
        if not skip_propagation:
            prediction = self.ranker.propagate(
                graph, prediction,
                graph_dropout=self.graph_dropout if training else 0)
        return tf.nn.softmax(prediction, axis=1)


pg.load_backend('numpy')
graph, features, labels = pg.load_feature_dataset('cora')
for seed in range(10):
    training, test = pg.split(list(range(len(graph))), 0.8, seed=seed)
    training, validation = pg.split(training, 1 - 0.2 / 0.8, seed=seed)
    architectures = {
        "APPNP": APPNP(features.shape[1], labels.shape[1], alpha=0.9),
        #"LAPPNP": APPNP(features.shape[1], labels.shape[1], alpha=tf.Variable([0.85])),
        "APFNP": APPNP(features.shape[1], labels.shape[1], alpha="estimated")
    }

    pg.load_backend('tensorflow')
    accs = dict()
    for architecture, model in architectures.items():
        pg.gnn_train(model,
                     graph,
Beispiel #8
0
def supported_backends():
    """Yield each supported backend name after activating it, ending on numpy."""
    backends = ("matvec", "pytorch", "tensorflow", "torch_sparse", "numpy")
    for name in backends:
        pg.load_backend(name)
        yield name
Beispiel #9
0
def supported_backends():
    """Activate and yield each backend in turn so callers test all of them."""
    for name in ("pytorch", "tensorflow", "numpy"):
        pg.load_backend(name)
        yield name
Beispiel #10
0
                                        min_vals=[0.5, 5],
                                        measure=pg.Mabs,
                                        deviation_tol=0.1,
                                        tuning_backend="numpy")

    def call(self, inputs, training=False):
        """Forward pass: neural prediction, PageRank propagation, softmax output."""
        graph, features = inputs
        neural_output = super().call(features, training=training)
        # Graph (edge) dropout regularizes propagation only while training.
        ranked = self.ranker.propagate(
            graph, neural_output, graph_dropout=0.5 if training else 0)
        return tf.nn.softmax(ranked, axis=1)


# Repeated-trial evaluation: average APPNP test accuracy over 10 random splits.
graph, features, labels = pg.load_feature_dataset('citeseer')
acc = 0
for _ in range(10):
    # 60/20/20 train/validation/test split, re-drawn each repetition.
    training, test = pg.split(list(range(len(graph))), 0.8)
    training, validation = pg.split(training, 1 - 0.2 / 0.8)
    pg.load_backend('tensorflow')  # explicitly load the appropriate backend
    model = APPNP(features.shape[1], labels.shape[1])
    pg.gnn_train(model,
                 graph,
                 features,
                 labels,
                 training,
                 validation,
                 optimizer=tf.optimizers.Adam(learning_rate=0.01))
    # Accumulate a running mean across the 10 repetitions.
    acc += pg.gnn_accuracy(labels, model([graph, features]), test) / 10
    print("Accuracy", pg.gnn_accuracy(labels, model([graph, features]), test))
print(acc)
import pygrank as pg

pg.load_backend("matvec")

# datasets = ["amazon", "citeseer", "maven"]
datasets = ["amazon"]
community_size = 500

# One shared symmetric normalization, cached and reused by every algorithm.
pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
convergence = {"tol": 1.E-9, "max_iters": 10000}
# convergence = {"error_type": "iters", "max_iters": 20}
# Previously tuned filter coefficients (used by the commented-out "exp" entry).
tuned = [1]+[10000, 0.7573524630180889, 0.0, 1.0, 0.004950495049504955, 0.4975492598764827, 0.2573767277717871, 0.0, 0.2549259876482698, 0.009851975296539583, 0.5, 0.5, 0.5, 0.2549259876482698, 0.5, 0.5, 0.5, 0.009851975296539583, 0.009851975296539583, 0.25002450740123516, 0.2524752475247525, 0.0, 0.5, 0.0, 0.009851975296539583, 0.0, 0.004950495049504955, 0.0, 0.5, 0.004950495049504955, 0.7475247524752475, 0.004950495049504955, 0.009851975296539583, 0.0, 0.995049504950495, 0.0, 0.5, 0.0, 0.004950495049504955, 0.0]

# Personalized PageRank and heat-kernel filters at several spread parameters.
algorithms = {}
for alpha in (0.5, 0.85, 0.9, 0.99):
    algorithms["ppr" + str(alpha)] = pg.PageRank(alpha=alpha, preprocessor=pre, **convergence)
for t in (2, 3, 5, 7):
    algorithms["hk" + str(t)] = pg.HeatKernel(t=t, preprocessor=pre, **convergence)
# algorithms["exp"] = pg.GenericGraphFilter(tuned, preprocessor=pre, error_type="iters", max_iters=41)

postprocessor = pg.Tautology
algorithms = pg.benchmarks.create_variations(algorithms, postprocessor)
measure = pg.Time
optimization = pg.SelfClearDict()