Example #1
0
def test_fullbatch_cluster_models(model_type):
    """Fit, evaluate and predict with ``model_type`` on clustered batches.

    ``model_type`` is called with a ``ClusterNodeGenerator`` over a 50-node
    random graph; 40 of the nodes are used, each with a target of 1.
    """
    graph = example_graph_random(n_nodes=50)
    generator = ClusterNodeGenerator(graph, clusters=10)

    nodes = graph.nodes()[:40]
    all_ones = np.ones(len(nodes))
    gen = generator.flow(nodes, targets=all_ones)

    gnn = model_type(
        generator=generator,
        layer_sizes=[16, 16, 1],
        activations=["relu"] * 3,
    )

    train_in, train_out = gnn.in_out_tensors()
    model = tf.keras.Model(train_in, train_out)
    model.compile(optimizer="adam", loss="binary_crossentropy")

    # Only checking that training and evaluation run; their results are unused.
    model.fit(gen, validation_data=gen, epochs=2)
    model.evaluate(gen)

    # ``model.predict(gen)`` cannot be used directly with cluster models
    # (ClusterGCN included): each batch produces an output of shape
    # (1, cluster_size, feat_size) and Keras tries to concatenate the batches
    # along axis 0. Dropping the leading axis first avoids the problem.
    x_in, x_out = gnn.in_out_tensors()
    embedding_model = tf.keras.Model(inputs=x_in, outputs=tf.squeeze(x_out, 0))
    predictions = embedding_model.predict(gen)

    expected_shape = (len(nodes), 1)
    assert predictions.shape == expected_shape
Example #2
0
def test_benchmark_ClusterGCN_generator(benchmark, q):
    """Benchmark a full pass over every batch of a cluster node sequence.

    ``benchmark`` is called with a zero-argument callable (presumably the
    pytest-benchmark fixture); ``q`` is forwarded to ``ClusterNodeGenerator``.
    """
    graph = example_graph_random(feature_size=10, n_nodes=1000, n_edges=5000)
    seq = ClusterNodeGenerator(graph, clusters=10, q=q).flow(graph.nodes())

    def consume_all_batches():
        # Materialising the sequence forces every batch to be produced.
        return list(seq)

    benchmark(consume_all_batches)
Example #3
0
def test_ClusterNodeSquence():
    """Check sequence length and per-batch shapes for 1, 4 and 2 clusters."""
    graph = create_stellargraph()

    def flow_for(n_clusters):
        # A fresh generator and sequence over the four nodes a-d.
        return ClusterNodeGenerator(graph, clusters=n_clusters, q=1).flow(
            node_ids=["a", "b", "c", "d"]
        )

    def check_batches(sequence, cluster_size):
        # Each batch is
        #     [features, target_node_indices, adj_cluster], cluster_targets
        # and every array carries a leading batch dimension of 1 so that it
        # works with Keras.
        for batch in sequence:
            assert len(batch) == 2
            assert batch[0][0].shape == (1, cluster_size, 2)
            assert batch[0][1].shape == (1, cluster_size)
            # adjacency matrix is square in the number of nodes in the cluster
            assert batch[0][2].shape == (1, cluster_size, cluster_size)
            # no targets were given to flow()
            assert batch[1] is None

    assert len(flow_for(1)) == 1
    assert len(flow_for(4)) == 4

    # Four clusters over four nodes: one node per batch.
    check_batches(flow_for(4), cluster_size=1)

    # Two clusters over four nodes: two nodes per batch.
    two_cluster_flow = flow_for(2)
    assert len(two_cluster_flow) == 2
    check_batches(two_cluster_flow, cluster_size=2)
Example #4
0
def test_ClusterGCN_activations():
    """Valid activations are stored as given; invalid ones raise."""
    generator = ClusterNodeGenerator(create_stellargraph())

    # (layer_sizes, activations) pairs that must be accepted and stored.
    accepted = [
        ([2], ["relu"]),
        ([2, 2], ["relu", "relu"]),
        ([2], ["linear"]),
    ]
    for sizes, activations in accepted:
        expected = list(activations)
        cluster_gcn = ClusterGCN(
            layer_sizes=sizes, generator=generator, activations=activations
        )
        assert cluster_gcn.activations == expected

    # (expected exception, constructor keyword arguments) pairs.
    rejected = [
        # activations for layers must be specified
        (TypeError, {"layer_sizes": [2]}),
        # more activations than layers
        (AssertionError, {"layer_sizes": [2], "activations": ["relu", "linear"]}),
        # fewer activations than layers
        (AssertionError, {"layer_sizes": [2, 2], "activations": ["relu"]}),
        # unknown activation
        (ValueError, {"layer_sizes": [2], "activations": ["bleach"]}),
    ]
    for expected_error, kwargs in rejected:
        with pytest.raises(expected_error):
            ClusterGCN(generator=generator, **kwargs)
Example #5
0
def test_ClusterGCN_apply():
    """Build a one-layer ClusterGCN Keras model and check its prediction shape."""
    generator = ClusterNodeGenerator(create_stellargraph())

    cluster_gcn_model = ClusterGCN(
        layer_sizes=[2], generator=generator, activations=["relu"], dropout=0.0
    )

    inputs, outputs = cluster_gcn_model.build()
    model = keras.Model(inputs=inputs, outputs=outputs)

    # Check the predict_generator method: three requested nodes, two output
    # features each, plus the leading batch dimension.
    three_node_flow = generator.flow(["a", "b", "c"])
    predictions = model.predict_generator(three_node_flow)
    assert predictions.shape == (1, 3, 2)
def test_ClusterGCN_save_load(tmpdir):
    """Pass a two-layer ClusterGCN through the shared save/load helper."""
    graph, _ = create_graph_features()
    generator = ClusterNodeGenerator(graph)

    model_kwargs = {
        "layer_sizes": [2, 3],
        "activations": ["relu", "relu"],
        "generator": generator,
    }
    cluster_gcn = ClusterGCN(**model_kwargs)

    test_utils.model_save_load(tmpdir, cluster_gcn)
Example #7
0
def _dispatch_generator(graph, model_name, params,
                        generator_type="node"):
    """Create a graph generator."""
    if model_name == "watchyourstep":
        return AdjacencyPowerGenerator(
            graph, num_powers=params["num_powers"])
    elif model_name in ["complex", "distmult"]:
        return KGTripleGenerator(graph, params["batch_size"])
    elif model_name == "attri2vec":
        if generator_type == "node":
            return Attri2VecNodeGenerator(
                graph, params["batch_size"])
        else:
            return Attri2VecLinkGenerator(
                graph, params["batch_size"])
    elif model_name in ["graphsage", "graphsage_dgi"]:
        if generator_type == "node":
            return GraphSAGENodeGenerator(
                graph, params["batch_size"], params["num_samples"])
        else:
            return GraphSAGELinkGenerator(
                graph, params["batch_size"], params["num_samples"])
    elif model_name in ["gcn_dgi", "gat_dgi"]:
        return FullBatchNodeGenerator(graph, sparse=False)
    elif model_name in ["cluster_gcn_dgi", "cluster_gat_dgi"]:
        return ClusterNodeGenerator(
            graph, clusters=params["clusters"],
            q=params["clusters_q"])
    else:
        raise ValueError(f"Unknown model name '{model_name}'")
def test_ClusterGCN_apply():
    """Wrap a one-layer ClusterGCN in a Keras model and check predict's shape."""
    graph, _ = create_graph_features()
    generator = ClusterNodeGenerator(graph)

    model_kwargs = {
        "layer_sizes": [2],
        "generator": generator,
        "activations": ["relu"],
        "dropout": 0.0,
    }
    cluster_gcn_model = ClusterGCN(**model_kwargs)

    inputs, outputs = cluster_gcn_model.in_out_tensors()
    model = keras.Model(inputs=inputs, outputs=outputs)

    # Check the predict method: three requested nodes, two output features
    # each, plus the leading batch dimension.
    three_node_flow = generator.flow(["a", "b", "c"])
    predictions = model.predict(three_node_flow)
    assert predictions.shape == (1, 3, 2)
def test_cluster_weighted():
    """The ``weighted`` flag must change the adjacency values of the batch."""
    graph = create_stellargraph()

    def single_cluster_flow(use_weights):
        # One cluster holding all four nodes, so the sequence has one batch.
        node_generator = ClusterNodeGenerator(
            graph, clusters=1, q=1, weighted=use_weights)
        return node_generator.flow(node_ids=["a", "b", "c", "d"])

    def sorted_adjacency(inputs):
        # Compare flattened, sorted values so node ordering does not matter.
        return np.sort(inputs[2].ravel())

    unweighted = single_cluster_flow(False)
    weighted = single_cluster_flow(True)

    assert len(unweighted) == len(weighted)
    assert len(weighted) == 1

    unweighted_inputs, _ = unweighted[0]
    weighted_inputs, _ = weighted[0]

    weighted_values = sorted_adjacency(weighted_inputs)
    unweighted_values = sorted_adjacency(unweighted_inputs)
    assert not np.allclose(weighted_values, unweighted_values)
def test_ClusterGCN_init():
    """Constructor arguments are exposed unchanged as attributes."""
    graph, _ = create_graph_features()
    generator = ClusterNodeGenerator(graph)

    expected = {
        "layer_sizes": [2],
        "activations": ["relu"],
        "dropout": 0.5,
    }
    cluster_gcn_model = ClusterGCN(
        layer_sizes=[2],
        generator=generator,
        activations=["relu"],
        dropout=0.5,
    )

    assert cluster_gcn_model.layer_sizes == expected["layer_sizes"]
    assert cluster_gcn_model.activations == expected["activations"]
    assert cluster_gcn_model.dropout == expected["dropout"]
Example #11
0
def _fit_deep_graph_infomax(train_graph, params, model_name):
    """Train unsupervised Deep Graph Infomax."""
    if "gcn_dgi" in model_name or "gat_dgi" in model_name:
        if "cluster" in model_name:
            generator = ClusterNodeGenerator(
                train_graph, clusters=params["clusters"],
                q=params["clusters_q"])
        else:
            generator = FullBatchNodeGenerator(train_graph, sparse=False)

        if "gcn_dgi" in model_name:
            embedding_layer = GCN(
                layer_sizes=[params["embedding_dimension"]],
                activations=["relu"], generator=generator)
        elif "gat_dgi" in model_name:
            embedding_layer = GAT(
                layer_sizes=[params["embedding_dimension"]],
                activations=["relu"], generator=generator, attn_heads=8)
    elif model_name == "graphsage_dgi":
        generator = GraphSAGENodeGenerator(
            train_graph, batch_size=50, num_samples=[5])
        embedding_layer = GraphSAGE(
            layer_sizes=[params["embedding_dimension"]], activations=["relu"],
            generator=generator
        )
    else:
        raise ValueError(f"Unknown mode name {model_name}")

    embedding_model = _execute_deep_graph_infomax(
        train_graph, embedding_layer, generator, params)

    # Here the models can be both inductive and transductive
    if model_name in ["gcn_dgi", "gat_dgi", "graphsage_dgi"]:
        return embedding_model.predict(
            generator.flow(train_graph.nodes()))
    else:
        return embedding_model
def test_kernel_and_bias_defaults():
    """Graph convolution layers get the expected default initialisers,
    and no regularisers or constraints."""
    graph, _ = create_graph_features()
    generator = ClusterNodeGenerator(graph)
    cluster_gcn = ClusterGCN(
        layer_sizes=[2, 2],
        activations=["relu", "relu"],
        generator=generator,
    )

    # Attributes that must be left unset by default.
    unset_by_default = (
        "kernel_regularizer",
        "bias_regularizer",
        "kernel_constraint",
        "bias_constraint",
    )

    for layer in cluster_gcn._layers:
        # Only the graph convolution layers are of interest here.
        if not isinstance(layer, GraphConvolution):
            continue

        assert isinstance(layer.kernel_initializer,
                          tf.initializers.GlorotUniform)
        assert isinstance(layer.bias_initializer, tf.initializers.Zeros)
        for attribute in unset_by_default:
            assert getattr(layer, attribute) is None
Example #13
0
def test_ClusterGCN_init():
    """Constructor arguments are exposed unchanged as attributes."""
    raw_graph, features = create_graph_features()

    # One row of features per node, indexed by node id.
    features_by_node = dict(zip(raw_graph.nodes(), features))
    node_features = pd.DataFrame.from_dict(features_by_node, orient="index")
    graph = StellarGraph(
        raw_graph, node_type_name="node", node_features=node_features
    )

    cluster_gcn_model = ClusterGCN(
        layer_sizes=[2],
        generator=ClusterNodeGenerator(graph),
        activations=["relu"],
        dropout=0.5,
    )

    assert cluster_gcn_model.layer_sizes == [2]
    assert cluster_gcn_model.activations == ["relu"]
    assert cluster_gcn_model.dropout == 0.5
Example #14
0
def test_ClusterGCN_regularisers():
    """Valid regulariser/initialiser arguments are accepted; unknown string
    names raise ``ValueError``."""
    generator = ClusterNodeGenerator(create_stellargraph())

    def make_model(**extra):
        # Single-layer model; only the keyword under test varies.
        return ClusterGCN(
            layer_sizes=[2],
            activations=["relu"],
            generator=generator,
            **extra,
        )

    # A regulariser object is accepted ...
    make_model(kernel_regularizer=keras.regularizers.l2())

    # ... but an unknown regulariser name is not.
    with pytest.raises(ValueError):
        make_model(kernel_regularizer="fred")

    # Initialisers may be given by name or as an object ...
    make_model(bias_initializer="zeros")
    make_model(bias_initializer=initializers.zeros())

    # ... but an unknown initialiser name is rejected.
    with pytest.raises(ValueError):
        make_model(bias_initializer="barney")
Example #15
0
def test_ClusterNodeGenerator_init():
    """Constructor argument validation for ``clusters``, ``q`` and ``lam``."""
    graph = create_stellargraph()

    def assert_rejected(expected_error, **kwargs):
        with pytest.raises(expected_error):
            ClusterNodeGenerator(graph, **kwargs)

    # (expected exception, constructor keyword arguments)
    invalid_cluster_args = [
        # zero clusters is rejected
        (ValueError, {"clusters": 0}),
        # clusters must be integer if not list
        (TypeError, {"clusters": 0.5}),
        # q must be greater than 0
        (ValueError, {"q": 0}),
        # q must be integer
        (TypeError, {"q": 1.0}),
        # number of clusters is not exactly divisible by q
        (ValueError, {"clusters": 5, "q": 2}),
        # one cluster, k=len(clusters), not divisible by q
        (ValueError, {"clusters": [["a", "b", "c", "d"]], "q": 2}),
    ]
    for expected_error, kwargs in invalid_cluster_args:
        assert_rejected(expected_error, **kwargs)

    # Explicit cluster lists are fine with q=1; k is the number of lists given.
    explicit_clusters = [
        ([["a", "b", "c", "d"]], 1),
        ([["a", "d"], ["b", "c"]], 2),
    ]
    for clusters, expected_k in explicit_clusters:
        generator = ClusterNodeGenerator(graph, clusters=clusters, q=1)
        assert generator.k == expected_k
        assert generator.q == 1

    # lam has to be in the interval [0., 1.] and float
    invalid_lam = [
        (TypeError, -1),
        (TypeError, 1),
        (ValueError, 2.5),
    ]
    for expected_error, lam in invalid_lam:
        assert_rejected(expected_error, clusters=1, q=1, lam=lam)
def test_ClusterNodeSequence_cluster_without_targets():
    """Iterating the sequence must not raise when only one node has a target.

    With two clusters and a single target node, presumably at least one
    cluster contains no target node at all.
    """
    graph = create_stellargraph()
    node_generator = ClusterNodeGenerator(graph, clusters=2, q=1)
    sequence = node_generator.flow(node_ids=["a"], targets=[0])

    # Materialise every batch; the test passes as long as nothing raises.
    list(sequence)