Example #1
0
def test_APPNP_apply_propagate_model_sparse():
    """Predictions from APPNP.propagate_model fed with hand-assembled sparse
    inputs must match predictions obtained through the generator flow."""
    graph, node_feats = create_graph_features()
    node_feats, adjacency = GCN_Aadj_feats_op(node_feats, graph.to_adjacency_matrix())
    adjacency = adjacency.tocoo()
    edge_indices = np.hstack((adjacency.row[:, None], adjacency.col[:, None]))[None, ...]
    edge_values = adjacency.data[None, ...]

    generator = FullBatchNodeGenerator(graph, sparse=True, method="gcn")
    appnp = APPNP([2], generator=generator, activations=["relu"], dropout=0.5)

    dense_head = keras.Sequential()
    dense_head.add(Dense(2))

    x_in, x_out = appnp.propagate_model(dense_head)
    model = keras.Model(inputs=x_in, outputs=x_out)

    # Predict with manually assembled full-batch inputs.
    target_indices = np.array([[0, 1]], dtype="int32")
    manual_preds = model.predict(
        [node_feats[None, :, :], target_indices, edge_indices, edge_values]
    )
    assert manual_preds.shape == (1, 2, 2)

    # Predict the same two nodes through the generator flow.
    flow_preds = model.predict(generator.flow(["a", "b"]))
    assert flow_preds.shape == (1, 2, 2)

    assert manual_preds == pytest.approx(flow_preds)
Example #2
0
def test_PPNP_edge_cases():
    """PPNP construction rejects sparse generators, mismatched
    layer/activation counts, and generators of the wrong type.

    Uses ``pytest.raises`` instead of the original try/except + flag
    pattern: the original left ``error`` unbound (a NameError) if the
    expected exception was never raised, and a stale ``error`` from an
    earlier ``try`` could mask a missing exception in a later one.
    """
    G, features = create_graph_features()
    adj = nx.to_scipy_sparse_matrix(G)
    features, adj = PPNP_Aadj_feats_op(features, adj)

    nodes = G.nodes()
    node_features = pd.DataFrame.from_dict(
        {n: f for n, f in zip(nodes, features)}, orient="index"
    )
    G = StellarGraph(G, node_features=node_features)

    # The "ppnp" method requires a dense adjacency, so sparse=True must fail.
    with pytest.raises(ValueError):
        FullBatchNodeGenerator(G, sparse=True, method="ppnp")

    generator = FullBatchNodeGenerator(G, sparse=False, method="ppnp")

    # Mismatched layer_sizes/activations lengths must raise.
    with pytest.raises(
        ValueError, match="The number of layers should equal the number of activations"
    ):
        PPNP([2, 2], generator=generator, activations=["relu"], dropout=0.5)

    # A generator that is not a FullBatchNodeGenerator must raise.
    with pytest.raises(
        TypeError, match="Generator should be a instance of FullBatchNodeGenerator"
    ):
        PPNP([2], generator=[0, 1], activations=["relu"], dropout=0.5)
def test_dgi(model_type, sparse):
    """Deep Graph Infomax trains end-to-end on the given base model type and
    yields node embeddings of the expected shape."""
    if sparse and model_type is PPNP:
        pytest.skip("PPNP doesn't support sparse=True")

    graph = example_graph_random()
    embedding_size = 16

    generator = FullBatchNodeGenerator(graph, sparse=sparse)
    train_flow = CorruptedGenerator(generator).flow(graph.nodes())

    encoder = model_type(
        generator=generator, activations=["relu"], layer_sizes=[embedding_size]
    )
    infomax = DeepGraphInfomax(encoder)

    # Train the discriminator on clean vs. corrupted node features.
    model = tf.keras.Model(*infomax.in_out_tensors())
    model.compile(loss=tf.nn.sigmoid_cross_entropy_with_logits, optimizer="Adam")
    model.fit(train_flow)

    # Extract embeddings for every node with the trained encoder.
    emb_model = tf.keras.Model(*infomax.embedding_model())
    embeddings = emb_model.predict(generator.flow(graph.nodes()))

    assert embeddings.shape == (len(graph.nodes()), embedding_size)
Example #4
0
    def test_gat_build_no_norm(self):
        """With all-ones kernels and no output normalization, every output
        entry equals F_in * first-layer size * attn heads * max feature."""
        graph = example_graph(feature_size=self.F_in)
        generator = FullBatchNodeGenerator(graph, sparse=self.sparse, method=self.method)
        gat = GAT(
            layer_sizes=self.layer_sizes,
            activations=self.activations,
            attn_heads=self.attn_heads,
            generator=generator,
            bias=True,
            normalize=None,
            kernel_initializer="ones",
            attn_kernel_initializer="ones",
        )

        x_in, x_out = gat.in_out_tensors()
        model = keras.Model(inputs=x_in, outputs=x_out)

        flow = generator.flow(graph.nodes())
        actual = model.predict(flow)

        peak_feature = np.max(graph.node_features(graph.nodes()))
        scale = self.F_in * self.layer_sizes[0] * self.attn_heads * peak_feature
        expected = np.ones((graph.number_of_nodes(), self.layer_sizes[-1])) * scale
        assert np.allclose(expected, actual[0])
Example #5
0
def test_APPNP_apply_dense():
    """APPNP predictions from hand-built dense inputs must match predictions
    obtained through the generator flow."""
    graph, node_feats = create_graph_features()
    node_feats, adjacency = GCN_Aadj_feats_op(node_feats, graph.to_adjacency_matrix())
    dense_adj = np.array(adjacency.todense()[None, :, :])

    generator = FullBatchNodeGenerator(graph, sparse=False, method="gcn")
    appnp = APPNP(
        [2], generator=generator, activations=["relu"], dropout=0.5
    )

    x_in, x_out = appnp.in_out_tensors()
    model = keras.Model(inputs=x_in, outputs=x_out)

    # Manual full-batch inputs: features, output indices, dense adjacency.
    target_indices = np.array([[0, 1]], dtype="int32")
    manual_preds = model.predict([node_feats[None, :, :], target_indices, dense_adj])
    assert manual_preds.shape == (1, 2, 2)

    # Same two nodes through the generator flow.
    flow_preds = model.predict(generator.flow(["a", "b"]))
    assert flow_preds.shape == (1, 2, 2)

    assert manual_preds == pytest.approx(flow_preds)
Example #6
0
    def test_gat_serialize(self):
        """A GAT model round-tripped through to_json/model_from_json with
        all-ones weights installed must reproduce the expected output."""
        graph = example_graph(feature_size=self.F_in)
        generator = FullBatchNodeGenerator(graph, sparse=self.sparse, method=self.method)
        gat = GAT(
            layer_sizes=self.layer_sizes,
            activations=self.activations,
            attn_heads=self.attn_heads,
            generator=generator,
            bias=True,
            normalize="l2",
        )

        x_in, x_out = gat.in_out_tensors()
        model = keras.Model(inputs=x_in, outputs=x_out)

        flow = generator.flow(graph.nodes())

        # Serialize the architecture to JSON.
        config_json = model.to_json()

        # All-ones weights, shaped like the current model's weights.
        ones_weights = [np.ones_like(w) for w in model.get_weights()]

        # Rebuild from JSON (custom layer registered) and install weights.
        restored = keras.models.model_from_json(
            config_json, custom_objects={"GraphAttention": GraphAttention}
        )
        restored.set_weights(ones_weights)

        # The deserialized model must produce the expected uniform output.
        actual = restored.predict(flow)
        n_nodes = graph.number_of_nodes()
        expected = np.ones((n_nodes, self.layer_sizes[-1])) * (1.0 / n_nodes)
        assert np.allclose(expected, actual[0])
Example #7
0
def test_PPNP_edge_cases():
    """PPNP construction rejects sparse generators, mismatched
    layer/activation counts, and generators of the wrong type.

    Uses ``pytest.raises`` instead of the original try/except + flag
    pattern: the original left ``error`` unbound (a NameError) if the
    expected exception was never raised, and a stale ``error`` from an
    earlier ``try`` could mask a missing exception in a later one.
    """
    G, features = create_graph_features()
    adj = G.to_adjacency_matrix()
    features, adj = PPNP_Aadj_feats_op(features, adj)

    # The "ppnp" method requires a dense adjacency, so sparse=True must fail.
    with pytest.raises(ValueError):
        FullBatchNodeGenerator(G, sparse=True, method="ppnp")

    generator = FullBatchNodeGenerator(G, sparse=False, method="ppnp")

    # Mismatched layer_sizes/activations lengths must raise.
    with pytest.raises(
        ValueError, match="The number of layers should equal the number of activations"
    ):
        PPNP([2, 2], generator=generator, activations=["relu"], dropout=0.5)

    # A generator that is not a FullBatchNodeGenerator must raise.
    with pytest.raises(
        TypeError, match="Generator should be a instance of FullBatchNodeGenerator"
    ):
        PPNP([2], generator=[0, 1], activations=["relu"], dropout=0.5)
Example #8
0
def test_GCN_apply_sparse():
    """GCN predictions from manually assembled sparse inputs must match
    predictions obtained through the generator flow."""
    graph, node_feats = create_graph_features()
    node_feats, adjacency = GCN_Aadj_feats_op(node_feats, graph.to_adjacency_matrix())
    adjacency = adjacency.tocoo()
    edge_indices = np.hstack(
        (adjacency.row[:, None], adjacency.col[:, None])
    ).astype(np.int64)[None, ...]
    edge_values = adjacency.data[None, ...]

    generator = FullBatchNodeGenerator(graph, sparse=True, method="gcn")
    gcn = GCN(
        layer_sizes=[2], activations=["relu"], generator=generator, dropout=0.5
    )

    x_in, x_out = gcn.in_out_tensors()
    model = keras.Model(inputs=x_in, outputs=x_out)

    # Predict with manually assembled full-batch inputs.
    target_indices = np.array([[0, 1]], dtype="int32")
    manual_preds = model.predict(
        [node_feats[None, :, :], target_indices, edge_indices, edge_values]
    )
    assert manual_preds.shape == (1, 2, 2)

    # Predict the same two nodes through the generator flow.
    flow_preds = model.predict(generator.flow(["a", "b"]))
    assert flow_preds.shape == (1, 2, 2)

    assert manual_preds == pytest.approx(flow_preds)
Example #9
0
def test_GCN_apply_dense():
    """GCN predictions from hand-built dense inputs must match predictions
    obtained through the generator flow.

    Updated to ``in_out_tensors()`` and ``Model.predict`` for consistency
    with the other tests in this file: ``build()`` is the deprecated
    pre-1.x StellarGraph spelling and ``predict_generator`` is deprecated
    in TF2 (``predict`` accepts generators directly). Also drops the
    unused ``n_nodes`` local.
    """
    G, features = create_graph_features()
    adj = nx.to_numpy_array(G)[None, :, :]

    nodes = G.nodes()
    node_features = pd.DataFrame.from_dict(
        {n: f for n, f in zip(nodes, features)}, orient="index"
    )
    G = StellarGraph(G, node_features=node_features)

    generator = FullBatchNodeGenerator(G, sparse=False, method="none")
    gcnModel = GCN([2], generator, activations=["relu"], dropout=0.5)

    x_in, x_out = gcnModel.in_out_tensors()
    model = keras.Model(inputs=x_in, outputs=x_out)

    # Check prediction with manually assembled full-batch inputs
    out_indices = np.array([[0, 1]], dtype="int32")
    preds_1 = model.predict([features[None, :, :], out_indices, adj])
    assert preds_1.shape == (1, 2, 2)

    # Check prediction through the generator flow
    preds_2 = model.predict(generator.flow(["a", "b"]))
    assert preds_2.shape == (1, 2, 2)

    assert preds_1 == pytest.approx(preds_2)
Example #10
0
def gcn_pipeline(G,
                 node_subjects,
                 layer_sizes=[16, 16],
                 activations=["relu", "relu"]):
    """Train a GCN node classifier on G with early stopping, then plot the
    training history and report test-set metrics."""
    # Split the labelled nodes into train/validation/test sets.
    train_subjects, val_subjects, test_subjects = training_split(node_subjects)

    # Full-batch generator with GCN normalization of the adjacency matrix.
    generator = FullBatchNodeGenerator(G, method="gcn")
    train_gen = generator.flow(train_subjects.index, train_subjects.values)

    gcn = GCN(
        layer_sizes=layer_sizes,
        activations=activations,
        generator=generator,
        dropout=0.5,
    )
    model = build_model(gcn, train_subjects.values.shape[1])

    val_gen = generator.flow(val_subjects.index, val_subjects.values)
    early_stopping = EarlyStopping(
        monitor="val_acc", patience=50, restore_best_weights=True
    )
    # shuffle must stay False, since shuffling data means shuffling the
    # whole graph.
    history = model.fit(
        train_gen,
        epochs=200,
        validation_data=val_gen,
        verbose=0,
        shuffle=False,
        callbacks=[early_stopping],
    )

    plot_results(history)
    test_metrics(generator, model, test_subjects)
Example #11
0
def test_APPNP_apply_propagate_model_dense():
    """APPNP.propagate_model predictions from hand-built dense inputs must
    match predictions obtained through the generator flow.

    Updated to ``Model.predict`` (``predict_generator`` is deprecated in
    TF2 and ``predict`` accepts generators directly), matching the other
    tests in this file; also drops the unused ``n_nodes`` local.
    """
    G, features = create_graph_features()
    adj = nx.to_scipy_sparse_matrix(G)
    features, adj = GCN_Aadj_feats_op(features, adj)
    adj = np.array(adj.todense()[None, :, :])

    nodes = G.nodes()
    node_features = pd.DataFrame.from_dict(
        {n: f for n, f in zip(nodes, features)}, orient="index"
    )
    G = StellarGraph(G, node_features=node_features)

    generator = FullBatchNodeGenerator(G, sparse=False, method="gcn")
    appnpnModel = APPNP([2], generator=generator, activations=["relu"], dropout=0.5)

    fully_connected_model = keras.Sequential()
    fully_connected_model.add(Dense(2))

    x_in, x_out = appnpnModel.propagate_model(fully_connected_model)
    model = keras.Model(inputs=x_in, outputs=x_out)

    # Check prediction with manually assembled full-batch inputs
    out_indices = np.array([[0, 1]], dtype="int32")
    preds_1 = model.predict([features[None, :, :], out_indices, adj])
    assert preds_1.shape == (1, 2, 2)

    # Check prediction through the generator flow
    preds_2 = model.predict(generator.flow(["a", "b"]))
    assert preds_2.shape == (1, 2, 2)

    assert preds_1 == pytest.approx(preds_2)
    def test_generator_flow_targets_as_list(self):
        """Targets given as a plain Python list are accepted and batched into
        shape (1, n_nodes)."""
        generator = FullBatchNodeGenerator(self.G)
        ids = list(self.G.nodes())[:3]
        targets = [1] * len(ids)
        flow = generator.flow(ids, targets)

        _, y = flow[0]
        assert y.shape == (1, 3)
        assert np.sum(y) == 3
    def preprocessing_predict(self, g, test_node, file_emb_output="./emb/100_900_nede2vec.emb"):
        """Attach pre-trained node2vec embeddings to a graph and build a
        full-batch prediction flow over all nodes in ``test_node['values']``.

        Args:
            g: a networkx graph; node ids must be convertible to int so they
               can be matched against the embedding file's keys.
            test_node: mapping/frame with a 'values' entry of node labels
               (presumably indexed by node id — TODO confirm against caller).
            file_emb_output: path to a word2vec-format embedding file.

        Returns:
            tuple: (G, test_gen, generator) — the StellarGraph, the flow over
            all nodes, and the FullBatchNodeGenerator itself.
        """
        node_subjects = test_node['values']

        # Labels are only used here for the class-distribution printout.
        node_subjects = node_subjects.astype(str)
        print(Counter(node_subjects))

        #file_emb_output = "./emb/100_900_nede2vec.emb"
        # NOTE(review): `model.wv` / `index2word` is the gensim<4 API —
        # confirm the pinned gensim version (gensim 4 renamed index2word
        # to index_to_key and KeyedVectors no longer exposes `.wv`).
        model = KeyedVectors.load_word2vec_format(file_emb_output)
        node_ids = model.wv.index2word
        node_embeddings = (
            model.wv.vectors
        )  # one embedding row per node id, in index2word order
        print("Embedding load success.")

        # Re-index embeddings by integer node id so they can be attached
        # as node features of the StellarGraph below.
        reinex_node_embedding = pd.DataFrame(node_embeddings, index=map(int, node_ids))
        g_feature_attr = g.copy()

        G = StellarGraph.from_networkx(
            g_feature_attr, node_features=reinex_node_embedding, node_type_default="n", edge_type_default="e"
        )
        print(G.info())

        # train_subjects, test_subjects = model_selection.train_test_split(
        #     node_subjects,  stratify=node_subjects #train_size=160, test_size=None,
        # )
        # # val_subjects, test_subjects = model_selection.train_test_split(
        # #     test_subjects, train_size=20, test_size=None, stratify=test_subjects
        # # )

        #train_subjects.value_counts().to_frame()

        #target_encoding = preprocessing.LabelBinarizer()
        # target_encoding = preprocessing.OneHotEncoder()

        # train_targets = target_encoding.fit_transform(train_subjects)
        # val_targets = target_encoding.transform(val_subjects)
        # test_targets = target_encoding.transform(test_subjects)

        # Full-batch generator with GCN adjacency normalization.
        generator = FullBatchNodeGenerator(G, method="gcn")
        # train_gen = generator.flow(train_subjects.index, train_targets)
        # val_gen = generator.flow(val_subjects.index, val_targets)
        # test_gen = generator.flow(test_subjects.index, test_targets)

        # Predict over every labelled node (no targets attached).
        all_nodes = node_subjects.index
        test_gen = generator.flow(all_nodes)

        return G, test_gen, generator
    def test_fullbatch_generator_init_3(self):
        """A non-callable transform argument is rejected with ValueError."""
        G, _ = create_graph_features()

        not_a_function = "Not callable"

        with pytest.raises(ValueError):
            FullBatchNodeGenerator(G, "test", transform=not_a_function)
Example #15
0
def test_GCN_regularisers():
    """GCN accepts valid kernel/bias initializers (by name or instance) and
    rejects unknown initializer names with ValueError.

    Drops the unused ``adj`` and ``n_nodes`` locals the original computed
    and never read.
    """
    G, features = create_graph_features()

    nodes = G.nodes()
    node_features = pd.DataFrame.from_dict(
        {n: f for n, f in zip(nodes, features)}, orient="index"
    )
    G = StellarGraph(G, node_features=node_features)

    generator = FullBatchNodeGenerator(G, sparse=False, method="none")

    # Default construction and valid initializers must succeed.
    gcn = GCN([2], generator)
    gcn = GCN([2], generator, kernel_initializer="ones")
    gcn = GCN([2], generator, kernel_initializer=initializers.ones())

    with pytest.raises(ValueError):
        gcn = GCN([2], generator, kernel_initializer="fred")

    gcn = GCN([2], generator, bias_initializer="zeros")
    gcn = GCN([2], generator, bias_initializer=initializers.zeros())

    with pytest.raises(ValueError):
        gcn = GCN([2], generator, bias_initializer="barney")
Example #16
0
def test_GCN_activations():
    """GCN defaults to relu activations, accepts explicit ones, and rejects
    mismatched counts or unknown activation names.

    Drops the unused ``adj`` and ``n_nodes`` locals and corrects the
    misleading comments (the original said "regularisers" where the test
    exercises activations).
    """
    G, features = create_graph_features()

    nodes = G.nodes()
    node_features = pd.DataFrame.from_dict(
        {n: f for n, f in zip(nodes, features)}, orient="index"
    )
    G = StellarGraph(G, node_features=node_features)

    generator = FullBatchNodeGenerator(G, sparse=False, method="none")

    gcn = GCN([2], generator)
    assert gcn.activations == ["relu"]

    gcn = GCN([2, 2], generator)
    assert gcn.activations == ["relu", "relu"]

    gcn = GCN([2], generator, activations=["linear"])
    assert gcn.activations == ["linear"]

    with pytest.raises(ValueError):
        # More activations than layers
        gcn = GCN([2], generator, activations=["relu", "linear"])

    with pytest.raises(ValueError):
        # Fewer activations than layers
        gcn = GCN([2, 2], generator, activations=["relu"])

    with pytest.raises(ValueError):
        # Unknown activation name
        gcn = GCN([2], generator, activations=["bleach"])
 def test_generator_constructor_hin(self):
     """FullBatchNodeGenerator rejects heterogeneous graphs: it only
     supports graphs with a single node type."""
     Ghin = example_hin_1({})
     with pytest.raises(
         ValueError,
         match="G: expected a graph with a single node type, found a graph with node types: 'A', 'B'",
     ):
         generator = FullBatchNodeGenerator(Ghin)
Example #18
0
def _dispatch_generator(graph, model_name, params,
                        generator_type="node"):
    """Create the graph generator matching ``model_name``.

    ``generator_type`` selects node vs. link generators where both exist;
    ``params`` entries are only read for the branch that matches, so an
    incomplete params dict is fine for other model names.

    Raises:
        ValueError: if ``model_name`` is not recognized.
    """
    if model_name == "watchyourstep":
        return AdjacencyPowerGenerator(graph, num_powers=params["num_powers"])

    if model_name in ("complex", "distmult"):
        return KGTripleGenerator(graph, params["batch_size"])

    if model_name == "attri2vec":
        factory = (
            Attri2VecNodeGenerator
            if generator_type == "node"
            else Attri2VecLinkGenerator
        )
        return factory(graph, params["batch_size"])

    if model_name in ("graphsage", "graphsage_dgi"):
        factory = (
            GraphSAGENodeGenerator
            if generator_type == "node"
            else GraphSAGELinkGenerator
        )
        return factory(graph, params["batch_size"], params["num_samples"])

    if model_name in ("gcn_dgi", "gat_dgi"):
        return FullBatchNodeGenerator(graph, sparse=False)

    if model_name in ("cluster_gcn_dgi", "cluster_gat_dgi"):
        return ClusterNodeGenerator(
            graph, clusters=params["clusters"], q=params["clusters_q"]
        )

    raise ValueError(f"Unknown model name '{model_name}'")
Example #19
0
 def test_kernel_and_bias_defaults(self):
     """A GAT built without explicit initializers/regularizers gets Keras
     defaults on every GraphAttention layer: Glorot-uniform kernels and
     attention kernels, zero biases, and no regularizers/constraints."""
     graph = example_graph(feature_size=self.F_in)
     gen = FullBatchNodeGenerator(graph,
                                  sparse=self.sparse,
                                  method=self.method)
     gat = GAT(
         layer_sizes=self.layer_sizes,
         activations=self.activations,
         attn_heads=self.attn_heads,
         generator=gen,
     )
     # Inspect every attention layer of the stacked model.
     for layer in gat._layers:
         if isinstance(layer, GraphAttention):
             assert isinstance(layer.kernel_initializer,
                               tf.initializers.GlorotUniform)
             assert isinstance(layer.bias_initializer,
                               tf.initializers.Zeros)
             assert isinstance(layer.attn_kernel_initializer,
                               tf.initializers.GlorotUniform)
             assert layer.kernel_regularizer is None
             assert layer.bias_regularizer is None
             assert layer.attn_kernel_regularizer is None
             assert layer.kernel_constraint is None
             assert layer.bias_constraint is None
             assert layer.attn_kernel_constraint is None
Example #20
0
def test_GCN_activations():
    """GCN defaults to relu activations, accepts explicit ones, and rejects
    mismatched counts or unknown activation names.

    Drops the unused ``adj`` and ``n_nodes`` locals and corrects the
    misleading comments (the original said "regularisers" where the test
    exercises activations).
    """
    G, features = create_graph_features()

    generator = FullBatchNodeGenerator(G, sparse=False, method="none")

    gcn = GCN([2], generator)
    assert gcn.activations == ["relu"]

    gcn = GCN([2, 2], generator)
    assert gcn.activations == ["relu", "relu"]

    gcn = GCN([2], generator, activations=["linear"])
    assert gcn.activations == ["linear"]

    with pytest.raises(ValueError):
        # More activations than layers
        gcn = GCN([2], generator, activations=["relu", "linear"])

    with pytest.raises(ValueError):
        # Fewer activations than layers
        gcn = GCN([2, 2], generator, activations=["relu"])

    with pytest.raises(ValueError):
        # Unknown activation name
        gcn = GCN([2], generator, activations=["bleach"])
Example #21
0
def test_GCN_init():
    """GCN records the constructor arguments it was given."""
    graph, _ = create_graph_features()

    generator = FullBatchNodeGenerator(graph)
    gcn = GCN([2], generator, activations=["relu"], dropout=0.5)

    assert gcn.layer_sizes == [2]
    assert gcn.activations == ["relu"]
    assert gcn.dropout == 0.5
    def test_weighted(self):
        """With weighted=True the generator's adjacency matrix carries the
        edge weights instead of binary connectivity."""
        graph = example_graph(feature_size=2, edge_weights=True)

        generator = FullBatchNodeGenerator(graph, weighted=True, method=None)
        expected = [
            [0, 0.1, 0, 20.0],
            [0.1, 0, 1.0, 1.3],
            [0, 1.0, 0, 0],
            [20.0, 1.3, 0, 0],
        ]
        np.testing.assert_array_equal(generator.Aadj.todense(), expected)
Example #23
0
def create_GCN_model(graph):
    """Build a two-layer GCN over ``graph`` with a small training flow.

    Returns:
        tuple: (base_model, keras_model, generator, train_gen)
    """
    generator = FullBatchNodeGenerator(graph)
    train_gen = generator.flow([1, 2], np.array([[1, 0], [0, 1]]))

    base_model = GCN(
        layer_sizes=[8, 2],
        generator=generator,
        bias=True,
        dropout=0.5,
        activations=["elu", "softmax"],
    )

    # in_out_tensors() replaces the deprecated build() (StellarGraph >= 1.x),
    # matching the rest of this file.
    x_inp, x_out = base_model.in_out_tensors()

    keras_model = Model(inputs=x_inp, outputs=x_out)

    return base_model, keras_model, generator, train_gen
    def test_fullbatch_generator_transform(self):
        """A callable transform is applied to (features, A) and the given
        name is recorded on the generator."""
        def square_adjacency(features, A, **kwargs):
            return features, A.dot(A)

        generator = FullBatchNodeGenerator(self.G, "test", transform=square_adjacency)
        assert generator.name == "test"

        A = self.G.to_adjacency_matrix().toarray()
        assert np.array_equal(A.dot(A), generator.Aadj.toarray())
Example #25
0
def create_GCN_model_sparse(graph):
    """Build a sparse two-layer GCN with all-ones initializers and l2 kernel
    regularization.

    Returns:
        tuple: (gcn, keras_model, generator, train_gen)
    """
    generator = FullBatchNodeGenerator(graph, sparse=True, method="gcn")
    train_gen = generator.flow([0, 1], np.array([[1, 0], [0, 1]]))

    layer_sizes = [2, 2]
    gcn = GCN(
        layer_sizes=layer_sizes,
        activations=["elu", "elu"],
        generator=generator,
        dropout=0.3,
        kernel_regularizer=regularizers.l2(5e-4),
    )

    # Force deterministic all-ones weights for reproducible outputs.
    for layer in gcn._layers:
        layer._initializer = "ones"
    # in_out_tensors() replaces the deprecated build() (StellarGraph >= 1.x),
    # matching the rest of this file.
    x_inp, x_out = gcn.in_out_tensors()
    keras_model = Model(inputs=x_inp, outputs=x_out)
    return gcn, keras_model, generator, train_gen
    def generator_flow(
        self,
        G,
        node_ids,
        node_targets,
        sparse=False,
        method="none",
        k=1,
        teleport_probability=0.1,
    ):
        """Run one FullBatchNodeGenerator flow and sanity-check its batch.

        Returns the dense adjacency matrix, the target-node index array and
        the targets from the first (and only) batch.
        """
        generator = FullBatchNodeGenerator(
            G,
            sparse=sparse,
            method=method,
            k=k,
            teleport_probability=teleport_probability,
        )
        n_nodes = G.number_of_nodes()

        flow = generator.flow(node_ids, node_targets)
        if sparse:
            # Reassemble the dense adjacency from the (indices, values) pair.
            [X, tind, A_ind, A_val], y = flow[0]
            A_dense = sps.coo_matrix(
                (A_val[0], (A_ind[0, :, 0], A_ind[0, :, 1])),
                shape=(n_nodes, n_nodes),
            ).toarray()
        else:
            [X, tind, A], y = flow[0]
            A_dense = A[0]

        # The batch features must be exactly the flow's features.
        assert np.allclose(X, flow.features)
        assert tind.shape[1] == len(node_ids)

        if node_targets is not None:
            assert np.allclose(y, node_targets)

        # Self-loop preprocessing must put ones on the diagonal.
        if method == "self_loops":
            assert np.allclose(A_dense.diagonal(), 1)

        return A_dense, tind, y
Example #27
0
    def test_fullbatch_generator_init_1(self):
        """With method=None the generator exposes the node features
        unchanged."""
        G, feats = create_graph_features()
        feature_frame = pd.DataFrame.from_dict(
            {node: feat for node, feat in zip(G.nodes(), feats)}, orient="index"
        )
        G = StellarGraph(G, node_type_name="node", node_features=feature_frame)

        generator = FullBatchNodeGenerator(G, method=None)
        assert np.array_equal(feats, generator.features)
Example #28
0
def create_GAT_model(graph):
    """Build a two-layer GAT with saliency-map support, no dropout, and
    all-ones initializers.

    Returns:
        tuple: (gat, keras_model, generator, train_gen)
    """
    generator = FullBatchNodeGenerator(graph, sparse=False, method=None)
    train_gen = generator.flow([0, 1], np.array([[1, 0], [0, 1]]))

    gat = GAT(
        layer_sizes=[2, 2],
        generator=generator,
        bias=False,
        in_dropout=0,
        attn_dropout=0,
        activations=["elu", "softmax"],
        normalize=None,
        saliency_map_support=True,
    )
    # Force deterministic all-ones weights for reproducible outputs.
    for layer in gat._layers:
        layer._initializer = "ones"
    # in_out_tensors() replaces the deprecated build() (StellarGraph >= 1.x),
    # matching the rest of this file.
    x_inp, x_out = gat.in_out_tensors()
    keras_model = Model(inputs=x_inp, outputs=x_out)
    return gat, keras_model, generator, train_gen
def main(graph_loc, layer_sizes, activations, dropout, learning_rate):
    """Train a GCN on the CORA citation dataset, then save the model, the
    target encoding, and report test metrics.

    Args:
        graph_loc: directory containing 'cora.cites' and 'cora.content'.
        layer_sizes: hidden-layer sizes for the GCN.
        activations: activation names, one per layer.
        dropout: dropout rate passed to training.
        learning_rate: optimizer learning rate passed to training.
    """
    edgelist = pd.read_table(
        os.path.join(graph_loc, 'cora.cites'), header=None, names=['source', 'target']
    )

    # Load node features
    # The CORA dataset contains binary attributes 'w_x' that correspond to whether the corresponding keyword
    # (out of 1433 keywords) is found in the corresponding publication.
    feature_names = ['w_{}'.format(ii) for ii in range(1433)]
    # Also, there is a "subject" column
    column_names = feature_names + ['subject']
    node_data = pd.read_table(
        os.path.join(graph_loc, 'cora.content'), header=None, names=column_names
    )

    # One-hot encode the subject labels.
    target_encoding = feature_extraction.DictVectorizer(sparse=False)
    node_targets = target_encoding.fit_transform(
        node_data[['subject']].to_dict("records")
    )

    node_ids = node_data.index
    node_features = node_data[feature_names]

    Gnx = nx.from_pandas_edgelist(edgelist)

    # Convert to StellarGraph and prepare for ML
    G = sg.StellarGraph(Gnx, node_type_name="label", node_features=node_features)

    # Split nodes into train/test using stratification.
    train_nodes, test_nodes, train_targets, test_targets = model_selection.train_test_split(
        node_ids, node_targets, train_size=140, test_size=None, stratify=node_targets, random_state=55232
    )

    # Split test set into test and validation
    val_nodes, test_nodes, val_targets, test_targets = model_selection.train_test_split(
        test_nodes, test_targets, train_size=300, test_size=None, random_state=523214
    )

    # NOTE(review): `func_opt`/`filter` is an old FullBatchNodeGenerator
    # signature (modern releases use `method=`/`transform=`) — confirm the
    # pinned StellarGraph version before reusing this script.
    generator = FullBatchNodeGenerator(G, func_opt=GCN_Aadj_feats_op, filter='localpool')

    model = train(train_nodes, train_targets, val_nodes, val_targets, generator, dropout,
        layer_sizes, learning_rate, activations)

    # Save the trained model
    save_str = "_h{}_l{}_d{}_r{}".format(
        "gcn", ''.join([str(x) for x in layer_sizes]), str(dropout), str(learning_rate)
    )

    model.save("cora_gcn_model" + save_str + ".h5")

    # We must also save the target encoding to convert model predictions
    with open("cora_gcn_encoding" + save_str + ".pkl", "wb") as f:
        pickle.dump([target_encoding], f)

    test(test_nodes, test_targets, generator, "cora_gcn_model" + save_str + ".h5")
Example #30
0
def create_GAT_model(graph):
    """Build a three-layer GAT classifier over ``graph`` with a small
    training flow.

    Returns:
        tuple: (base_model, keras_model, generator, train_gen)
    """
    generator = FullBatchNodeGenerator(graph, sparse=False)
    train_gen = generator.flow([1, 2], np.array([[1, 0], [0, 1]]))

    base_model = GAT(
        layer_sizes=[8, 8, 2],
        generator=generator,
        bias=True,
        in_dropout=0.5,
        attn_dropout=0.5,
        activations=["elu", "elu", "softmax"],
        normalize=None,
    )

    # in_out_tensors() replaces the deprecated build() (StellarGraph >= 1.x),
    # matching the rest of this file.
    x_inp, x_out = base_model.in_out_tensors()

    keras_model = Model(inputs=x_inp, outputs=x_out)

    return base_model, keras_model, generator, train_gen