Example #1
    def train_clf(self, graph, L):
        '''
        Train the SGC model with the updated labeled pool L.
        Return the newly trained model.
        '''
        train_targets = self.target_encoding.transform(
            self.df_targets.loc[L].to_dict("records"))
        train_gen = self.generator.flow(L, train_targets)

        sgc = GCN(
            layer_sizes=[train_targets.shape[1]],
            generator=self.generator,
            bias=True,
            dropout=0.5,
            activations=["softmax"],
            kernel_regularizer=regularizers.l2(5e-4),
        )

        x_inp, predictions = sgc.build()

        class_support = dict(Counter(self.df_targets.loc[L]["label"]))
        classes = sorted(self.data.class_labels)
        # inverse-frequency class weights (assumes every class occurs at
        # least once in L, otherwise this divides by zero)
        counts = [class_support.get(c, 0) for c in classes]
        weights = np.sum(counts) / np.array(counts)
        weighted_loss = self.weighted_categorical_crossentropy(weights)

        model = Model(inputs=x_inp, outputs=predictions)
        model.compile(
            optimizer=optimizers.Adam(lr=0.2),
            # loss=losses.categorical_crossentropy,
            loss=weighted_loss,
            metrics=["acc"],
        )

        # if not os.path.isdir("model_logs"):
        #     os.makedirs("model_logs")
        # es_callback = EarlyStopping(
        #     monitor="acc", patience=50
        # )  # patience is the number of epochs to wait before early stopping in case of no further improvement
        # mc_callback = ModelCheckpoint(
        #     "model_logs/best_model.h5", monitor="acc", save_best_only=True, save_weights_only=True
        # )

        history = model.fit_generator(
            train_gen,
            epochs=50,
            verbose=0,
            shuffle=False,  # must be False, since shuffling data means shuffling the whole graph
            # callbacks=[es_callback, mc_callback],
        )

        # model.load_weights("model_logs/best_model.h5")

        return model
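# A minimal sketch of the weighted_categorical_crossentropy helper that
# train_clf calls via self -- its real implementation is not shown in this
# example, so this is only one plausible version: it scales each class's
# cross-entropy term by the supplied weight.
import numpy as np
from tensorflow.keras import backend as K

def weighted_categorical_crossentropy(weights):
    weights = K.variable(np.asarray(weights, dtype="float32"))

    def loss(y_true, y_pred):
        y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())  # avoid log(0)
        return -K.sum(y_true * K.log(y_pred) * weights, axis=-1)

    return loss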
Example #2
def train(
    train_nodes,
    train_targets,
    val_nodes,
    val_targets,
    generator,
    dropout,
    layer_sizes,
    learning_rate,
    activations,
    num_epochs,
):
    """

    Train a GCN model on the specified graph G with given parameters, evaluate it, and save the model.
    Args:
        train_nodes: A list of train nodes
        train_targets: Labels of train nodes
        val_nodes: A list of validation nodes
        val_targets: Labels of validation nodes
        generator: A FullBatchNodeGenerator
        dropout: The dropout (0->1) Initial Learning rate
        layer_sizes: A list of number of hidden nodes in each layer
        learning_rate: Initial Learning rate
        activations: A list of number of activation functions in each layer
    """

    train_gen = generator.flow(train_nodes, train_targets)
    val_gen = generator.flow(val_nodes, val_targets)
    gcnModel = GCN(
        layer_sizes,
        generator,
        bias=True,
        dropout=dropout,
        kernel_regularizer=regularizers.l2(5e-4),
        activations=activations,
    )

    # Expose the input and output tensors of the model:
    x_inp, x_out = gcnModel.build()

    # Create Keras model for training
    model = keras.Model(inputs=x_inp, outputs=x_out)
    model.compile(
        loss=losses.categorical_crossentropy,
        metrics=[metrics.categorical_accuracy],
        optimizer=optimizers.Adam(lr=learning_rate),
    )

    # Train model
    history = model.fit_generator(train_gen,
                                  epochs=num_epochs,
                                  validation_data=val_gen,
                                  verbose=2,
                                  shuffle=False)

    return model
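# Hypothetical invocation of train() -- G and the node/target splits are
# assumed to have been prepared as in the surrounding examples:
generator = FullBatchNodeGenerator(G, method="gcn")
model = train(
    train_nodes, train_targets,
    val_nodes, val_targets,
    generator,
    dropout=0.5,
    layer_sizes=[16, 16],
    learning_rate=0.01,
    activations=["relu", "relu"],
    num_epochs=200,
)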
Example #3
def gcn_pipeline(G,
                 node_subjects,
                 layer_sizes=[16, 16],
                 activations=["relu", "relu"]):
    # Train/validation/test split
    train_subjects, val_subjects, test_subjects = training_split(node_subjects)

    # GCN training generator
    generator = FullBatchNodeGenerator(G, method="gcn")
    train_gen = generator.flow(
        train_subjects.index,
        train_subjects.values,
    )
    gcn = GCN(layer_sizes=layer_sizes,
              activations=activations,
              generator=generator,
              dropout=0.5)
    model = build_model(gcn, train_subjects.values.shape[1])

    val_gen = generator.flow(val_subjects.index, val_subjects.values)
    es_callback = EarlyStopping(monitor="val_acc",
                                patience=50,
                                restore_best_weights=True)
    history = model.fit(
        train_gen,
        epochs=200,
        validation_data=val_gen,
        verbose=0,
        shuffle=False,  # must be False, since shuffling data means shuffling the whole graph
        callbacks=[es_callback],
    )

    plot_results(history)
    test_metrics(generator, model, test_subjects)
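# A plausible sketch of the build_model helper called above (its real body
# is not shown): attach a softmax head sized to the number of classes and
# compile the Keras model. Model, layers, losses, and optimizers are
# assumed to come from tensorflow.keras.
def build_model(gcn, n_classes):
    x_inp, x_out = gcn.in_out_tensors()
    predictions = layers.Dense(units=n_classes, activation="softmax")(x_out)
    model = Model(inputs=x_inp, outputs=predictions)
    model.compile(
        optimizer=optimizers.Adam(lr=0.01),
        loss=losses.categorical_crossentropy,
        metrics=["acc"],
    )
    return model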
Example #4
def create_GCN_model(graph):

    generator = FullBatchNodeGenerator(graph)
    train_gen = generator.flow([1, 2], np.array([[1, 0], [0, 1]]))

    base_model = GCN(
        layer_sizes=[8, 2],
        generator=generator,
        bias=True,
        dropout=0.5,
        activations=["elu", "softmax"],
    )

    x_inp, x_out = base_model.build()

    keras_model = Model(inputs=x_inp, outputs=x_out)

    return base_model, keras_model, generator, train_gen
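# Hypothetical usage: compile and fit the Keras model returned by
# create_GCN_model on its built-in two-node training flow (graph is an
# assumed StellarGraph instance).
base_model, keras_model, generator, train_gen = create_GCN_model(graph)
keras_model.compile(
    optimizer=optimizers.Adam(lr=0.01),
    loss=losses.categorical_crossentropy,
    metrics=["acc"],
)
keras_model.fit(train_gen, epochs=10, verbose=0, shuffle=False)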
Example #5
def create_GCN_model_sparse(graph):
    generator = FullBatchNodeGenerator(graph, sparse=True, method="gcn")
    train_gen = generator.flow([0, 1], np.array([[1, 0], [0, 1]]))

    layer_sizes = [2, 2]
    gcn = GCN(
        layer_sizes=layer_sizes,
        activations=["elu", "elu"],
        generator=generator,
        dropout=0.3,
        kernel_regularizer=regularizers.l2(5e-4),
    )

    for layer in gcn._layers:
        layer._initializer = "ones"
    x_inp, x_out = gcn.build()
    keras_model = Model(inputs=x_inp, outputs=x_out)
    return gcn, keras_model, generator, train_gen
Example #6
def make_gcn(train_targets, generator):
    gcn = GCN(layer_sizes=[90, 90],
              activations=["relu", "relu"],
              generator=generator,
              dropout=0.5)

    x_inp, x_out = gcn.in_out_tensors()
    # predictions = keras.layers.Softmax()(x_out)
    predictions = layers.Dense(units=train_targets.shape[1],
                               activation="sigmoid")(x_out)

    gcn_model = Model(inputs=x_inp, outputs=predictions)
    gcn_model.compile(
        optimizer=optimizers.Adam(lr=0.005),
        loss=losses.mean_squared_error,
        metrics=["acc"],
    )
    embedding_model = Model(inputs=x_inp, outputs=x_out)
    return gcn_model, embedding_model
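# Hypothetical usage of make_gcn: train the sigmoid head, then reuse the
# shared GCN layers through embedding_model (train_nodes and node_ids are
# assumed lists of node identifiers).
gcn_model, embedding_model = make_gcn(train_targets, generator)
gcn_model.fit(generator.flow(train_nodes, train_targets),
              epochs=50, verbose=0, shuffle=False)
embeddings = embedding_model.predict(generator.flow(node_ids))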
Example #7
def test_dgi_stateful():
    G = example_graph_random()
    emb_dim = 16

    generator = FullBatchNodeGenerator(G)
    corrupted_generator = CorruptedGenerator(generator)
    gen = corrupted_generator.flow(G.nodes())

    infomax = DeepGraphInfomax(
        GCN(generator=generator, activations=["relu"], layer_sizes=[emb_dim])
    )

    model_1 = tf.keras.Model(*infomax.in_out_tensors())
    model_2 = tf.keras.Model(*infomax.in_out_tensors())

    # check embeddings are equal before training
    embeddings_1 = tf.keras.Model(*infomax.embedding_model()).predict(
        generator.flow(G.nodes())
    )
    embeddings_2 = tf.keras.Model(*infomax.embedding_model()).predict(
        generator.flow(G.nodes())
    )

    assert np.array_equal(embeddings_1, embeddings_2)

    model_1.compile(loss=tf.nn.sigmoid_cross_entropy_with_logits, optimizer="Adam")
    model_1.fit(gen)

    # check embeddings are still equal after training one model
    embeddings_1 = tf.keras.Model(*infomax.embedding_model()).predict(
        generator.flow(G.nodes())
    )
    embeddings_2 = tf.keras.Model(*infomax.embedding_model()).predict(
        generator.flow(G.nodes())
    )

    assert np.array_equal(embeddings_1, embeddings_2)

    model_2.compile(loss=tf.nn.sigmoid_cross_entropy_with_logits, optimizer="Adam")
    model_2.fit(gen)

    # check embeddings are still equal after training both models
    embeddings_1 = tf.keras.Model(*infomax.embedding_model()).predict(
        generator.flow(G.nodes())
    )
    embeddings_2 = tf.keras.Model(*infomax.embedding_model()).predict(
        generator.flow(G.nodes())
    )

    assert np.array_equal(embeddings_1, embeddings_2)
Example #8
def _fit_deep_graph_infomax(train_graph, params, model_name):
    """Train unsupervised Deep Graph Infomax."""
    if "gcn_dgi" in model_name or "gat_dgi" in model_name:
        if "cluster" in model_name:
            generator = ClusterNodeGenerator(
                train_graph, clusters=params["clusters"],
                q=params["clusters_q"])
        else:
            generator = FullBatchNodeGenerator(train_graph, sparse=False)

        if "gcn_dgi" in model_name:
            embedding_layer = GCN(
                layer_sizes=[params["embedding_dimension"]],
                activations=["relu"], generator=generator)
        elif "gat_dgi" in model_name:
            embedding_layer = GAT(
                layer_sizes=[params["embedding_dimension"]],
                activations=["relu"], generator=generator, attn_heads=8)
    elif model_name == "graphsage_dgi":
        generator = GraphSAGENodeGenerator(
            train_graph, batch_size=50, num_samples=[5])
        embedding_layer = GraphSAGE(
            layer_sizes=[params["embedding_dimension"]], activations=["relu"],
            generator=generator
        )
    else:
        raise ValueError(f"Unknown model name {model_name}")

    embedding_model = _execute_deep_graph_infomax(
        train_graph, embedding_layer, generator, params)

    # Here the models can be both inductive and transductive
    if model_name in ["gcn_dgi", "gat_dgi", "graphsage_dgi"]:
        return embedding_model.predict(
            generator.flow(train_graph.nodes()))
    else:
        return embedding_model
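# A plausible sketch of the _execute_deep_graph_infomax helper referenced
# above (its real body is not shown here). It follows StellarGraph's usual
# DGI workflow; the params keys "learning_rate" and "num_epochs" are
# assumptions.
import tensorflow as tf
from tensorflow.keras import Model, optimizers
from stellargraph.mapper import CorruptedGenerator
from stellargraph.layer import DeepGraphInfomax

def _execute_deep_graph_infomax(train_graph, embedding_layer, generator, params):
    corrupted_generator = CorruptedGenerator(generator)
    gen = corrupted_generator.flow(train_graph.nodes())

    infomax = DeepGraphInfomax(embedding_layer, corrupted_generator)
    x_in, x_out = infomax.in_out_tensors()

    model = Model(inputs=x_in, outputs=x_out)
    model.compile(
        loss=tf.nn.sigmoid_cross_entropy_with_logits,
        optimizer=optimizers.Adam(lr=params.get("learning_rate", 1e-3)),
    )
    model.fit(gen, epochs=params.get("num_epochs", 100), verbose=0)

    # build a model that emits the (uncorrupted) node embeddings
    x_emb_in, x_emb_out = embedding_layer.in_out_tensors()
    if generator.num_batch_dims() == 2:
        x_emb_out = tf.squeeze(x_emb_out, axis=0)  # drop the full-batch axis
    return Model(inputs=x_emb_in, outputs=x_emb_out)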
Example #9
train_subjects, test_subjects = model_selection.train_test_split(
    node_subjects, train_size=140, test_size=None, stratify=node_subjects)
val_subjects, test_subjects = model_selection.train_test_split(
    test_subjects, train_size=500, test_size=None, stratify=test_subjects)

target_encoding = preprocessing.LabelBinarizer()

train_targets = target_encoding.fit_transform(train_subjects)
val_targets = target_encoding.transform(val_subjects)
test_targets = target_encoding.transform(test_subjects)

generator = FullBatchNodeGenerator(G, method="gcn")

train_gen = generator.flow(train_subjects.index, train_targets)
val_gen = generator.flow(val_subjects.index, val_targets)

gcn = GCN(layer_sizes=[16, 8, 8],
          activations=["relu", "relu", "relu"],
          generator=generator,
          dropout=0.5)

x_inp, x_out = gcn.in_out_tensors()
# assumed reconstruction of the missing head: a Dense softmax layer
# sized to the number of target classes
predictions = layers.Dense(units=train_targets.shape[1],
                           activation="softmax")(x_out)

model = Model(inputs=x_inp, outputs=predictions)
model.compile(
    optimizer=optimizers.Adam(lr=0.01),
    loss=losses.categorical_crossentropy,
    metrics=["acc"],
)

history = model.fit(
    train_gen,
    epochs=200,
    validation_data=val_gen,
    verbose=2,
    shuffle=False,  # must be False, since shuffling data means shuffling the whole graph
)
Example #10
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
from tensorflow.keras import Model


# Load data. There is also a demo on data loading.
dataset = datasets.Cora()
display(HTML(dataset.description))
G, node_subjects = dataset.load()

# We create and train our DeepGraphInfomax model (docs). Note that the loss used here must always be
# tf.nn.sigmoid_cross_entropy_with_logits.

fullbatch_generator = FullBatchNodeGenerator(G, sparse=False)
gcn_model = GCN(layer_sizes=[2], activations=["relu"], generator=fullbatch_generator)

corrupted_generator = CorruptedGenerator(fullbatch_generator)
gen = corrupted_generator.flow(G.nodes())

infomax = DeepGraphInfomax(gcn_model, corrupted_generator)
x_in, x_out = infomax.in_out_tensors()

model = Model(inputs=x_in, outputs=x_out)
model.compile(loss=tf.nn.sigmoid_cross_entropy_with_logits, optimizer=Adam(lr=1e-3))

epochs = 100

es = EarlyStopping(monitor="loss", min_delta=0, patience=20)
history = model.fit(gen, epochs=epochs, verbose=0, callbacks=[es])
plot_history(history)
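# Assumed follow-up (not part of the original snippet): reuse the trained
# GCN layers to extract node embeddings for downstream tasks.
x_emb_in, x_emb_out = gcn_model.in_out_tensors()
x_emb_out = tf.squeeze(x_emb_out, axis=0)  # drop the full-batch axis
embedding_model = Model(inputs=x_emb_in, outputs=x_emb_out)
node_embeddings = embedding_model.predict(fullbatch_generator.flow(G.nodes()))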
Example #11
    # it returns a StellarGraph object and the node subjects (classes)
    # The features (word occurrences) are already built into the "stellar_g" object of type StellarGraph
    stellar_g, node_classes = cora_dataset.load(directed=True)
    train_dataset, test_dataset = split_data(node_classes)
    train_targets, test_targets, target_encoding = encode_classes(
        train_dataset, test_dataset)

    ###############################################################

    # creating GCN model
    gcn_generator = FullBatchNodeGenerator(stellar_g,
                                           method="gcn",
                                           sparse=False)
    train_gcn_gen = gcn_generator.flow(train_dataset.index, train_targets)
    gcn = GCN(layer_sizes=[16, 16],
              activations=['relu', 'relu'],
              generator=gcn_generator,
              dropout=0.5)  # 2 GCN layers
    gcn_inp, gcn_out = gcn.in_out_tensors()  # for the KERAS model

    # creating KERAS model with the GCN model layers
    gcn_dense_layer = layers.Dense(units=train_targets.shape[1],
                                   activation="softmax")(gcn_out)
    keras_gcn = Model(inputs=gcn_inp,
                      outputs=gcn_dense_layer)  # 2 GCN, 1 Dense
    keras_gcn.compile(
        optimizer="adam",
        loss=losses.categorical_crossentropy,
        metrics=["accuracy"],
    )
    keras_gcn.fit(train_gcn_gen,
                  epochs=10)
Example #12
edge_labels_test = [1 for i in range(len(G_test.edges()))]
for neg_edge in edges_test_neg:
    edge_ids_test.append(neg_edge)
    edge_labels_test.append(0)
print(G_test.info())

epochs = 50

train_gen = sg.mapper.FullBatchLinkGenerator(G_train, method="gcn")
train_flow = train_gen.flow(edge_ids_train, edge_labels_train)

test_gen = FullBatchLinkGenerator(G_test, method="gcn")
test_flow = test_gen.flow(edge_ids_test, edge_labels_test)

gcn = GCN(layer_sizes=[16, 16],
          activations=["relu", "relu"],
          generator=train_gen,
          dropout=0.3)

x_inp, x_out = gcn.in_out_tensors()

prediction = LinkEmbedding(activation="relu", method="ip")(x_out)
prediction = keras.layers.Reshape((-1, ))(prediction)

model = keras.Model(inputs=x_inp, outputs=prediction)

model.compile(
    optimizer=keras.optimizers.Adam(lr=0.01),
    loss=keras.losses.binary_crossentropy,
    metrics=["accuracy"],
)
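# Assumed continuation: train on the training edges and validate on the
# held-out test edges; shuffle stays False for full-batch link data.
history = model.fit(train_flow,
                    epochs=epochs,
                    validation_data=test_flow,
                    verbose=2,
                    shuffle=False)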
Example #13
edge_splitter_train = EdgeSplitter(Gs)

# Randomly sample a fraction p=0.1 of all positive links, and same number of negative links
# reduced graph G_train with the sampled links removed:
G_train, edge_ids_train, edge_labels_train = edge_splitter_train.train_test_split(
    p=0.1, method="global", keep_connected=True)

epochs = 300

train_gen = FullBatchLinkGenerator(G_train, method="gcn", weighted=True)
train_flow = train_gen.flow(edge_ids_train, edge_labels_train)

layer_sizes = [20, 20]

gcn = GCN(layer_sizes=layer_sizes,
          activations=["elu", "softmax"],
          generator=train_gen,
          dropout=0.5)

x_inp, x_out = gcn.in_out_tensors()

prediction = LinkEmbedding(activation="relu", method="ip")(x_out)

model = keras.Model(inputs=x_inp, outputs=prediction)

# use adam optimizers and set learning rate
model.compile(optimizer=keras.optimizers.Adam(lr=0.01),
              loss=keras.losses.binary_crossentropy,
              metrics=["acc", f1_m, precision_m, recall_m])

init_train_metrics = model.evaluate(train_flow)
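# Sketch (assumed): report the untrained model's metrics as a baseline.
print("Train set metrics of the initial (untrained) model:")
for name, val in zip(model.metrics_names, init_train_metrics):
    print("\t{}: {:0.4f}".format(name, val))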
Example #14
all_targets = target_encoding.transform(graph_labels)

generator = FullBatchNodeGenerator(graph_stellar, method="gcn")

train_gen = generator.flow(train_subjects.index, train_targets)
val_gen = generator.flow(val_subjects.index, val_targets)
test_gen = generator.flow(test_subjects.index, test_targets)
all_gen = generator.flow(graph_labels.index, all_targets)

es_callback = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
auc = tf.keras.metrics.AUC()

with tf.device('/CPU:0'):
    gcn = GCN(
        layer_sizes=[2*node_feature_count, 2*node_feature_count],
        activations=['relu', 'relu'],
        generator=generator
    )

    x_inp, x_out = gcn.in_out_tensors()

    # predictions = Dense(units=train_targets.shape[1], activation="softmax")(x_out)
    predictions = Dense(units=10)(x_out)
    predictions = tf.keras.activations.relu(predictions)
    predictions = Dense(units=train_targets.shape[1], activation="softmax")(predictions)

    model = Model(inputs=x_inp, outputs=predictions)

    model.compile(
        optimizer=optimizers.Adam(lr=0.05, amsgrad=True),
        loss=tf.losses.categorical_crossentropy,
        metrics=[auc],  # assumed completion: the AUC metric defined above
    )
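# Assumed continuation of the truncated snippet: train with the
# early-stopping callback defined above.
history = model.fit(
    train_gen,
    epochs=200,
    validation_data=val_gen,
    verbose=0,
    shuffle=False,  # full-batch data must not be shuffled
    callbacks=[es_callback],
)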