def run_deep_graph_infomax(base_model, generator, epochs, node_type):
    # NOTE: `G`, `patience`, and `verbose` are read from the enclosing scope.
    corrupted_generator = CorruptedGenerator(generator)
    gen = corrupted_generator.flow(G.nodes(node_type=node_type))
    infomax = DeepGraphInfomax(base_model, corrupted_generator)
    x_in, x_out = infomax.in_out_tensors()

    print("Starting Training")
    ttrain = time.time()

    # Train
    model = Model(inputs=x_in, outputs=x_out)
    model.compile(loss=tf.nn.sigmoid_cross_entropy_with_logits, optimizer=Adam(lr=1e-3))
    es = EarlyStopping(monitor="loss", min_delta=0, patience=patience)
    history = model.fit(gen, epochs=epochs, verbose=verbose, callbacks=[es])
    # sg.utils.plot_history(history)

    ttrain1 = time.time()
    print(
        f"Training complete in {(ttrain1-ttrain):.2f} s ({(ttrain1-ttrain)/60:.2f} min)"
    )

    x_emb_in, x_emb_out = base_model.in_out_tensors()
    # for full batch models, squeeze out the batch dim (which is 1)
    if generator.num_batch_dims() == 2:
        x_emb_out = tf.squeeze(x_emb_out, axis=0)

    return x_emb_in, x_emb_out
def run_deep_graph_infomax(base_model, generator, epochs):
    # NOTE: `G`, `v_type`, and `verbose` are read from the enclosing scope.
    print(f"Starting training for {v_type} type: ")
    t0 = time.time()
    corrupted_generator = CorruptedGenerator(generator)
    gen = corrupted_generator.flow(G.nodes(node_type=v_type))
    infomax = DeepGraphInfomax(base_model, corrupted_generator)
    x_in, x_out = infomax.in_out_tensors()

    # Train with DGI
    model = Model(inputs=x_in, outputs=x_out)
    model.compile(loss=tf.nn.sigmoid_cross_entropy_with_logits, optimizer=Adam(lr=1e-3))
    es = EarlyStopping(monitor="loss", min_delta=0, patience=10)
    history = model.fit(gen, epochs=epochs, verbose=verbose, callbacks=[es])
    # sg.utils.plot_history(history)

    x_emb_in, x_emb_out = base_model.in_out_tensors()
    # for full batch models, squeeze out the batch dim (which is 1)
    if generator.num_batch_dims() == 2:
        x_emb_out = tf.squeeze(x_emb_out, axis=0)

    t1 = time.time()
    print(f"Time required: {t1-t0:.2f} s ({(t1-t0)/60:.1f} min)")
    return x_emb_in, x_emb_out, model
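# --- Usage sketch (illustrative; not part of the original source). Both variants
# above read `G` and `verbose` from the enclosing scope (the first also reads
# `patience`, the second `v_type`), so those must be defined before calling them.
# The node type "paper", the layer size, and the epoch count are hypothetical.
verbose = 1
v_type = "paper"  # hypothetical node type
generator = FullBatchNodeGenerator(G, sparse=False)
gcn_model = GCN(layer_sizes=[128], activations=["relu"], generator=generator)
x_emb_in, x_emb_out, dgi_model = run_deep_graph_infomax(gcn_model, generator, epochs=100)

# Wrap the returned tensors in a Keras model to compute node embeddings.
emb_model = Model(inputs=x_emb_in, outputs=x_emb_out)
embeddings = emb_model.predict(generator.flow(G.nodes(node_type=v_type)))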
def test_dgi(model_type, sparse):
    # `model_type` and `sparse` are supplied via pytest parametrization (not shown).
    if sparse and model_type is PPNP:
        pytest.skip("PPNP doesn't support sparse=True")
    G = example_graph_random()
    emb_dim = 16

    generator = FullBatchNodeGenerator(G, sparse=sparse)
    corrupted_generator = CorruptedGenerator(generator)
    gen = corrupted_generator.flow(G.nodes())

    base_model = model_type(
        generator=generator, activations=["relu"], layer_sizes=[emb_dim]
    )
    infomax = DeepGraphInfomax(base_model)

    # train the DGI model on real vs. corrupted node features
    model = tf.keras.Model(*infomax.in_out_tensors())
    model.compile(loss=tf.nn.sigmoid_cross_entropy_with_logits, optimizer="Adam")
    model.fit(gen)

    # the embedding model should produce one embedding per node
    emb_model = tf.keras.Model(*infomax.embedding_model())
    embeddings = emb_model.predict(generator.flow(G.nodes()))
    assert embeddings.shape == (len(G.nodes()), emb_dim)
def test_dgi(model_type, sparse):
    if sparse and model_type is PPNP:
        pytest.skip("PPNP doesn't support sparse=True")
    G = example_graph_random()
    generator = FullBatchNodeGenerator(G, sparse=sparse)
    corrupted_generator = CorruptedGenerator(generator)
    gen = corrupted_generator.flow(G.nodes())

    # running the same DGI pipeline repeatedly should give identical results
    assert_reproducible(lambda: dgi(generator, gen, model_type), num_iter=3)
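# The `dgi` helper and `assert_reproducible` used above are not shown in this
# snippet. A minimal sketch of what `dgi` plausibly does, assuming it builds and
# briefly trains a DGI model and returns something comparable across runs (here,
# the trained weights); the actual helper in the test suite may differ:
def dgi(generator, gen, model_type, emb_dim=16):
    base_model = model_type(
        generator=generator, activations=["relu"], layer_sizes=[emb_dim]
    )
    infomax = DeepGraphInfomax(base_model)
    model = tf.keras.Model(*infomax.in_out_tensors())
    model.compile(loss=tf.nn.sigmoid_cross_entropy_with_logits, optimizer="Adam")
    model.fit(gen, epochs=1, verbose=0)
    # return the trained weights so repeated runs can be compared for equality
    return model.get_weights()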
def test_dgi_stateful():
    G = example_graph_random()
    emb_dim = 16

    generator = FullBatchNodeGenerator(G)
    corrupted_generator = CorruptedGenerator(generator)
    gen = corrupted_generator.flow(G.nodes())

    infomax = DeepGraphInfomax(
        GCN(generator=generator, activations=["relu"], layer_sizes=[emb_dim])
    )

    model_1 = tf.keras.Model(*infomax.in_out_tensors())
    model_2 = tf.keras.Model(*infomax.in_out_tensors())

    # check embeddings are equal before training
    embeddings_1 = tf.keras.Model(*infomax.embedding_model()).predict(
        generator.flow(G.nodes())
    )
    embeddings_2 = tf.keras.Model(*infomax.embedding_model()).predict(
        generator.flow(G.nodes())
    )
    assert np.array_equal(embeddings_1, embeddings_2)

    model_1.compile(loss=tf.nn.sigmoid_cross_entropy_with_logits, optimizer="Adam")
    model_1.fit(gen)

    # check embeddings are still equal after training one model
    embeddings_1 = tf.keras.Model(*infomax.embedding_model()).predict(
        generator.flow(G.nodes())
    )
    embeddings_2 = tf.keras.Model(*infomax.embedding_model()).predict(
        generator.flow(G.nodes())
    )
    assert np.array_equal(embeddings_1, embeddings_2)

    model_2.compile(loss=tf.nn.sigmoid_cross_entropy_with_logits, optimizer="Adam")
    model_2.fit(gen)

    # check embeddings are still equal after training both models
    embeddings_1 = tf.keras.Model(*infomax.embedding_model()).predict(
        generator.flow(G.nodes())
    )
    embeddings_2 = tf.keras.Model(*infomax.embedding_model()).predict(
        generator.flow(G.nodes())
    )
    assert np.array_equal(embeddings_1, embeddings_2)
def _execute_deep_graph_infomax(train_graph, embedding_layer, generator, params):
    corrupted_generator = CorruptedGenerator(generator)
    gen = corrupted_generator.flow(train_graph.nodes())
    infomax = DeepGraphInfomax(embedding_layer, corrupted_generator)
    x_in, x_out = infomax.in_out_tensors()

    model = Model(inputs=x_in, outputs=x_out)
    model.compile(
        loss=tf.nn.sigmoid_cross_entropy_with_logits,
        optimizer=optimizers.Adam(lr=1e-3),
    )
    model.fit(gen, epochs=params["epochs"], verbose=0)

    x_emb_in, x_emb_out = embedding_layer.in_out_tensors()
    # for full batch models, squeeze out the batch dim (which is 1)
    if generator.num_batch_dims() == 2:
        x_emb_out = tf.squeeze(x_emb_out, axis=0)

    embedding_model = Model(inputs=x_emb_in, outputs=x_emb_out)
    return embedding_model
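# --- Usage sketch (illustrative, not from the original source). `params` is
# assumed to carry at least the "epochs" key read inside the helper, and the
# embedding layer must be built on the same generator it is trained with.
generator = FullBatchNodeGenerator(train_graph, sparse=False)
embedding_layer = GCN(layer_sizes=[128], activations=["relu"], generator=generator)
embedding_model = _execute_deep_graph_infomax(
    train_graph, embedding_layer, generator, params={"epochs": 100}
)
node_embeddings = embedding_model.predict(generator.flow(train_graph.nodes()))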
from tensorflow.keras import Model

# Load data. There is also a demo on data loading.
dataset = datasets.Cora()
display(HTML(dataset.description))
G, node_subjects = dataset.load()

# We create and train our DeepGraphInfomax model (docs). Note that the loss used
# here must always be tf.nn.sigmoid_cross_entropy_with_logits.
fullbatch_generator = FullBatchNodeGenerator(G, sparse=False)
gcn_model = GCN(layer_sizes=[2], activations=["relu"], generator=fullbatch_generator)

corrupted_generator = CorruptedGenerator(fullbatch_generator)
gen = corrupted_generator.flow(G.nodes())

infomax = DeepGraphInfomax(gcn_model, corrupted_generator)
x_in, x_out = infomax.in_out_tensors()

model = Model(inputs=x_in, outputs=x_out)
model.compile(loss=tf.nn.sigmoid_cross_entropy_with_logits, optimizer=Adam(lr=1e-3))

epochs = 100
es = EarlyStopping(monitor="loss", min_delta=0, patience=20)
history = model.fit(gen, epochs=epochs, verbose=0, callbacks=[es])
plot_history(history)

x_emb_in, x_emb_out = gcn_model.in_out_tensors()
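# A plausible continuation of this demo (a sketch, not verbatim from the source):
# wrap the trained GCN's tensors in a Keras model to compute node embeddings. For
# full-batch models the batch dimension is 1, so it is squeezed out first.
emb_model = Model(inputs=x_emb_in, outputs=tf.squeeze(x_emb_out, axis=0))
all_embeddings = emb_model.predict(fullbatch_generator.flow(G.nodes()))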
# intuition for GNNs:
# https://medium.com/analytics-vidhya/getting-the-intuition-of-graph-neural-networks-a30a2c34280d
# understanding GCNs:
# https://towardsdatascience.com/understanding-graph-convolutional-networks-for-node-classification-a2bfdb7aba7b
# stellargraph implementation:
# https://medium.com/stellargraph/do-i-know-you-flexible-unsupervised-and-semi-supervised-graph-models-with-deep-graph-infomax-96fbfd63ec31  # noqa: E501

# 2-layer GCN model
# https://stellargraph.readthedocs.io/en/stable/api.html?highlight=gcn#stellargraph.layer.GCN
gcn_model = GCN(
    layer_sizes=[128, 128], activations=["relu", "relu"], generator=fullbatch_generator
)

# data generator to shuffle node features for corrupted graph
corrupted_generator = CorruptedGenerator(fullbatch_generator)
# produce object for training model
gen = corrupted_generator.flow(stellar_G.nodes())

# create Deep Graph Infomax model
infomax = DeepGraphInfomax(gcn_model, corrupted_generator)
x_in, x_out = infomax.in_out_tensors()

# train model
model = Model(inputs=x_in, outputs=x_out)
model.compile(loss=tf.nn.sigmoid_cross_entropy_with_logits, optimizer=Adam(lr=1e-3))
model.summary()

# create a model image, print to file
plot_model(model, show_shapes=True, to_file="model.png")

epochs = 1000
es = EarlyStopping(monitor="loss", min_delta=0, patience=20)
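# Continuation sketch (assumed; the snippet ends before the training call): fit
# the DGI model with early stopping, then extract node embeddings from the
# trained gcn_model as in the earlier snippets.
history = model.fit(gen, epochs=epochs, verbose=0, callbacks=[es])
x_emb_in, x_emb_out = gcn_model.in_out_tensors()
emb_model = Model(inputs=x_emb_in, outputs=tf.squeeze(x_emb_out, axis=0))
node_embeddings = emb_model.predict(fullbatch_generator.flow(stellar_G.nodes()))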