def test_compile():
    # NOTE(review): this definition is shadowed by a later duplicate
    # `def test_compile()` in this module, so pytest never collects or runs
    # this version — the two should be merged or one of them renamed.
    tf.keras.backend.clear_session()

    graph = example_graph_1(feature_size=10)

    # Each entry: (base_model, keras_model, generator, train_gen)
    gnn_models = [
        create_graphSAGE_model(graph),
        create_HinSAGE_model(graph),
        create_graphSAGE_model(graph, link_prediction=True),
        create_HinSAGE_model(graph, link_prediction=True),
        create_GCN_model(graph),
        create_GAT_model(graph),
    ]

    for gnn_model in gnn_models:
        keras_model = gnn_model[1]

        ens = Ensemble(keras_model, n_estimators=2, n_predictions=5)

        # Compiling without an optimizer must be rejected.
        with pytest.raises(ValueError):  # must specify the optimizer to use
            ens.compile(
                optimizer=None, loss=categorical_crossentropy, weighted_metrics=["acc"]
            )

        # Repeat for BaggingEnsemble
        ens = BaggingEnsemble(keras_model, n_estimators=2, n_predictions=5)

        with pytest.raises(ValueError):  # must specify the optimizer to use
            ens.compile(
                optimizer=None, loss=categorical_crossentropy, weighted_metrics=["acc"]
            )
def test_compile():
    """Compiling with a missing optimizer, a missing loss, or an unknown
    metric name must raise ``ValueError`` for both ensemble flavours."""
    tf.keras.backend.clear_session()

    graph = example_graph_1(feature_size=10)

    # Each entry: (base_model, keras_model, generator, train_gen)
    gnn_models = [
        create_graphSAGE_model(graph),
        create_HinSAGE_model(graph),
        create_graphSAGE_model(graph, link_prediction=True),
        create_HinSAGE_model(graph, link_prediction=True),
        create_GCN_model(graph),
        create_GAT_model(graph),
    ]

    for gnn_model in gnn_models:
        keras_model = gnn_model[1]

        # Both ensemble flavours share the same compile-time validation.
        for ensemble_cls in (Ensemble, BaggingEnsemble):
            ens = ensemble_cls(keras_model, n_estimators=2, n_predictions=5)

            # Missing loss: actually raised by Keras, checked here to be sure.
            with pytest.raises(ValueError):
                ens.compile(optimizer=Adam(), loss=None, weighted_metrics=["acc"])

            # An optimizer must always be given.
            with pytest.raises(ValueError):
                ens.compile(
                    optimizer=None,
                    loss=categorical_crossentropy,
                    weighted_metrics=["acc"],
                )

            # A made-up metric name should be rejected.
            with pytest.raises(ValueError):
                ens.compile(
                    optimizer=Adam(),
                    loss=categorical_crossentropy,
                    weighted_metrics=["f1_accuracy"],
                )
def test_deprecated_methods():
    """The deprecated ``*_generator`` methods must emit a DeprecationWarning
    and still behave like their non-deprecated counterparts."""
    tf.keras.backend.clear_session()

    train_data = np.array([1, 2])
    train_targets = np.array([[1, 0], [0, 1]])

    graph = example_graph_1(feature_size=2)
    _, keras_model, gen, train_gen = create_GAT_model(graph)

    ensemble = Ensemble(keras_model, n_estimators=1, n_predictions=1)
    bagging = BaggingEnsemble(keras_model, n_estimators=1, n_predictions=1)
    models = [ensemble, bagging]

    for model in models:
        model.compile(optimizer=Adam(), loss=binary_crossentropy)

    def check_histories(histories):
        # One estimator, trained for two epochs.
        assert len(histories) == 1
        assert len(histories[0].history["loss"]) == 2

    # Each deprecated generator method should warn yet behave like the
    # non-deprecated method.
    with pytest.warns(DeprecationWarning, match="'fit_generator' .* 'fit'"):
        ens_history = ensemble.fit_generator(train_gen, epochs=2, verbose=0)
    check_histories(ens_history)

    with pytest.warns(DeprecationWarning, match="'fit_generator' .* 'fit'"):
        bag_history = bagging.fit_generator(
            gen, train_data, train_targets, epochs=2, verbose=0
        )
    check_histories(bag_history)

    for model in models:
        with pytest.warns(
            DeprecationWarning, match="'evaluate_generator' .* 'evaluate'"
        ):
            eval_result = model.evaluate_generator(train_gen, verbose=0)
        np.testing.assert_array_equal(eval_result, model.evaluate(train_gen, verbose=0))

        with pytest.warns(DeprecationWarning, match="'predict_generator' .* 'predict'"):
            pred_result = model.predict_generator(train_gen, verbose=0)
        np.testing.assert_array_equal(pred_result, model.predict(train_gen, verbose=0))
# Stack the GraphSAGE and prediction layers into a Keras model.

# In[20]:

base_model = keras.Model(inputs=x_inp, outputs=prediction)

# Now we create the ensemble based on `base_model` we just created.

# In[21]:

model = BaggingEnsemble(
    model=base_model, n_estimators=n_estimators, n_predictions=n_predictions
)

# We need to `compile` the model specifying the optimiser, loss function, and metrics to use.

# In[22]:

model.compile(
    # `learning_rate` replaces the `lr` argument, which is deprecated in
    # TF 2.x Keras and emits a warning (removed in later releases).
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss=keras.losses.binary_crossentropy,
    weighted_metrics=["acc"],
)
def test_predict_generator_link_prediction():
    """``predict_generator`` on link-prediction models: invalid argument
    combinations raise, and output shapes are correct in both the summarised
    and raw (per-estimator, per-prediction) forms."""
    tf.keras.backend.clear_session()

    edge_ids_test = np.array([[1, 2], [2, 3], [1, 3]])
    n_links = len(edge_ids_test)

    graph = example_graph_1(feature_size=2)

    # Each entry: (base_model, keras_model, generator, train_gen)
    gnn_models = [
        create_graphSAGE_model(graph, link_prediction=True),
        create_HinSAGE_model(graph, link_prediction=True),
    ]

    for gnn_model in gnn_models:
        keras_model = gnn_model[1]
        generator = gnn_model[2]

        for ensemble_cls in (Ensemble, BaggingEnsemble):
            ens = ensemble_cls(keras_model, n_estimators=2, n_predictions=1)
            ens.compile(
                optimizer=Adam(), loss=binary_crossentropy, weighted_metrics=["acc"]
            )

            test_gen = generator.flow(edge_ids_test)

            # Giving both an already-flowed generator and raw data is invalid.
            # Error handling owned by Keras is deliberately not exercised here.
            with pytest.raises(ValueError):
                ens.predict_generator(generator=test_gen, predict_data=edge_ids_test)

            # The model is untrained; the random initial weights are enough
            # to exercise the prediction plumbing and check output shapes.
            summarised = ens.predict_generator(test_gen, summarise=True)
            print("test_predictions shape {}".format(summarised.shape))
            assert len(summarised) == n_links
            assert summarised.shape[1] == 1

            raw = ens.predict_generator(test_gen, summarise=False)
            assert raw.shape[0] == ens.n_estimators
            assert raw.shape[1] == ens.n_predictions
            assert raw.shape[2] == n_links
            assert raw.shape[3] == 1
def test_evaluate_generator_link_prediction():
    """``evaluate_generator`` on link-prediction models: invalid argument
    combinations raise, and the returned metric mean/std vectors line up."""
    tf.keras.backend.clear_session()

    edge_ids_test = np.array([[1, 2], [2, 3], [1, 3]])
    edge_labels_test = np.array([1, 1, 0])

    graph = example_graph_1(feature_size=4)

    # Each entry: (base_model, keras_model, generator, train_gen)
    gnn_models = [
        create_graphSAGE_model(graph, link_prediction=True),
        create_HinSAGE_model(graph, link_prediction=True),
    ]

    for gnn_model in gnn_models:
        keras_model = gnn_model[1]
        generator = gnn_model[2]

        for ensemble_cls in (Ensemble, BaggingEnsemble):
            ens = ensemble_cls(keras_model, n_estimators=2, n_predictions=1)
            ens.compile(
                optimizer=Adam(), loss=binary_crossentropy, weighted_metrics=["acc"]
            )

            # Invalid parameter combinations; error handling owned by Keras
            # is deliberately not exercised here.
            # A raw (un-flowed) generator plus data is not accepted.
            with pytest.raises(ValueError):
                ens.evaluate_generator(
                    generator=generator,
                    test_data=edge_ids_test,
                    test_targets=edge_labels_test,
                )
            # test_targets must be given alongside test_data.
            with pytest.raises(ValueError):
                ens.evaluate_generator(
                    generator=generator,
                    test_data=edge_labels_test,
                    test_targets=None,  # must give test_targets
                )
            # A flowed generator must not be combined with raw data.
            with pytest.raises(ValueError):
                ens.evaluate_generator(
                    generator=generator.flow(edge_ids_test, edge_labels_test),
                    test_data=edge_ids_test,
                    test_targets=edge_labels_test,
                )

            # The model is untrained; random initial weights suffice to
            # exercise the evaluation plumbing.
            metrics_mean, metrics_std = ens.evaluate_generator(
                generator.flow(edge_ids_test, edge_labels_test)
            )
            assert len(metrics_mean) == len(metrics_std)
            assert metrics_mean.ndim == 1
            assert metrics_std.ndim == 1
def test_predict_generator():
    """``predict_generator`` on node-classification models: invalid argument
    combinations raise, and output shapes are correct for both mini-batch
    (GraphSAGE/HinSAGE) and full-batch (GCN/GAT) generators."""
    tf.keras.backend.clear_session()

    test_data = np.array([4, 5, 6])
    test_targets = np.array([[1, 0], [0, 1], [0, 1]])

    graph = example_graph_1(feature_size=2)

    # Each entry: (base_model, keras_model, generator, train_gen)
    gnn_models = [
        create_graphSAGE_model(graph),
        create_HinSAGE_model(graph),
        create_GCN_model(graph),
        create_GAT_model(graph),
    ]

    for i, gnn_model in enumerate(gnn_models):
        keras_model = gnn_model[1]
        generator = gnn_model[2]
        # GCN and GAT are full-batch models, so their batch dimension is 1.
        full_batch = i > 1

        for ensemble_cls in (Ensemble, BaggingEnsemble):
            ens = ensemble_cls(keras_model, n_estimators=2, n_predictions=2)
            ens.compile(
                optimizer=Adam(),
                loss=categorical_crossentropy,
                weighted_metrics=["acc"],
            )

            test_gen = generator.flow(test_data)

            # Giving both an already-flowed generator and raw data is invalid.
            # Error handling owned by Keras is deliberately not exercised here.
            with pytest.raises(ValueError):
                ens.predict_generator(generator=test_gen, predict_data=test_data)

            # The model is untrained; random initial weights suffice to
            # exercise the prediction plumbing and check output shapes.
            summarised = ens.predict_generator(test_gen, summarise=True)
            print("test_predictions shape {}".format(summarised.shape))
            if full_batch:
                assert len(summarised) == 1
                assert summarised.shape[1] == test_targets.shape[0]
            else:
                assert len(summarised) == len(test_data)
            assert summarised.shape[-1] == test_targets.shape[-1]

            raw = ens.predict_generator(test_gen, summarise=False)
            assert raw.shape[0] == ens.n_estimators
            assert raw.shape[1] == ens.n_predictions
            if full_batch:
                assert raw.shape[2] == 1
            else:
                assert raw.shape[2] == len(test_data)
            assert raw.shape[-1] == test_targets.shape[-1]
def test_BaggingEnsemble_fit_generator():
    """``BaggingEnsemble.fit_generator`` accepts a valid raw generator plus
    data/targets, and rejects flowed generators, missing targets/data, and
    out-of-range ``bag_size`` values."""
    tf.keras.backend.clear_session()

    train_data = np.array([1, 2])
    train_targets = np.array([[1, 0], [0, 1]])

    graph = example_graph_1(feature_size=10)

    # Each entry: (base_model, keras_model, generator, train_gen)
    gnn_models = [
        create_graphSAGE_model(graph),
        create_HinSAGE_model(graph),
        create_GCN_model(graph),
        create_GAT_model(graph),
    ]

    for _, keras_model, generator, train_gen in gnn_models:
        ens = BaggingEnsemble(keras_model, n_estimators=2, n_predictions=1)
        ens.compile(
            optimizer=Adam(), loss=categorical_crossentropy, weighted_metrics=["acc"]
        )

        # A valid call trains without error.
        ens.fit_generator(
            generator=generator,
            train_data=train_data,
            train_targets=train_targets,
            epochs=1,
            validation_data=train_gen,
            verbose=0,
            shuffle=False,
        )

        # A BaggingEnsemble needs the raw generator, not an already-flowed one.
        with pytest.raises(ValueError):
            ens.fit_generator(
                train_gen,
                train_data=train_data,
                train_targets=train_targets,
                epochs=10,
                verbose=0,
                shuffle=False,
            )

        # train_targets must not be None when train_data is given.
        with pytest.raises(ValueError):
            ens.fit_generator(
                generator=generator,
                train_data=train_data,
                train_targets=None,
                epochs=10,
                validation_data=train_gen,
                verbose=0,
                shuffle=False,
            )

        # Neither train_data nor train_targets may be missing.
        with pytest.raises(ValueError):
            ens.fit_generator(
                generator=generator,
                train_data=None,
                train_targets=None,
                epochs=10,
                validation_data=None,
                verbose=0,
                shuffle=False,
            )

        # bag_size must be a positive integer no larger than len(train_data),
        # or None.
        with pytest.raises(ValueError):
            ens.fit_generator(
                generator=generator,
                train_data=train_data,
                train_targets=train_targets,
                epochs=10,
                validation_data=None,
                verbose=0,
                shuffle=False,
                bag_size=-1,
            )
        with pytest.raises(ValueError):
            ens.fit_generator(
                generator=generator,
                train_data=train_data,
                train_targets=train_targets,
                epochs=10,
                validation_data=None,
                verbose=0,
                shuffle=False,
                bag_size=10,  # larger than the number of training points
            )
def test_ensemble_init_parameters():
    """Constructor validation for Ensemble and BaggingEnsemble: the model
    must be a Keras model (not a StellarGraph base model) and both
    ``n_estimators`` and ``n_predictions`` must be positive integers.

    Fix: removed a redundant ``create_graphSAGE_model(graph)`` call whose
    unpacked results were immediately shadowed inside the loop below.
    """
    tf.keras.backend.clear_session()

    graph = example_graph_1(feature_size=10)

    # Each entry: (base_model, keras_model, generator, train_gen)
    gnn_models = [
        create_graphSAGE_model(graph),
        create_HinSAGE_model(graph),
        create_graphSAGE_model(graph, link_prediction=True),
        create_HinSAGE_model(graph, link_prediction=True),
        create_GCN_model(graph),
        create_GAT_model(graph),
    ]

    # (n_estimators, n_predictions) pairs that must all be rejected:
    # non-positive or non-integer values.
    bad_params = [
        (1, 0),
        (1, -3),
        (1, 1.7),
        (0, 11),
        (-8, 11),
        (2.5, 11),
    ]

    for gnn_model in gnn_models:
        base_model = gnn_model[0]
        keras_model = gnn_model[1]

        for ensemble_cls in (Ensemble, BaggingEnsemble):
            # Passing the wrong model type (mixed types) must fail.
            with pytest.raises(ValueError):
                ensemble_cls(base_model, n_estimators=3, n_predictions=3)

            for n_estimators, n_predictions in bad_params:
                with pytest.raises(ValueError):
                    ensemble_cls(
                        keras_model,
                        n_estimators=n_estimators,
                        n_predictions=n_predictions,
                    )

            # Valid parameters build the requested number of models.
            ens = ensemble_cls(keras_model, n_estimators=7, n_predictions=10)
            assert len(ens.models) == 7
            assert ens.n_estimators == 7
            assert ens.n_predictions == 10