# Module scaffolding (imports and the TestCXPlain class name) is assumed for this
# excerpt, which originally contained only the test methods; adjust the cxplain
# module paths to match your installation.
import shutil
import tempfile
import unittest

import numpy as np
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from tensorflow.python.keras.layers import Dense, Flatten, Input
from tensorflow.python.keras.losses import binary_crossentropy, categorical_crossentropy, mean_squared_error
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.preprocessing.sequence import pad_sequences

from cxplain import CXPlain, MLPModelBuilder, RNNModelBuilder, UNetModelBuilder, WordDropMasking, ZeroMasking
from cxplain.util.count_vectoriser import CountVectoriser
from cxplain.util.test_util import TestUtil


class TestCXPlain(unittest.TestCase):
    def test_boston_housing_load_save_valid(self):
        (x_train, y_train), (x_test, y_test) = TestUtil.get_boston_housing()
        explained_model = RandomForestRegressor(n_estimators=64, max_depth=5, random_state=1)
        explained_model.fit(x_train, y_train)

        model_builder = MLPModelBuilder(num_layers=2, num_units=32, activation="relu", p_dropout=0.2,
                                        verbose=0, batch_size=32, learning_rate=0.001, num_epochs=2,
                                        early_stopping_patience=128)
        masking_operation = ZeroMasking()
        loss = mean_squared_error

        num_models_settings = [1, 2]
        for num_models in num_models_settings:
            explainer = CXPlain(explained_model, model_builder, masking_operation, loss,
                                num_models=num_models)
            explainer.fit(x_train, y_train)
            median_1 = explainer.predict(x_test)

            tmp_dir_name = tempfile.mkdtemp()
            explainer.save(tmp_dir_name)

            # Saving into a non-empty directory must fail unless overwriting is requested.
            with self.assertRaises(ValueError):
                explainer.save(tmp_dir_name, overwrite=False)
            explainer.save(tmp_dir_name, overwrite=True)
            explainer.load(tmp_dir_name)
            median_2 = explainer.predict(x_test)

            # A save/load round trip must not change the predicted attributions.
            self.assertTrue(np.array_equal(median_1, median_2))

            shutil.rmtree(tmp_dir_name)  # Cleanup.
    def test_boston_housing_no_fit_invalid(self):
        (x_train, y_train), (x_test, y_test) = TestUtil.get_boston_housing()
        explained_model = RandomForestRegressor(n_estimators=64, max_depth=5, random_state=1)
        explained_model.fit(x_train, y_train)

        model_builder = MLPModelBuilder(num_layers=2, num_units=32, activation="relu", p_dropout=0.2,
                                        verbose=0, batch_size=32, learning_rate=0.001, num_epochs=2,
                                        early_stopping_patience=128)
        masking_operation = ZeroMasking()
        loss = mean_squared_error

        explainer = CXPlain(explained_model, model_builder, masking_operation, loss)

        # Using an explainer that has not been fit must raise.
        with self.assertRaises(AssertionError):
            explainer.predict(x_test, y_test)

        with self.assertRaises(AssertionError):
            explainer.score(x_test, y_test)
    def test_boston_housing_confidence_level_invalid(self):
        (x_train, y_train), (x_test, y_test) = TestUtil.get_boston_housing()
        explained_model = RandomForestRegressor(n_estimators=64, max_depth=5, random_state=1)
        explained_model.fit(x_train, y_train)

        model_builder = MLPModelBuilder(num_layers=2, num_units=32, activation="relu", p_dropout=0.2,
                                        verbose=0, batch_size=32, learning_rate=0.001, num_epochs=3,
                                        early_stopping_patience=128)
        masking_operation = ZeroMasking()
        loss = mean_squared_error

        num_models = 2
        explainer = CXPlain(explained_model, model_builder, masking_operation, loss,
                            num_models=num_models)
        explainer.fit(x_train, y_train)

        invalid_confidence_levels = [1.01, -0.5, -0.01]
        for confidence_level in invalid_confidence_levels:
            with self.assertRaises(ValueError):
                explainer.predict(x_test, confidence_level=confidence_level)
    def test_time_series_valid(self):
        num_samples = 1024
        fixed_length = 99
        (x_train, y_train), (x_test, y_test) = TestUtil.get_random_fixed_length_dataset(
            num_samples=num_samples, fixed_length=fixed_length)

        model_builder = RNNModelBuilder(with_embedding=False, num_layers=2, num_units=32,
                                        activation="relu", p_dropout=0.2, verbose=0, batch_size=32,
                                        learning_rate=0.001, num_epochs=2, early_stopping_patience=128)

        # The explained sklearn MLP expects flat inputs, so the sequences are flattened
        # for it, while the RNN explanation model consumes them as-is
        # (see flatten_for_explained_model below).
        explained_model = MLPClassifier()
        explained_model.fit(x_train.reshape((-1, np.prod(x_train.shape[1:]))), y_train)

        masking_operation = ZeroMasking()
        loss = binary_crossentropy

        explainer = CXPlain(explained_model, model_builder, masking_operation, loss,
                            flatten_for_explained_model=True)
        explainer.fit(x_train, y_train)
        eval_score = explainer.score(x_test, y_test)
        train_score = explainer.get_last_fit_score()
        median = explainer.predict(x_test)
        self.assertTrue(median.shape == x_test.shape)
    def test_boston_housing_valid(self):
        (x_train, y_train), (x_test, y_test) = TestUtil.get_boston_housing()
        explained_model = RandomForestRegressor(n_estimators=64, max_depth=5, random_state=1)
        explained_model.fit(x_train, y_train)

        model_builder = MLPModelBuilder(num_layers=2, num_units=32, activation="relu", p_dropout=0.2,
                                        verbose=0, batch_size=32, learning_rate=0.001, num_epochs=2,
                                        early_stopping_patience=128)
        masking_operation = ZeroMasking()
        loss = mean_squared_error

        explainer = CXPlain(explained_model, model_builder, masking_operation, loss)
        explainer.fit(x_train, y_train)
        self.assertEqual(explainer.prediction_model.output_shape,
                         (None, np.prod(x_test.shape[1:])))
        eval_score = explainer.score(x_test, y_test)
        train_score = explainer.get_last_fit_score()
        median = explainer.predict(x_test)
        self.assertTrue(median.shape == x_test.shape)
    def test_imdb_padded_valid(self):
        num_samples = 32
        num_words = 1024
        (x_train, y_train), (x_test, y_test) = TestUtil.get_imdb(word_dictionary_size=num_words,
                                                                 num_subsamples=num_samples)

        explained_model = RandomForestClassifier(n_estimators=64, max_depth=5, random_state=1)
        counter = CountVectoriser(num_words)
        tfidf_transformer = TfidfTransformer()
        explained_model = Pipeline([('counts', counter),
                                    ('tfidf', tfidf_transformer),
                                    ('model', explained_model)])
        explained_model.fit(x_train, y_train)

        model_builder = RNNModelBuilder(embedding_size=num_words, with_embedding=True,
                                        num_layers=2, num_units=32, activation="relu", p_dropout=0.2,
                                        verbose=0, batch_size=32, learning_rate=0.001, num_epochs=2,
                                        early_stopping_patience=128)
        masking_operation = WordDropMasking()
        loss = binary_crossentropy

        explainer = CXPlain(explained_model, model_builder, masking_operation, loss)

        x_train = pad_sequences(x_train, padding="post", truncating="post", dtype=int)
        x_test = pad_sequences(x_test, padding="post", truncating="post", dtype=int,
                               maxlen=x_train.shape[1])

        explainer.fit(x_train, y_train)
        eval_score = explainer.score(x_test, y_test)
        train_score = explainer.get_last_fit_score()
        median = explainer.predict(x_test)
        self.assertTrue(median.shape == x_test.shape)
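    # Illustrative aside (not part of the original test): pad_sequences makes the
    # variable-length reviews rectangular by appending zeros and truncates over-long
    # ones from the end, e.g.
    #   pad_sequences([[1, 2, 3], [4, 5]], padding="post")  ->  [[1, 2, 3], [4, 5, 0]]
    # The test set is padded to the training set's length so both share one shape.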
    def test_mnist_unet_valid(self):
        num_subsamples = 100
        (x_train, y_train), (x_test, y_test) = TestUtil.get_mnist(flattened=False,
                                                                  num_subsamples=num_subsamples)

        explained_model = MLPClassifier(solver='lbfgs', alpha=1e-5,
                                        hidden_layer_sizes=(64, 32), random_state=1)
        explained_model.fit(x_train.reshape((len(x_train), -1)), y_train)

        masking_operation = ZeroMasking()
        loss = categorical_crossentropy

        downsample_factors = [(2, 2), (4, 4), (4, 7), (7, 4), (7, 7)]
        with_bns = [i % 2 == 0 for i in range(len(downsample_factors))]
        for downsample_factor, with_bn in zip(downsample_factors, with_bns):
            model_builder = UNetModelBuilder(downsample_factor, num_layers=2, num_units=64,
                                             activation="relu", p_dropout=0.2, verbose=0,
                                             batch_size=256, learning_rate=0.001, num_epochs=2,
                                             early_stopping_patience=128, with_bn=with_bn)

            explainer = CXPlain(explained_model, model_builder, masking_operation, loss,
                                downsample_factors=downsample_factor,
                                flatten_for_explained_model=True)
            explainer.fit(x_train, y_train)
            eval_score = explainer.score(x_test, y_test)
            train_score = explainer.get_last_fit_score()
            median = explainer.predict(x_test)
            self.assertTrue(median.shape == x_test.shape)
    def test_mnist_unet_with_shape_valid(self):
        num_subsamples = 100
        (x_train, y_train), (x_test, y_test) = TestUtil.get_mnist(flattened=False,
                                                                  num_subsamples=num_subsamples)

        explained_model_builder = MLPModelBuilder(num_layers=2, num_units=64, activation="relu",
                                                  p_dropout=0.2, verbose=0, batch_size=256,
                                                  learning_rate=0.001, num_epochs=2,
                                                  early_stopping_patience=128)
        input_shape = x_train.shape[1:]
        input_layer = Input(shape=input_shape)
        last_layer = Flatten()(input_layer)
        last_layer = explained_model_builder.build(last_layer)
        last_layer = Dense(y_train.shape[-1], activation="softmax")(last_layer)
        explained_model = Model(input_layer, last_layer)
        explained_model.compile(loss="categorical_crossentropy", optimizer="adam")
        explained_model.fit(x_train, y_train)

        masking_operation = ZeroMasking()
        loss = categorical_crossentropy

        downsample_factors = [(2, 2), (4, 4), (4, 7), (7, 4), (7, 7)]
        with_bns = [i % 2 == 0 for i in range(len(downsample_factors))]
        for downsample_factor, with_bn in zip(downsample_factors, with_bns):
            model_builder = UNetModelBuilder(downsample_factor, num_layers=2, num_units=64,
                                             activation="relu", p_dropout=0.2, verbose=0,
                                             batch_size=256, learning_rate=0.001, num_epochs=2,
                                             early_stopping_patience=128, with_bn=with_bn)

            explainer = CXPlain(explained_model, model_builder, masking_operation, loss,
                                downsample_factors=downsample_factor)
            explainer.fit(x_train, y_train)
            eval_score = explainer.score(x_test, y_test)
            train_score = explainer.get_last_fit_score()
            median = explainer.predict(x_test)
            self.assertTrue(median.shape == x_test.shape)
    def test_mnist_valid(self):
        num_subsamples = 100
        (x_train, y_train), (x_test, y_test) = TestUtil.get_mnist(flattened=False,
                                                                  num_subsamples=num_subsamples)

        explained_model = MLPClassifier(solver='lbfgs', alpha=1e-5,
                                        hidden_layer_sizes=(64, 32), random_state=1)
        explained_model.fit(x_train.reshape((len(x_train), -1)), y_train)

        model_builder = MLPModelBuilder(num_layers=2, num_units=64, activation="relu", p_dropout=0.2,
                                        verbose=0, batch_size=256, learning_rate=0.001, num_epochs=3,
                                        early_stopping_patience=128)
        masking_operation = ZeroMasking()
        loss = categorical_crossentropy

        downsample_factors = [(2, 2), (4, 4), (4, 7), (7, 4), (7, 7)]
        for downsample_factor in downsample_factors:
            explainer = CXPlain(explained_model, model_builder, masking_operation, loss,
                                num_models=2, downsample_factors=downsample_factor,
                                flatten_for_explained_model=True)
            explainer.fit(x_train, y_train)
            eval_score = explainer.score(x_test, y_test)
            train_score = explainer.get_last_fit_score()
            median, confidence = explainer.predict(x_test, confidence_level=0.95)
            self.assertTrue(median.shape == x_test.shape)
            self.assertTrue(confidence.shape == x_test.shape[:-1] + (2,))

            # Flatten predictions for iteration below.
            median = median.reshape((len(x_test), -1))
            confidence = confidence.reshape((len(x_test), -1, 2))
            for sample_idx in range(len(x_test)):
                # Check every flattened feature, not just the first image axis.
                for feature_idx in range(median.shape[1]):
                    self.assertTrue(confidence[sample_idx][feature_idx][0] <=
                                    median[sample_idx][feature_idx] <=
                                    confidence[sample_idx][feature_idx][1])
                    self.assertTrue(confidence[sample_idx][feature_idx][0] >= 0)
                    self.assertTrue(confidence[sample_idx][feature_idx][1] >= 0)
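    # Shape note (derived from the assertions above, assuming a trailing channel axis
    # of size 1): for non-flattened MNIST input of shape (n_samples, 28, 28, 1), the
    # per-pixel (lower, upper) bounds replace the channel axis, so
    #   median.shape     == (n_samples, 28, 28, 1)
    #   confidence.shape == (n_samples, 28, 28, 2)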
    def test_boston_housing_confidence_valid(self):
        (x_train, y_train), (x_test, y_test) = TestUtil.get_boston_housing()
        explained_model = RandomForestRegressor(n_estimators=64, max_depth=5, random_state=1)
        explained_model.fit(x_train, y_train)

        model_builder = MLPModelBuilder(num_layers=2, num_units=32, activation="relu", p_dropout=0.2,
                                        verbose=0, batch_size=32, learning_rate=0.001, num_epochs=3,
                                        early_stopping_patience=128)
        masking_operation = ZeroMasking()
        loss = mean_squared_error

        for num_models in [2, 5, 10]:
            explainer = CXPlain(explained_model, model_builder, masking_operation, loss,
                                num_models=num_models)
            explainer.fit(x_train, y_train)
            eval_score = explainer.score(x_test, y_test)
            train_score = explainer.get_last_fit_score()
            median, confidence = explainer.predict(x_test, confidence_level=0.95)
            self.assertTrue(median.shape == x_test.shape)
            self.assertTrue(confidence.shape == x_test.shape + (2,))

            # Flatten predictions for iteration below.
            median = median.reshape((len(x_test), -1))
            confidence = confidence.reshape((len(x_test), -1, 2))
            for sample_idx in range(len(x_test)):
                for feature_idx in range(len(x_test[sample_idx])):
                    self.assertTrue(confidence[sample_idx][feature_idx][0] <=
                                    median[sample_idx][feature_idx] <=
                                    confidence[sample_idx][feature_idx][1])
                    self.assertTrue(confidence[sample_idx][feature_idx][0] >= 0)
                    self.assertTrue(confidence[sample_idx][feature_idx][1] >= 0)
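    # Shape convention illustrated (as asserted above): for Boston housing, x_test has
    # shape (n_samples, 13), the median attribution map matches x_test exactly, and the
    # confidence array appends a trailing axis of length 2 holding the (lower, upper)
    # bound per feature:
    #   median.shape     == (n_samples, 13)
    #   confidence.shape == (n_samples, 13, 2)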
    def test_mnist_confidence_levels_invalid(self):
        num_subsamples = 100
        (x_train, y_train), (x_test, y_test) = TestUtil.get_mnist(flattened=False,
                                                                  num_subsamples=num_subsamples)

        explained_model = MLPClassifier(solver='lbfgs', alpha=1e-5,
                                        hidden_layer_sizes=(64, 32), random_state=1)
        explained_model.fit(x_train.reshape((len(x_train), -1)), y_train)

        model_builder = MLPModelBuilder(num_layers=2, num_units=64, activation="relu", p_dropout=0.2,
                                        verbose=0, batch_size=256, learning_rate=0.001, num_epochs=3,
                                        early_stopping_patience=128)
        masking_operation = ZeroMasking()
        loss = categorical_crossentropy

        # Degenerate (0.0, 1.0) and out-of-range confidence levels must all be rejected.
        confidence_levels = [0.0, 1.0, 1.01, -0.01]
        for confidence_level in confidence_levels:
            downsample_factor = (2, 2)
            explainer = CXPlain(explained_model, model_builder, masking_operation, loss,
                                num_models=2, downsample_factors=downsample_factor,
                                flatten_for_explained_model=True)
            explainer.fit(x_train, y_train)

            with self.assertRaises(ValueError):
                _ = explainer.predict(x_test, confidence_level=confidence_level)
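
# Entry point assumed for this excerpt so the suite can be run directly.
if __name__ == "__main__":
    unittest.main()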