def test_nlp_padded_valid(self):
    """Fit CXPlain on post-padded integer sequences and check the output shape.

    The explained model is a count -> tf-idf -> random forest pipeline that
    is fitted on the sequences before they are padded. The explainer is then
    fitted on the padded training data, and the attributions predicted for
    the padded test data must have the same shape as that test data.
    """
    num_words = 1024
    (x_train, y_train), (x_test, y_test) = \
        TestUtil.get_random_variable_length_dataset(max_value=num_words)

    # Fit the explained model before padding, i.e. on the sequences exactly
    # as returned by the dataset helper.
    explained_model = Pipeline([
        ('counts', CountVectoriser(num_words)),
        ('tfidf', TfidfTransformer()),
        ('model', RandomForestClassifier(n_estimators=64, max_depth=5,
                                         random_state=1)),
    ])
    explained_model.fit(x_train, y_train)

    model_builder = RNNModelBuilder(embedding_size=num_words,
                                    with_embedding=True,
                                    num_layers=2, num_units=32,
                                    activation="relu", p_dropout=0.2,
                                    verbose=0, batch_size=32,
                                    learning_rate=0.001, num_epochs=2,
                                    early_stopping_patience=128)
    masking_operation = WordDropMasking()
    loss = binary_crossentropy
    explainer = CXPlain(explained_model, model_builder, masking_operation,
                        loss)

    # Pad the test set to the padded training length so both share a shape.
    x_train = pad_sequences(x_train, padding="post", truncating="post",
                            dtype=int)
    x_test = pad_sequences(x_test, padding="post", truncating="post",
                           dtype=int, maxlen=x_train.shape[1])

    explainer.fit(x_train, y_train)

    # Called only to check that they run without raising; the returned
    # scores are not asserted on.
    explainer.score(x_test, y_test)
    explainer.get_last_fit_score()

    median = explainer.predict(x_test)
    # assertEqual reports both shapes when the check fails.
    self.assertEqual(median.shape, x_test.shape)
def test_boston_housing_valid(self):
    """Check median and confidence-bound attributions for several ensemble sizes.

    For each ``num_models`` in ``[2, 5, 10]`` a CXPlain explainer is fitted
    on the Boston housing data and ``predict`` is called with
    ``confidence_level=0.95``. The test asserts that:

    * the median has the shape of ``x_test``;
    * the confidence array has the shape of ``x_test`` plus a trailing
      axis of size 2;
    * for every attribution, ``confidence[..., 0] <= median <=
      confidence[..., 1]`` and both bounds are non-negative.

    NOTE(review): another ``test_boston_housing_valid`` is defined further
    down in the visible source. If both live in the same class, the later
    definition replaces this one and this test never runs -- confirm and
    rename one of them if so.
    """
    (x_train, y_train), (x_test, y_test) = TestUtil.get_boston_housing()

    explained_model = RandomForestRegressor(n_estimators=64, max_depth=5,
                                            random_state=1)
    explained_model.fit(x_train, y_train)

    model_builder = MLPModelBuilder(num_layers=2, num_units=32,
                                    activation="relu", p_dropout=0.2,
                                    verbose=0, batch_size=32,
                                    learning_rate=0.001, num_epochs=3,
                                    early_stopping_patience=128)
    masking_operation = ZeroMasking()
    loss = mean_squared_error

    num_samples = len(x_test)
    for num_models in [2, 5, 10]:
        # Attached to each assertion so a failure names the ensemble size.
        context = "num_models={}".format(num_models)

        explainer = CXPlain(explained_model, model_builder,
                            masking_operation, loss, num_models=num_models)
        explainer.fit(x_train, y_train)

        # Called only to check that they run without raising; the returned
        # scores are not asserted on.
        explainer.score(x_test, y_test)
        explainer.get_last_fit_score()

        median, confidence = explainer.predict(x_test, confidence_level=0.95)
        self.assertEqual(median.shape, x_test.shape, context)
        self.assertEqual(confidence.shape, x_test.shape + (2, ), context)

        # Flatten to (samples, features) and (samples, features, 2) so the
        # bounds can be compared against the median in one array operation
        # instead of one Python-level assertion per element.
        median = median.reshape((num_samples, -1))
        confidence = confidence.reshape((num_samples, -1, 2))
        lower = confidence[:, :, 0]
        upper = confidence[:, :, 1]

        self.assertTrue((lower <= median).all(), context)
        self.assertTrue((median <= upper).all(), context)
        self.assertTrue((lower >= 0).all(), context)
        self.assertTrue((upper >= 0).all(), context)
def test_mnist_unet_valid(self):
    """Fit CXPlain with a U-Net explanation model over several downsample factors.

    An MLP classifier is fitted on flattened MNIST subsamples. For each
    downsample factor a ``UNetModelBuilder`` is created, with batch
    normalisation switched on for every other factor (even positions in the
    list), and the predicted attributions must have the shape of ``x_test``.
    """
    num_subsamples = 100
    (x_train, y_train), (x_test, y_test) = TestUtil.get_mnist(
        flattened=False, num_subsamples=num_subsamples)

    explained_model = MLPClassifier(solver='lbfgs', alpha=1e-5,
                                    hidden_layer_sizes=(64, 32),
                                    random_state=1)
    # The classifier is fitted on flattened samples; CXPlain is told to
    # flatten for it via flatten_for_explained_model=True below.
    explained_model.fit(x_train.reshape((len(x_train), -1)), y_train)

    masking_operation = ZeroMasking()
    loss = categorical_crossentropy

    downsample_factors = [(2, 2), (4, 4), (4, 7), (7, 4), (7, 7)]
    for position, downsample_factor in enumerate(downsample_factors):
        # Alternate batch normalisation: on for even positions, off for odd.
        with_bn = position % 2 == 0

        model_builder = UNetModelBuilder(downsample_factor, num_layers=2,
                                         num_units=64, activation="relu",
                                         p_dropout=0.2, verbose=0,
                                         batch_size=256, learning_rate=0.001,
                                         num_epochs=2,
                                         early_stopping_patience=128,
                                         with_bn=with_bn)
        explainer = CXPlain(explained_model, model_builder,
                            masking_operation, loss,
                            downsample_factors=downsample_factor,
                            flatten_for_explained_model=True)
        explainer.fit(x_train, y_train)

        # Called only to check that they run without raising; the returned
        # scores are not asserted on.
        explainer.score(x_test, y_test)
        explainer.get_last_fit_score()

        median = explainer.predict(x_test)
        # assertEqual reports both shapes (and the failing configuration).
        self.assertEqual(
            median.shape, x_test.shape,
            "downsample_factor={}, with_bn={}".format(downsample_factor,
                                                      with_bn))
def test_boston_housing_valid(self):
    """Fit CXPlain with an MLP explanation model on the Boston housing data.

    Asserts that the fitted prediction model's output shape is
    ``(None, <number of features per sample>)`` and that the predicted
    attributions have the shape of ``x_test``.

    NOTE(review): another ``test_boston_housing_valid`` is defined earlier
    in the visible source. If both live in the same class, this definition
    replaces the earlier one -- confirm and rename one of them if so.
    """
    (x_train, y_train), (x_test, y_test) = TestUtil.get_boston_housing()

    explained_model = RandomForestRegressor(n_estimators=64, max_depth=5,
                                            random_state=1)
    explained_model.fit(x_train, y_train)

    model_builder = MLPModelBuilder(num_layers=2, num_units=32,
                                    activation="relu", p_dropout=0.2,
                                    verbose=0, batch_size=32,
                                    learning_rate=0.001, num_epochs=2,
                                    early_stopping_patience=128)
    masking_operation = ZeroMasking()
    loss = mean_squared_error

    explainer = CXPlain(explained_model, model_builder, masking_operation,
                        loss)
    explainer.fit(x_train, y_train)

    # One output per input feature: the product of all non-batch dimensions.
    num_features = np.prod(x_test.shape[1:])
    self.assertEqual(explainer.prediction_model.output_shape,
                     (None, num_features))

    # Called only to check that they run without raising; the returned
    # scores are not asserted on.
    explainer.score(x_test, y_test)
    explainer.get_last_fit_score()

    median = explainer.predict(x_test)
    # assertEqual reports both shapes when the check fails.
    self.assertEqual(median.shape, x_test.shape)
def test_time_series_valid(self):
    """Fit CXPlain with an RNN explanation model on fixed-length sequences.

    The explained model is an ``MLPClassifier`` fitted on the flattened
    training samples; CXPlain is created with
    ``flatten_for_explained_model=True``. The predicted attributions must
    have the shape of ``x_test``.
    """
    num_samples = 1024
    fixed_length = 99
    (x_train, y_train), (x_test, y_test) = \
        TestUtil.get_random_fixed_length_dataset(num_samples=num_samples,
                                                 fixed_length=fixed_length)

    model_builder = RNNModelBuilder(with_embedding=False, num_layers=2,
                                    num_units=32, activation="relu",
                                    p_dropout=0.2, verbose=0, batch_size=32,
                                    learning_rate=0.001, num_epochs=2,
                                    early_stopping_patience=128)

    explained_model = MLPClassifier()
    # Collapse every non-batch dimension so each sample is a flat vector.
    explained_model.fit(x_train.reshape((len(x_train), -1)), y_train)

    masking_operation = ZeroMasking()
    loss = binary_crossentropy
    explainer = CXPlain(explained_model, model_builder, masking_operation,
                        loss, flatten_for_explained_model=True)
    explainer.fit(x_train, y_train)

    # Called only to check that they run without raising; the returned
    # scores are not asserted on.
    explainer.score(x_test, y_test)
    explainer.get_last_fit_score()

    median = explainer.predict(x_test)
    # assertEqual reports both shapes when the check fails.
    self.assertEqual(median.shape, x_test.shape)