def test_nlp_padded_valid(self):
        """Fit CXPlain on post-padded sequences and check the prediction shape.

        The explained model is a counts -> tf-idf -> random-forest pipeline
        fitted on the raw variable-length sequences. The explainer itself is
        fitted and evaluated on the padded arrays.
        """
        num_words = 1024
        (x_train, y_train), (x_test, y_test) = \
            TestUtil.get_random_variable_length_dataset(max_value=num_words)

        classifier = RandomForestClassifier(n_estimators=64,
                                            max_depth=5,
                                            random_state=1)
        explained_model = Pipeline([
            ('counts', CountVectoriser(num_words)),
            ('tfidf', TfidfTransformer()),
            ('model', classifier),
        ])
        # Fitted on the unpadded sequences, before x_train is rebound below.
        explained_model.fit(x_train, y_train)

        model_builder = RNNModelBuilder(embedding_size=num_words,
                                        with_embedding=True,
                                        num_layers=2,
                                        num_units=32,
                                        activation="relu",
                                        p_dropout=0.2,
                                        verbose=0,
                                        batch_size=32,
                                        learning_rate=0.001,
                                        num_epochs=2,
                                        early_stopping_patience=128)
        masking_operation = WordDropMasking()
        loss = binary_crossentropy
        explainer = CXPlain(explained_model, model_builder, masking_operation,
                            loss)

        padding_options = dict(padding="post", truncating="post", dtype=int)
        x_train = pad_sequences(x_train, **padding_options)
        # Force the test split to the padded training width so both arrays
        # have the same second dimension.
        x_test = pad_sequences(x_test,
                               maxlen=x_train.shape[1],
                               **padding_options)

        explainer.fit(x_train, y_train)
        # Exercise the scoring API; the returned values are not asserted on.
        explainer.score(x_test, y_test)
        explainer.get_last_fit_score()

        median = explainer.predict(x_test)
        # assertEqual reports both shapes on failure, unlike assertTrue(a == b).
        self.assertEqual(median.shape, x_test.shape)
Exemple #2
0
    def test_boston_housing_valid(self):
        """Check CXPlain predictions and confidence bounds for several num_models.

        For num_models in 2, 5 and 10 the explainer is fitted and asked for
        predictions at confidence_level=0.95. The test asserts that the median
        has the shape of x_test, that the confidence array has one extra
        trailing axis of size 2, and that for every entry
        0 <= lower <= median <= upper.
        """
        # NOTE(review): another method in this file has the same name; if both
        # are in one class, the later definition replaces this one - confirm.
        (x_train, y_train), (x_test, y_test) = TestUtil.get_boston_housing()
        explained_model = RandomForestRegressor(n_estimators=64,
                                                max_depth=5,
                                                random_state=1)
        explained_model.fit(x_train, y_train)

        model_builder = MLPModelBuilder(num_layers=2,
                                        num_units=32,
                                        activation="relu",
                                        p_dropout=0.2,
                                        verbose=0,
                                        batch_size=32,
                                        learning_rate=0.001,
                                        num_epochs=3,
                                        early_stopping_patience=128)
        masking_operation = ZeroMasking()
        loss = mean_squared_error

        for num_models in [2, 5, 10]:
            explainer = CXPlain(explained_model,
                                model_builder,
                                masking_operation,
                                loss,
                                num_models=num_models)

            explainer.fit(x_train, y_train)
            # Exercise the scoring API; the returned values are not asserted on.
            explainer.score(x_test, y_test)
            explainer.get_last_fit_score()

            median, confidence = explainer.predict(x_test,
                                                   confidence_level=0.95)

            self.assertEqual(median.shape, x_test.shape)
            self.assertEqual(confidence.shape, x_test.shape + (2, ))

            # Flatten to one row per sample so the checks below can walk
            # (sample, feature) pairs regardless of the input rank.
            median = median.reshape((len(x_test), -1))
            confidence = confidence.reshape((len(x_test), -1, 2))

            for median_row, bounds_row in zip(median, confidence):
                for value, (lower, upper) in zip(median_row, bounds_row):
                    # The median must lie inside its own interval ...
                    self.assertLessEqual(lower, value)
                    self.assertLessEqual(value, upper)
                    # ... and both interval ends must be non-negative.
                    self.assertGreaterEqual(lower, 0)
                    self.assertGreaterEqual(upper, 0)
    def test_mnist_unet_valid(self):
        """Fit CXPlain with a UNet builder for several downsample factors.

        Each downsample factor is paired with a with_bn flag that alternates
        starting at True. For every pairing the explainer is fitted and its
        prediction on x_test must have the same shape as x_test.
        """
        num_subsamples = 100
        (x_train, y_train), (x_test, y_test) = TestUtil.get_mnist(
            flattened=False, num_subsamples=num_subsamples)

        explained_model = MLPClassifier(solver='lbfgs',
                                        alpha=1e-5,
                                        hidden_layer_sizes=(64, 32),
                                        random_state=1)
        # The classifier is trained on one flat vector per sample; CXPlain is
        # told to flatten for it via flatten_for_explained_model below.
        explained_model.fit(x_train.reshape((len(x_train), -1)), y_train)
        masking_operation = ZeroMasking()
        loss = categorical_crossentropy

        downsample_factors = [(2, 2), (4, 4), (4, 7), (7, 4), (7, 7)]
        for position, downsample_factor in enumerate(downsample_factors):
            # Alternate the with_bn flag: True at even positions, False at odd.
            with_bn = position % 2 == 0
            model_builder = UNetModelBuilder(downsample_factor,
                                             num_layers=2,
                                             num_units=64,
                                             activation="relu",
                                             p_dropout=0.2,
                                             verbose=0,
                                             batch_size=256,
                                             learning_rate=0.001,
                                             num_epochs=2,
                                             early_stopping_patience=128,
                                             with_bn=with_bn)

            explainer = CXPlain(explained_model,
                                model_builder,
                                masking_operation,
                                loss,
                                downsample_factors=downsample_factor,
                                flatten_for_explained_model=True)

            explainer.fit(x_train, y_train)
            # Exercise the scoring API; the returned values are not asserted on.
            explainer.score(x_test, y_test)
            explainer.get_last_fit_score()

            median = explainer.predict(x_test)
            # assertEqual reports both shapes on failure.
            self.assertEqual(median.shape, x_test.shape)
    def test_boston_housing_valid(self):
        """Fit a default CXPlain on Boston housing and check output shapes.

        Asserts that the fitted prediction model's output_shape is
        (None, <product of the non-batch dimensions of x_test>) and that the
        prediction for x_test has the same shape as x_test.
        """
        # NOTE(review): another method in this file has the same name; if both
        # are in one class, this definition replaces the earlier one - confirm.
        (x_train, y_train), (x_test, y_test) = TestUtil.get_boston_housing()
        explained_model = RandomForestRegressor(n_estimators=64, max_depth=5, random_state=1)
        explained_model.fit(x_train, y_train)

        model_builder = MLPModelBuilder(num_layers=2, num_units=32, activation="relu", p_dropout=0.2, verbose=0,
                                        batch_size=32, learning_rate=0.001, num_epochs=2, early_stopping_patience=128)
        masking_operation = ZeroMasking()
        loss = mean_squared_error
        explainer = CXPlain(explained_model, model_builder, masking_operation, loss)

        explainer.fit(x_train, y_train)
        expected_output_shape = (None, np.prod(x_test.shape[1:]))
        self.assertEqual(explainer.prediction_model.output_shape, expected_output_shape)

        # Exercise the scoring API; the returned values are not asserted on.
        explainer.score(x_test, y_test)
        explainer.get_last_fit_score()

        median = explainer.predict(x_test)
        # Same assertion style as the output_shape check above, so a failure
        # reports both shapes.
        self.assertEqual(median.shape, x_test.shape)
    def test_time_series_valid(self):
        """Fit CXPlain with an RNN builder (no embedding) on fixed-length data.

        The explained MLPClassifier is trained on the samples flattened to 2-D,
        and CXPlain is constructed with flatten_for_explained_model=True. The
        prediction for x_test must have the same shape as x_test.
        """
        num_samples = 1024
        fixed_length = 99
        (x_train, y_train), (x_test, y_test) = TestUtil.get_random_fixed_length_dataset(
            num_samples=num_samples, fixed_length=fixed_length)

        model_builder = RNNModelBuilder(with_embedding=False, num_layers=2, num_units=32,
                                        activation="relu", p_dropout=0.2, verbose=0,
                                        batch_size=32, learning_rate=0.001, num_epochs=2,
                                        early_stopping_patience=128)

        explained_model = MLPClassifier()
        # Collapse every non-batch dimension into one feature axis for the classifier.
        flat_width = np.prod(x_train.shape[1:])
        explained_model.fit(x_train.reshape((-1, flat_width)), y_train)

        masking_operation = ZeroMasking()
        loss = binary_crossentropy
        explainer = CXPlain(explained_model, model_builder, masking_operation, loss,
                            flatten_for_explained_model=True)

        explainer.fit(x_train, y_train)
        # Exercise the scoring API; the returned values are not asserted on.
        explainer.score(x_test, y_test)
        explainer.get_last_fit_score()

        median = explainer.predict(x_test)
        # assertEqual reports both shapes on failure.
        self.assertEqual(median.shape, x_test.shape)