def test_nlp_erroneous_rnn_args_invalid(self):
        """Verify that invalid RNNModelBuilder arguments and inputs raise ValueError.

        Two situations are exercised:

        * constructing the builder with ``with_embedding=True`` but without
          an ``embedding_size``;
        * calling ``build`` on an embedding-enabled builder with input layers
          of shape ``(10, 2)`` and ``(10, 3)``.
        """
        vocabulary_size = 1024
        (x_train, y_train), (_, _) = TestUtil.get_random_variable_length_dataset(
            max_value=vocabulary_size)

        # Bag-of-words pipeline wrapped around a small random forest.
        forest = RandomForestClassifier(n_estimators=64,
                                        max_depth=5,
                                        random_state=1)
        pipeline_steps = [
            ('counts', CountVectoriser(vocabulary_size)),
            ('tfidf', TfidfTransformer()),
            ('model', forest),
        ]
        explained_model = Pipeline(pipeline_steps)
        explained_model.fit(x_train, y_train)

        # Must also specify the embedding_size argument.
        with self.assertRaises(ValueError):
            _ = RNNModelBuilder(with_embedding=True, verbose=0)

        model_builder = RNNModelBuilder(embedding_size=vocabulary_size,
                                        with_embedding=True,
                                        verbose=0)

        # NOTE(review): presumably rejected because an embedding layer
        # cannot consume inputs with a trailing feature axis - confirm
        # against RNNModelBuilder.build.
        for invalid_shape in ((10, 2), (10, 3)):
            input_layer = Input(shape=invalid_shape)
            with self.assertRaises(ValueError):
                model_builder.build(input_layer)
    def test_imdb_padded_valid(self):
        """Fit and query a CXPlain explainer on post-padded IMDB sequences.

        The explained model (count vectoriser -> tf-idf -> random forest) is
        fit on the sequences returned by ``TestUtil.get_imdb``. The explainer
        is then fit and evaluated on padded copies of those sequences, and
        its prediction is expected to have exactly the shape of the padded
        test input.
        """
        num_samples = 32
        num_words = 1024
        (x_train, y_train), (x_test, y_test) = TestUtil.get_imdb(word_dictionary_size=num_words,
                                                                 num_subsamples=num_samples)

        explained_model = RandomForestClassifier(n_estimators=64, max_depth=5, random_state=1)

        counter = CountVectoriser(num_words)
        tfidf_transformer = TfidfTransformer()

        explained_model = Pipeline([('counts', counter),
                                    ('tfidf', tfidf_transformer),
                                    ('model', explained_model)])
        explained_model.fit(x_train, y_train)

        model_builder = RNNModelBuilder(embedding_size=num_words, with_embedding=True,
                                        num_layers=2, num_units=32, activation="relu", p_dropout=0.2, verbose=0,
                                        batch_size=32, learning_rate=0.001, num_epochs=2, early_stopping_patience=128)
        masking_operation = WordDropMasking()
        loss = binary_crossentropy
        explainer = CXPlain(explained_model, model_builder, masking_operation, loss)

        # Pad the test split to the padded training width so both arrays
        # share the same second dimension.
        x_train = pad_sequences(x_train, padding="post", truncating="post", dtype=int)
        x_test = pad_sequences(x_test, padding="post", truncating="post", dtype=int, maxlen=x_train.shape[1])

        explainer.fit(x_train, y_train)

        # The results are not inspected; these calls only check that scoring
        # runs without raising after a fit.
        explainer.score(x_test, y_test)
        explainer.get_last_fit_score()

        predictions = explainer.predict(x_test)
        # assertEqual reports both shapes on failure, unlike assertTrue(a == b).
        self.assertEqual(predictions.shape, x_test.shape)
    # Example #3
    # 0
    def test_causal_loss_padded_input(self):
        """Check the shape of delta errors computed on post-padded sequences.

        For every model returned by ``TestUtil.get_classification_models`` a
        count vectoriser -> tf-idf -> model pipeline is fit on the raw
        variable-length sequences. Predictions after word-drop masking are
        then computed on a padded copy of the same data, and the resulting
        delta errors must have shape ``(num_samples, padded_length)``.
        """
        models = TestUtil.get_classification_models()

        batch_size = 32
        num_samples = 1024
        num_words = 1024

        (x_train, y_train), (x_test, y_test) = \
            TestUtil.get_random_variable_length_dataset(num_samples=num_samples, max_value=num_words)
        x = np.concatenate([x_train, x_test], axis=0)
        y = np.concatenate([y_train, y_test], axis=0)

        self.assertEqual(x.shape[0], num_samples)

        # Pad once, into a separate variable. Rebinding `x` inside the loop
        # would make every model after the first fit on already-padded data
        # instead of the raw variable-length sequences.
        x_padded = pad_sequences(x, padding="post", truncating="post", dtype=int)

        for model in models:
            counter = CountVectoriser(num_words)
            tfidf_transformer = TfidfTransformer()

            explained_model = Pipeline([('counts', counter),
                                        ('tfidf', tfidf_transformer),
                                        ('model', model)])
            TestUtil.fit_proxy(explained_model, x, y)
            masking = WordDropMasking()

            _, y_pred, all_y_pred_imputed = masking.get_predictions_after_masking(
                explained_model,
                x_padded,
                y,
                batch_size=batch_size,
                downsample_factors=(1, ),
                flatten=False)
            auxiliary_outputs = y_pred
            all_but_one_auxiliary_outputs = TestUtil.split_auxiliary_outputs_on_feature_dim(
                all_y_pred_imputed)

            delta_errors = calculate_delta_errors(
                y,
                auxiliary_outputs,
                all_but_one_auxiliary_outputs,
                NumpyInterface.binary_crossentropy,
                math_ops=NumpyInterface)

            # Ensure correct delta error dimensionality.
            self.assertEqual(delta_errors.shape, (num_samples, x_padded.shape[1]))
    def test_nlp_not_padded_invalid(self):
        """Verify that fitting CXPlain on unpadded sequences raises ValueError.

        The explained pipeline is fit on the variable-length training data
        first; the same unpadded data is then handed to ``explainer.fit``,
        which is expected to reject it.
        """
        vocabulary_size = 1024
        (x_train, y_train), (_, _) = TestUtil.get_random_variable_length_dataset(
            max_value=vocabulary_size)

        forest = RandomForestClassifier(n_estimators=64, max_depth=5, random_state=1)
        pipeline_steps = [
            ('counts', CountVectoriser(vocabulary_size)),
            ('tfidf', TfidfTransformer()),
            ('model', forest),
        ]
        explained_model = Pipeline(pipeline_steps)
        explained_model.fit(x_train, y_train)

        builder_options = dict(
            embedding_size=vocabulary_size,
            with_embedding=True,
            num_layers=2,
            num_units=32,
            activation="relu",
            p_dropout=0.2,
            verbose=0,
            batch_size=32,
            learning_rate=0.001,
            num_epochs=2,
            early_stopping_patience=128,
        )
        model_builder = RNNModelBuilder(**builder_options)
        explainer = CXPlain(explained_model, model_builder, WordDropMasking(), binary_crossentropy)

        # No pad_sequences call here: the raw training data goes straight in.
        with self.assertRaises(ValueError):
            explainer.fit(x_train, y_train)