Esempio n. 1
0
    def test_explain(self):
        """Check that ``explain`` agrees with ``predict`` and is well formed.

        Fits a classifier on one sample drawn from ``self.dataset``, then
        runs both ``explain`` and ``predict`` on a second sample. Verifies
        that the predictions embedded in the explanations equal the plain
        predictions, that there is one explanation per explained text, and
        that the token fields of the first explanation have the expected
        types and mutually consistent lengths.
        """
        model = Classifier(**self.default_config())
        train_sample = self.dataset.sample(n=self.n_sample)
        valid_sample = self.dataset.sample(n=self.n_sample)
        model.fit(train_sample.Text, train_sample.Target)
        explanations = model.explain(valid_sample.Text)
        normal_predictions = model.predict(valid_sample.Text)
        explanation_preds = [e["prediction"] for e in explanations]

        # check that the process of turning on explain does not change the preds
        self.assertEqual(explanation_preds, list(normal_predictions))
        # The explanations were produced from valid_sample, so that is the
        # sample whose size they must match (not the training sample).
        self.assertEqual(len(explanation_preds), len(valid_sample.Text))

        first = explanations[0]
        self.assertIsInstance(first["token_ends"], list)
        self.assertIsInstance(first["token_starts"], list)
        self.assertIsInstance(first["explanation"], dict)
        # The explanation entry under key 0 must carry one value per token.
        self.assertEqual(len(first["token_starts"]), len(first["explanation"][0]))
        self.assertEqual(len(first["token_ends"]), len(first["explanation"][0]))
    # Load the dataset (nrows=1000) and expose it as a dataframe.
    dataset = StanfordSentimentTreebank(nrows=1000).dataframe

    # Classifier settings used for this run, collected in one place.
    classifier_settings = dict(
        interpolate_pos_embed=False,
        n_epochs=1,
        batch_size=2,
        lr_warmup=0.1,
        val_size=0.0,
        max_length=64,
        prefit_init=False,
        base_model=GPTModel,
    )
    model = Classifier(**classifier_settings)

    # Split texts and targets with test_size=0.3 and a fixed random_state.
    trainX, testX, trainY, testY = train_test_split(
        dataset.Text.values,
        dataset.Target.values,
        test_size=0.3,
        random_state=42,
    )
    model.fit(trainX, trainY)
    samples = model.explain(testX)

    # Per-sample accumulators: token texts, their scores, and the prediction.
    words, probs, clfs = [], [], []

    for sample, text in zip(samples, testX):
        # Pair every token's text (sliced out of the raw string by its
        # start/end offsets) with the matching value from
        # sample['explanation'][1].
        # NOTE(review): presumably key 1 selects the scores of one target
        # class -- confirm against the explain() output format.
        token_scores = []
        for start, end, score in zip(sample["token_starts"],
                                     sample["token_ends"],
                                     sample['explanation'][1]):
            token_scores.append((text[start:end], score))

        sample_words = [word for word, _ in token_scores]
        sample_probs = [score for _, score in token_scores]
        print(token_scores, text)

        words.append(sample_words)
        probs.append(sample_probs)
        clfs.append(sample["prediction"])