def test_explain(self):
    """Enabling explain must not alter predictions, and each explanation
    must carry token spans aligned 1:1 with its per-class weight vectors.
    """
    model = Classifier(**self.default_config())
    train_sample = self.dataset.sample(n=self.n_sample)
    valid_sample = self.dataset.sample(n=self.n_sample)
    model.fit(train_sample.Text, train_sample.Target)
    explanations = model.explain(valid_sample.Text)
    normal_predictions = model.predict(valid_sample.Text)
    explanation_preds = [e["prediction"] for e in explanations]
    # Check that the process of turning on explain does not change the preds.
    self.assertEqual(explanation_preds, list(normal_predictions))
    # One explanation per *input* text. Fixed: this previously compared against
    # train_sample.Text, which only passed because both samples use n_sample rows.
    self.assertEqual(len(explanation_preds), len(valid_sample.Text))
    self.assertEqual(type(explanations[0]["token_ends"]), list)
    self.assertEqual(type(explanations[0]["token_starts"]), list)
    self.assertEqual(type(explanations[0]["explanation"]), dict)
    # The per-class explanation vectors (keyed by class label, e.g. 0) must
    # have one weight per token span.
    self.assertEqual(
        len(explanations[0]["token_starts"]),
        len(explanations[0]["explanation"][0]),
    )
    self.assertEqual(
        len(explanations[0]["token_ends"]),
        len(explanations[0]["explanation"][0]),
    )
# Demo: train a small sentiment classifier, then print each test text with
# its per-token explanation weights for class 1.
dataset = StanfordSentimentTreebank(nrows=1000).dataframe
model = Classifier(
    interpolate_pos_embed=False,
    n_epochs=1,
    batch_size=2,
    lr_warmup=0.1,
    val_size=0.0,
    max_length=64,
    prefit_init=False,
    base_model=GPTModel,
)
trainX, testX, trainY, testY = train_test_split(
    dataset.Text.values, dataset.Target.values, test_size=0.3, random_state=42
)
model.fit(trainX, trainY)
samples = model.explain(testX)

words = []
probs = []
clfs = []
for sample, text in zip(samples, testX):
    # zip truncates to the shortest of spans/weights, matching the token count.
    spans = zip(sample["token_starts"], sample["token_ends"], sample['explanation'][1])
    token_pairs = [(text[start:end], weight) for start, end, weight in spans]
    sample_words = [word for word, _ in token_pairs]
    sample_probs = [weight for _, weight in token_pairs]
    print(list(zip(sample_words, sample_probs)), text)
    words.append(sample_words)
    probs.append(sample_probs)
    clfs.append(sample["prediction"])