Example #1
    def test(self, df_test, verbosity=2):
        """Calculate and store as model attributes:
        Average classification accuracy using rationales (self.avg_accuracy),
        Average classification accuracy rationale complements
            (self.anti_accuracy)
        Average sparsity of rationales (self.avg_sparsity)

        :param df_test: dataframe containing test data labels, tokens, masks,
            and counts
        :type df_test: pandas dataframe
        :param verbosity: {0, 1, 2}, default 2
            If 0, does not log any output
            If 1, logs accuracy, anti-rationale accuracy, sparsity, and
            continuity scores
            If 2, displays a random test example with rationale and
            classification
        :type verbosity: int, optional
        """
        self.model.eval()
        accuracy = 0
        for i in range(len(df_test) // self.args.test_batch_size):
            test_batch = df_test.iloc[i * self.args.test_batch_size:(i + 1) *
                                      self.args.test_batch_size]
            batch_dict = generate_data(test_batch, self.args.cuda)
            batch_x_ = batch_dict["x"]
            batch_m_ = batch_dict["m"]
            batch_y_ = batch_dict["y"]
            predict, _, _ = self.model(batch_x_, batch_m_)

            # take the argmax over the class scores to get the predicted class
            _, y_pred = torch.max(predict, dim=1)

            accuracy += (y_pred == batch_y_).sum().item()

        self.avg_accuracy = accuracy / len(df_test)
        self.test_accs.append(self.avg_accuracy)

        if verbosity > 0:
            logging.info("train acc: %.4f, test acc: %.4f" %
                         (self.train_accs[-1], self.avg_accuracy))

        if self.args.save_best_model:
            if self.avg_accuracy > self.best_test_acc:
                logging.info("saving best classifier model and model stats")
                # save model
                torch.save(
                    self.model.state_dict(),
                    os.path.join(
                        self.args.model_folder_path,
                        self.args.model_prefix + "gen_classifier.pth",
                    ),
                )

        if self.avg_accuracy > self.best_test_acc:
            self.best_test_acc = self.avg_accuracy
            self.epochs_since_improv = 0
        else:
            self.epochs_since_improv += 1
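
A minimal usage sketch for the test() method above; the `classifier` object and
the exact column layout expected by generate_data are assumptions inferred from
the docstring rather than confirmed by the source.

import pandas as pd

# hypothetical test dataframe: labels, padded token ids, pad/word masks, counts
df_test = pd.DataFrame({
    "labels": [1, 0],
    "tokens": [[12, 54, 7, 0], [3, 9, 0, 0]],
    "mask": [[1, 1, 1, 0], [1, 1, 0, 0]],
    "counts": [3, 2],
})

classifier.test(df_test, verbosity=1)  # logs latest train acc and test acc
print(classifier.avg_accuracy)         # stored on the model as an attribute
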
    def explain_local(self, text: str, **kwargs) -> _create_local_explanation:
        """ Create a local explanation for a given text
        :param text: A segment of text
        :type text: str
        :param kwargs:
                preprocessor: an intialized preprocessor to tokenize the
                given text with .preprocess() and .decode_single() methods
                preprocessor: Ex. GlovePreprocessor or BertPreprocessor
                hard_importances: whether to generate "hard" important/ non-important rationales
                or float rationale scores, defaults to True
                hard_importances: bool, optional
        :return: local explanation object
        :rtype: DynamicLocalExplanation
        """
        model_args = self.model_config
        df_dummy_label = pd.DataFrame.from_dict({"labels": [0]})
        df_sentence = pd.concat(
            [df_dummy_label,
             self.preprocessor.preprocess([text.lower()])],
            axis=1)
        batch_dict = generate_data(df_sentence, self.model_config.cuda)
        x = batch_dict["x"]
        m = batch_dict["m"]
        predict_dict = self.predict(df_sentence)
        zs = predict_dict["rationale"]
        prediction = predict_dict["predict"]
        prediction_idx = prediction[0].max(0)[1]
        prediction = model_args.labels[prediction_idx]
        zs = np.array(zs.cpu())
        if not kwargs.get('hard_importances', True):
            float_zs = self.model.get_z_scores(df_sentence)
            float_zs = float_zs[:, :, 1].detach()
            float_zs = np.array(float_zs.cpu())
            # zero out the importances of words not selected as part of the
            # rationale
            zs = zs * float_zs
        # generate human-readable tokens (individual words)
        seq_len = int(m.sum().item())
        ids = x[:seq_len][0]
        tokens = kwargs['preprocessor'].decode_single(ids)

        local_explanation = _create_local_explanation(
            classification=True,
            text_explanation=True,
            local_importance_values=zs.flatten(),
            method=str(type(self.model)),
            model_task="classification",
            features=tokens,
            classes=model_args.labels,
            predicted_label=prediction,
        )
        return local_explanation
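
A hypothetical call to the explain_local() method above; GlovePreprocessor is
named in the docstring, but its construction and the attributes read off the
returned explanation object are assumptions.

# the preprocessor must expose .preprocess() and .decode_single(), per the
# docstring; how it is constructed depends on the concrete class
explanation = explainer.explain_local(
    "the movie was surprisingly good",
    preprocessor=preprocessor,       # e.g. an initialized GlovePreprocessor
    hard_importances=False,          # return float rationale scores
)
print(explanation.predicted_label)
for token, weight in zip(explanation.features,
                         explanation.local_importance_values):
    print(token, weight)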
Example #3
    def fit(self, df_train, df_test):
        """Train the classifier on the training data, with testing
        at the end of every epoch.

        :param df_train: training data containing labels, lists of word token
            ids, pad/word masks, and token counts for each training example
        :type df_train: pd.DataFrame
        :param df_test: testing data containing labels, lists of word token
            ids, pad/word masks, and token counts for each testing example
        :type df_test: pd.DataFrame
        """
        self.init_optimizer()

        total_train = len(df_train)
        indices = np.array(list(range(0, total_train)))

        for i in tqdm(range(self.num_epochs)):
            self.model.train()  # pytorch fn; sets module to train mode

            # shuffle the epoch
            np.random.shuffle(indices)

            total_train_acc = 0
            for batch_idx in range(total_train // self.args.train_batch_size):
                # sample a batch of data
                start = batch_idx * self.args.train_batch_size
                end = min((batch_idx + 1) * self.args.train_batch_size,
                          total_train)
                batch = df_train.loc[indices[start:end]]
                batch_dict = generate_data(batch, self.args.cuda)
                batch_x_ = batch_dict["x"]
                batch_m_ = batch_dict["m"]
                batch_y_ = batch_dict["y"]

                losses, predict = self._train_one_step(batch_x_, batch_y_,
                                                       batch_m_)

                # calculate classification accuracy
                _, y_pred = torch.max(predict, dim=1)

                acc = float((y_pred == batch_y_).sum().cpu().data.item())
                total_train_acc += acc

            total_acc_percent = total_train_acc / total_train
            self.train_accs.append(total_acc_percent)

            self.test(df_test)
            # stop training if there have been no improvements
            if self.epochs_since_improv > self.args.training_stop_thresh:
                break
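
A hypothetical call to fit(); it drives the per-epoch train/test loop above and
relies on test() to update best_test_acc and epochs_since_improv for early
stopping. The classifier construction and dataframes are assumed.

classifier.fit(df_train, df_test)
print("last train acc:", classifier.train_accs[-1])
print("last test acc:", classifier.test_accs[-1])
print("best test acc:", classifier.best_test_acc)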
Example #4
    def get_z_scores(self, df_test):
        """Get softmaxed rationale importances.

        :param df_test: dataframe containing test data labels, tokens, masks,
            and counts
        :type df_test: pd.DataFrame
        :return:
            z_scores: softmaxed rationale scores with dimension
                (batch_size, length, 2), where the last dimension holds the
                not-selected/selected score for each token
        :rtype: torch.FloatTensor
        """
        batch_dict = generate_data(df_test, self.use_cuda)
        x_tokens = batch_dict["x"]
        mask = batch_dict["m"]
        z_scores, _, _ = self.generator(x_tokens, mask)
        z_scores = F.softmax(z_scores, dim=-1)

        return z_scores
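
A short sketch of how the softmaxed scores are typically consumed, mirroring
the [:, :, 1] slice used by explain_local above; the shape comment is an
inference from that usage rather than a confirmed contract.

z = model.get_z_scores(df_sentence)                  # assumed (batch, length, 2)
selection_prob = z[:, :, 1].detach().cpu().numpy()   # per-token P(selected)
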
    def predict(self, df_predict):
        """ Generate rationales, predictions using rationales, predictions using
        anti-rationales (complement of generated rationales), and introspective
        generator classifier predictions for given examples.

        :param df_predict: data containing labels, lists of word token
            ids, pad/word masks, and token counts for each testing example
        :type df_predict: pd.DataFrame
        :return: Dictionary with fields:
            "predict": predictions using generated rationales
            "anti_predict": predictions using complements of generated
                rationales
            "cls_predict": predictions from introspective generator,
            "rationale": mask indicating whether words were used in rationales,
        :rtype: dict
        """
        self.model.eval()
        self.model.training = False
        batch_dict = generate_data(df_predict, self.model_config.cuda)
        batch_x_ = batch_dict["x"]
        batch_m_ = batch_dict["m"]
        forward_dict = self.model.forward(batch_x_, batch_m_)
        predict = forward_dict["predict"]
        anti_predict = forward_dict["anti_predict"]
        cls_predict = forward_dict["cls_predict"]
        z = forward_dict["z"]
        predict = predict.detach()
        anti_predict = anti_predict.detach()
        cls_predict = cls_predict.detach()
        z = z.detach()
        predict_dict = {
            "predict": predict,
            "anti_predict": anti_predict,
            "cls_predict": cls_predict,
            "rationale": z,
        }
        self.model.training = True
        return predict_dict
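
A hypothetical use of predict(); the dictionary keys come from the docstring
above, while df_sentence is assumed to be a single preprocessed example.

import torch

out = model.predict(df_sentence)
_, y_hat = torch.max(out["predict"], dim=1)          # rationale-based prediction
_, anti_y_hat = torch.max(out["anti_predict"], dim=1)
rationale_mask = out["rationale"]                     # 1 where a word was kept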
Example #6
    def fit(self, df_train, df_test):
        """Train the model on the training data, with testing
        at the end of every epoch.

        :param df_train: training data containing labels, lists of word token
            ids, pad/word masks, and token counts for each training example
        :type df_train: pd.DataFrame
        :param df_test: testing data containing labels, lists of word token
            ids, pad/word masks, and token counts for each testing example
        :type df_test: pd.DataFrame
        """
        self._init_optimizers()
        self._init_rl_optimizers()

        total_train = len(df_train)
        indices = np.array(list(range(0, total_train)))

        for i in tqdm(range(self.num_epochs)):
            self.train()  # pytorch fn; sets module to train mode

            # shuffle the data in this epoch
            np.random.shuffle(indices)

            total_train_acc = 0
            for batch_idx in range(total_train // self.train_batch_size):
                # sample a batch of data
                start = batch_idx * self.train_batch_size
                end = min((batch_idx + 1) * self.train_batch_size,
                          total_train)
                batch = df_train.loc[indices[start:end]]
                batch_dict = generate_data(batch, self.use_cuda)
                batch_x_ = batch_dict["x"]
                batch_m_ = batch_dict["m"]
                batch_y_ = batch_dict["y"]

                z_baseline = Variable(torch.FloatTensor(
                    [float(np.mean(self.z_history_rewards))]))
                if self.use_cuda:
                    z_baseline = z_baseline.cuda()

                losses, predict, anti_predict, cls_predict, z, z_rewards =\
                    self._train_one_step(batch_x_,
                                         batch_y_,
                                         z_baseline,
                                         batch_m_)

                z_batch_reward = np.mean(z_rewards.cpu().data.numpy())
                self.z_history_rewards.append(z_batch_reward)

                # calculate classification accuracy
                _, y_pred = torch.max(predict, dim=1)

                acc = float((y_pred == batch_y_).sum().cpu().data.item())
                total_train_acc += acc

            total_acc_percent = total_train_acc / total_train
            self.train_accs.append(total_acc_percent)

            self.test(df_test)

            if self.epochs_since_improv > self.training_stop_thresh:
                break
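
The moving-average baseline above requires self.z_history_rewards to be
non-empty before the first training step. A plausible initialization (an
assumption, not taken from the source) is a bounded deque seeded with a single
zero, so np.mean(self.z_history_rewards) becomes a moving average over recent
batches.

from collections import deque

# hypothetical initialization, e.g. in the model's __init__
z_history_rewards = deque(maxlen=200)
z_history_rewards.append(0.0)  # seed so the first baseline is well defined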
Example #7
    def test(self, df_test, verbosity=2):
        """Calculate and store as model attributes:
        Average classification accuracy using rationales (self.avg_accuracy),
        Average classification accuracy rationale complements
            (self.anti_accuracy)
        Average sparsity of rationales (self.avg_sparsity)

        :param df_test: dataframe containing test data labels, tokens, masks,
            and counts
        :type df_test: pandas dataframe
        :param verbosity: {0, 1, 2}, default 2
            If 0, does not log any output
            If 1, logs accuracy, anti-rationale accuracy, sparsity, and
            continuity scores
            If 2, displays a random test example with rationale and
            classification
        :type verbosity: int, optional
        """
        self.eval()

        accuracy = 0
        anti_accuracy = 0
        sparsity_total = 0
        cont_total = 0

        for i in range(len(df_test) // self.test_batch_size):
            test_batch = df_test.iloc[i * self.test_batch_size:(i + 1) *
                                      self.test_batch_size]
            batch_dict = generate_data(test_batch, self.use_cuda)
            batch_x_ = batch_dict["x"]
            batch_m_ = batch_dict["m"]
            batch_y_ = batch_dict["y"]
            forward_dict = self.forward(batch_x_, batch_m_)
            predict = forward_dict["predict"]
            anti_predict = forward_dict["anti_predict"]
            z = forward_dict["z"]

            # take the argmax over the class scores to get predicted classes
            _, y_pred = torch.max(predict, dim=1)
            _, anti_y_pred = torch.max(anti_predict, dim=1)

            accuracy += (y_pred == batch_y_).sum().item()
            anti_accuracy += (anti_y_pred == batch_y_).sum().item()

            # calculate sparsity
            sparsity_ratios = self._get_sparsity(z, batch_m_)
            sparsity_total += sparsity_ratios.sum().item()

            cont_ratios = self._get_continuity(z, batch_m_)
            cont_total += cont_ratios.sum().item()

        self.avg_accuracy = accuracy / len(df_test)
        self.test_accs.append(self.avg_accuracy)
        self.avg_anti_accuracy = anti_accuracy / len(df_test)
        self.avg_sparsity = sparsity_total / len(df_test)
        self.avg_continuity = cont_total / len(df_test)

        if verbosity > 0:
            logging.info("test acc: %.4f test anti acc: %.4f" %
                         (self.avg_accuracy, self.avg_anti_accuracy))
            logging.info("test sparsity: %.4f test continuity: %.4f" %
                         (self.avg_sparsity, self.avg_continuity))

        if verbosity > 1:
            rand_idx = random.randint(0, self.test_batch_size - 1)
            # display a random example
            logging.info("Gold Label: " + str(batch_y_[rand_idx].item()) +
                         " Pred label: " + str(y_pred[rand_idx].item()))
            logging.info(
                self.display_example(batch_x_[rand_idx], batch_m_[rand_idx],
                                     z[rand_idx]))

        if self.args.save_best_model:
            if self.avg_accuracy > self.best_test_acc:
                logging.info("saving best model and model stats")
                # save model
                torch.save(
                    self.state_dict(),
                    os.path.join(
                        self.args.model_folder_path,
                        self.args.model_prefix + ".pth",
                    ),
                )

        if self.avg_accuracy > self.best_test_acc:
            self.best_test_acc = self.avg_accuracy
            self.epochs_since_improv = 0
        else:
            self.epochs_since_improv += 1
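
A plausible sketch (not the repository's implementation) of the per-example
sparsity ratio accumulated via _get_sparsity above: the fraction of real,
unpadded tokens that the generator selected into the rationale.

def sparsity_ratio(z, mask):
    # z: (batch, length) 0/1 rationale selections; mask: (batch, length) pad mask
    return (z * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)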
Example #8
    def explain_local(self, X, y=None, name=None) -> LocalExplanation:
        """ Create a local explanation for a given text
        :param X: String to be explained.
        :type X: str
        :param y: The ground truth label for the sentence
        :type y: string
        :param name: a name for saving the explanation, currently ignored
        :type str
        :return: local explanation object
        :rtype: DynamicLocalExplanation
        """
        X = _validate_X(X)

        model_args = self.model_config
        df_dummy_label = pd.DataFrame.from_dict({"labels": [0]})
        df_sentence = pd.concat(
            [df_dummy_label,
             self.preprocessor.preprocess([X.lower()])],
            axis=1)
        batch_dict = generate_data(df_sentence, self.model_config.cuda)
        x = batch_dict["x"]
        m = batch_dict["m"]
        predict_dict = self.predict(df_sentence)
        zs = predict_dict["rationale"]

        prediction = predict_dict["predict"]
        prediction_idx = prediction[0].max(0)[1]
        prediction = model_args.labels[prediction_idx]
        zs = np.array(zs.cpu())

        # the `not hard_importances` branch from explain_local above is applied
        # unconditionally here; otherwise `ids` below would be undefined
        float_zs = self.model.get_z_scores(df_sentence)
        float_zs = float_zs[:, :, 1].detach()
        float_zs = np.array(float_zs.cpu())
        # zero out the importances of words not selected as part of the
        # rationale
        zs = zs * float_zs
        # generate human-readable tokens (individual words)
        seq_len = int(m.sum().item())
        ids = x[:seq_len][0]

        tokens = self.preprocessor.decode_single(ids)

        local_importance_values = zs.flatten()
        # post-processing for BERT to remove SEP and CLS tokens
        # TODO: might we want to add a "post-process" method to the preprocessor?
        tokens_to_remove = [BertTokens.SEP, BertTokens.CLS]
        token_indexes = [
            idx for idx, token in enumerate(tokens)
            if token in tokens_to_remove
        ]
        if token_indexes:
            local_importance_values = np.delete(local_importance_values,
                                                token_indexes)
            for token_index in sorted(token_indexes, reverse=True):
                del tokens[token_index]
        local_explanation = _create_local_explanation(
            classification=True,
            text_explanation=True,
            local_importance_values=local_importance_values,
            method=str(type(self.model)),
            model_task="classification",
            features=tokens,
            classes=model_args.labels,
            predicted_label=prediction,
        )
        return local_explanation
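
A hypothetical call to the BERT-flavoured explain_local() above; _validate_X
and the SEP/CLS cleanup happen inside the method, so the caller only passes the
raw sentence (and, optionally, the gold label y named in the docstring).

explanation = explainer.explain_local(
    "the plot was dull but the acting saved it", y="negative")
print(explanation.predicted_label)
for token, weight in zip(explanation.features,
                         explanation.local_importance_values):
    print(f"{token}\t{weight:.3f}")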