Example #1
    def explain_local(self, X, y=None, name=None):
        """Returns an explanation object containing explanations over words
            in the input text string.
        :param X: String to be explained.
        :type X: str
        :param y: The predicted label for the sentence, inferred from the
            model's prediction if not provided
        :type y: str
        :param name: A name for saving the explanation; currently ignored
        :type name: str
        :return: A model explanation object containing importances and metadata.
        :rtype: LocalExplanation
        """
        X = _validate_X(X)

        [encoded_text, _] = self.preprocessor.encode_features(X,
                                                              needs_fit=False)
        encoded_label = self.model.predict(encoded_text)

        if y is None:
            y = self.preprocessor.labelEncoder.inverse_transform(encoded_label)

        # convert from vector to scalar
        encoded_label = encoded_label[0]

        # Obtain the top feature ids for the selected class label
        if hasattr(self.model, "coef_"):
            # when #labels == 2, coef_ returns 1D array
            label_coefs_all = self.model.coef_
            if len(self.preprocessor.labelEncoder.classes_) == 2:
                label_coefs_all = np.vstack(
                    (-1 * label_coefs_all, label_coefs_all))
            encoded_imp = label_coefs_all[encoded_label, :]
        elif hasattr(self.model, "feature_importances_"):
            encoded_imp = self.model.feature_importances_
        else:
            raise Exception(
                "model is missing coef_ or feature_importances_ attribute")
        decoded_imp, parsed_sentence_list = self.preprocessor.decode_imp(
            encoded_imp, X)

        local_explanation = _create_local_explanation(
            classification=True,
            text_explanation=True,
            local_importance_values=np.array(decoded_imp),
            method=str(type(self.model)),
            model_task="classification",
            features=parsed_sentence_list,
            classes=self.preprocessor.labelEncoder.classes_,
            predicted_label=y)
        return local_explanation
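
The binary branch above works because, for two-class problems, scikit-learn linear models expose coef_ as a single row of weights for the positive class. A minimal sketch of that stacking trick (values hypothetical):

import numpy as np

coef = np.array([[0.5, -1.2, 0.3]])              # shape (1, n_features)
# Stacking the negated row on top yields one row per class, so indexing by
# the encoded label works the same for binary and multiclass models.
label_coefs_all = np.vstack((-1 * coef, coef))   # shape (2, n_features)
encoded_imp = label_coefs_all[1, :]              # importances for class 1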
Example #2
    def explain_local(self, input_text, abs_sum_to_one=False):
        """Returns an explanation object containing explanations over words
            in the input text string.
        :param input_text: String to be explained.
        :type input_text: str
        :param abs_sum_to_one: Whether to normalize the importances so that
            their absolute values sum to one, defaults to False
        :type abs_sum_to_one: bool
        :return: A model explanation object containing importances and metadata.
        :rtype: LocalExplanation
        """

        [encoded_text, _] = self.preprocessor.encode_features(input_text,
                                                              needs_fit=False)
        encoded_label = self.model.predict(encoded_text)
        # convert from vector to scalar
        encoded_label = encoded_label[0]
        # Obtain the top feature ids for the selected class label
        if hasattr(self.model, "coef_"):
            # when #labels == 2, coef_ returns 1D array
            label_coefs_all = self.model.coef_
            if len(self.preprocessor.labelEncoder.classes_) == 2:
                label_coefs_all = np.vstack(
                    (-1 * label_coefs_all, label_coefs_all))
            encoded_imp = label_coefs_all[encoded_label, :]
        elif hasattr(self.model, "feature_importances_"):
            encoded_imp = self.model.feature_importances_
        else:
            raise Exception(
                "model is missing coef_ or feature_importances_ attribute")
        decoded_imp, parsed_sentence_list = self.preprocessor.decode_imp(
            encoded_imp, input_text)

        if abs_sum_to_one:
            decoded_imp = decoded_imp / np.sum(np.abs(decoded_imp))

        local_explanation = _create_local_explanation(
            classification=True,
            text_explanation=True,
            local_importance_values=np.array(decoded_imp),
            method=str(type(self.model)),
            model_task="classification",
            features=parsed_sentence_list,
            classes=self.preprocessor.labelEncoder.classes_,
        )
        return local_explanation
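
A quick check of the abs_sum_to_one normalization used above (values hypothetical): dividing by the L1 norm preserves signs while making the absolute importances sum to one.

import numpy as np

decoded_imp = np.array([0.5, -1.5, 2.0])
normalized = decoded_imp / np.sum(np.abs(decoded_imp))
print(normalized)                                 # [ 0.125 -0.375  0.5  ]
assert np.isclose(np.sum(np.abs(normalized)), 1.0)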
Example #3
    def explain_local(self, X, y=None, name=None, num_iteration=150):
        """Explain the model by using MSRA's interpreter.
        :param X: The text to be explained.
        :type X: str
        :param y: The predicted label for the sentence
        :type y: str
        :param name: A name for saving the explanation; currently ignored
        :type name: str
        :param num_iteration: The number of iterations through the optimize function. This is a
            parameter that should be tuned to your dataset. If set to 0, all words will be
            important, as the loss function will not be optimized. If set to a very high number,
            no words will be important, as the loss will be severely optimized. More iterations
            make the explanation slower.
        :type num_iteration: int
        :return: A model explanation object. It is guaranteed to be a LocalExplanation
        :rtype: DynamicLocalExplanation
        """
        X = _validate_X(X)

        embedded_input, parsed_sentence = _get_single_embedding(
            self.model, X, self.device)
        self.input_embeddings = embedded_input
        self.parsed_sentence = parsed_sentence

        self.input_size = self.input_embeddings.size(0)
        self.input_dimension = self.input_embeddings.size(1)
        self.ratio = nn.Parameter(torch.randn(self.input_size, 1),
                                  requires_grad=True)
        # Tensor.to() is not in-place; reassign so the embeddings actually move
        self.input_embeddings = self.input_embeddings.to(self.device)

        if self.regular is None:
            assert self.train_dataset is not None, "Training dataset is required"

            # sample the training dataset
            if len(self.train_dataset) <= self.max_points:
                sampled_train_dataset = self.train_dataset
            else:
                sampled_train_dataset = random.sample(self.train_dataset,
                                                      k=self.max_points)

            training_embeddings = make_bert_embeddings(sampled_train_dataset,
                                                       self.model, self.device)
            regularization = self._calculate_regularization(
                training_embeddings, self.model).tolist()
            self.regular = nn.Parameter(
                torch.tensor(regularization).to(self.input_embeddings),
                requires_grad=False,
            )
            self.Phi = self._generate_Phi(layer=self.target_layer)

        # values below are arbitrarily set for now
        self._optimize(num_iteration, lr=0.01, show_progress=True)
        local_importance_values = self._get_sigma()
        self.local_importance_values = local_importance_values
        return _create_local_explanation(
            classification=True,
            text_explanation=True,
            local_importance_values=np.array(local_importance_values)[1:-1],
            method="neural network",
            model_task="classification",
            features=self.parsed_sentence[1:-1],
            classes=self.classes,
            predicted_label=y,
        )
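
A hedged usage sketch for this explainer; the class name, constructor arguments, and input objects below are assumptions, not confirmed API:

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Hypothetical: an MSRA-interpreter-style explainer over a fitted BERT classifier.
explainer = UnifiedInformationExplainer(model=bert_model,
                                        train_dataset=train_sentences,
                                        device=device)
# num_iteration trades fidelity for speed, as described in the docstring.
explanation = explainer.explain_local("this movie was surprisingly good",
                                      num_iteration=150)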
Example #4
    def explain_local(self, X, y=None, name=None) -> LocalExplanation:
        """ Create a local explanation for a given text
        :param X: String to be explained.
        :type X: str
        :param y: The ground truth label for the sentence
        :type y: str
        :param name: A name for saving the explanation; currently ignored
        :type name: str
        :return: local explanation object
        :rtype: DynamicLocalExplanation
        """
        X = _validate_X(X)

        model_args = self.model_config
        df_dummy_label = pd.DataFrame.from_dict({"labels": [0]})
        df_sentence = pd.concat(
            [df_dummy_label,
             self.preprocessor.preprocess([X.lower()])],
            axis=1)
        batch_dict = generate_data(df_sentence, self.model_config.cuda)
        x = batch_dict["x"]
        m = batch_dict["m"]
        predict_dict = self.predict(df_sentence)
        zs = predict_dict["rationale"]

        prediction = predict_dict["predict"]
        prediction_idx = prediction[0].max(0)[1]
        prediction = model_args.labels[prediction_idx]
        zs = np.array(zs.cpu())

        # the "not hard_importance" branch is assumed here; ids below would be
        # undefined otherwise
        float_zs = self.model.get_z_scores(df_sentence)
        float_zs = float_zs[:, :, 1].detach()
        float_zs = np.array(float_zs.cpu())
        # set importances of all words not selected as part of the
        # rationale to zero
        zs = zs * float_zs
        # generate human-readable tokens (individual words)
        seq_len = int(m.sum().item())
        ids = x[:seq_len][0]

        tokens = self.preprocessor.decode_single(ids)

        local_importance_values = zs.flatten()
        # post-processing for BERT to remove SEP and CLS tokens
        # TODO: might we want to add a "post-process" method to the preprocessor?
        tokens_to_remove = [BertTokens.SEP, BertTokens.CLS]
        token_indexes = [
            idx for idx, token in enumerate(tokens)
            if token in tokens_to_remove
        ]
        if token_indexes:
            local_importance_values = np.delete(local_importance_values,
                                                token_indexes)
            for token_index in sorted(token_indexes, reverse=True):
                del tokens[token_index]
        local_explanation = _create_local_explanation(
            classification=True,
            text_explanation=True,
            local_importance_values=local_importance_values,
            method=str(type(self.model)),
            model_task="classification",
            features=tokens,
            classes=model_args.labels,
            predicted_label=prediction,
        )
        return local_explanation
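
A hedged call sketch for this variant; the explainer construction is hypothetical, and the attribute names assume an interpret-community-style LocalExplanation:

# Hypothetical: explainer wraps a model_config with labels and a preprocessor.
explanation = explainer.explain_local("the plot was thin but the acting shines")
# Tokens and importances line up one-to-one after SEP/CLS removal.
for token, value in zip(explanation.features,
                        explanation.local_importance_values):
    print(token, value)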
Example #5
    def explain_local(self, text: str, **kwargs) -> LocalExplanation:
        """Create a local explanation for a given text.
        :param text: A segment of text
        :type text: str
        :param kwargs:
            preprocessor: An initialized preprocessor with .preprocess() and
                .decode_single() methods used to tokenize the given text,
                e.g. GlovePreprocessor or BertPreprocessor
            hard_importances: Whether to generate "hard" important/non-important
                rationales or float rationale scores, defaults to True
        :return: local explanation object
        :rtype: DynamicLocalExplanation
        """
        model_args = self.model_config
        df_dummy_label = pd.DataFrame.from_dict({"labels": [0]})
        df_sentence = pd.concat(
            [df_dummy_label,
             self.preprocessor.preprocess([text.lower()])],
            axis=1)
        batch_dict = generate_data(df_sentence, self.model_config.cuda)
        x = batch_dict["x"]
        m = batch_dict["m"]
        predict_dict = self.predict(df_sentence)
        zs = predict_dict["rationale"]
        prediction = predict_dict["predict"]
        prediction_idx = prediction[0].max(0)[1]
        prediction = model_args.labels[prediction_idx]
        zs = np.array(zs.cpu())
        # default to True per the docstring; direct indexing would raise
        # KeyError when the kwarg is omitted
        hard_importances = kwargs.get("hard_importances", True)
        if not hard_importances:
            float_zs = self.model.get_z_scores(df_sentence)
            float_zs = float_zs[:, :, 1].detach()
            float_zs = np.array(float_zs.cpu())
            # set importances of all words not selected as part of the
            # rationale to zero
            zs = zs * float_zs
        # generate human-readable tokens (individual words); computed
        # unconditionally so ids is defined even when hard_importances is True
        seq_len = int(m.sum().item())
        ids = x[:seq_len][0]
        tokens = kwargs.get("preprocessor", self.preprocessor).decode_single(ids)
        local_importance_values = zs.flatten()
        # post-processing for BERT to remove SEP and CLS tokens
        # TODO: might we want to add a "post-process" method to the preprocessor?
        tokens_to_remove = [BertTokens.SEP, BertTokens.CLS]
        token_indexes = [
            idx for idx, token in enumerate(tokens)
            if token in tokens_to_remove
        ]
        if token_indexes:
            local_importance_values = np.delete(local_importance_values,
                                                token_indexes)
            for token_index in sorted(token_indexes, reverse=True):
                del tokens[token_index]
        local_explanation = _create_local_explanation(
            classification=True,
            text_explanation=True,
            local_importance_values=local_importance_values,
            method=str(type(self.model)),
            model_task="classification",
            features=tokens,
            classes=model_args.labels,
            predicted_label=prediction,
        )
        return local_explanation
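
And a hedged sketch of the kwargs-based call (the explainer and preprocessor objects are hypothetical):

preprocessor = BertPreprocessor()    # hypothetical: any object with decode_single()
explanation = explainer.explain_local(
    "an uneven but watchable thriller",
    preprocessor=preprocessor,
    hard_importances=False,          # request float rationale scores
)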