Example #1
    def _compute_attributions(
        self,
        single_forward_output: Dict[str, numpy.ndarray],
        instance: Instance,
        n_steps: int = 50,
    ) -> List[List[Attribution]]:
        """Attributes the prediction to the input.

        The attributions are calculated by means of the [Integrated Gradients](https://arxiv.org/abs/1703.01365) method.

        Parameters
        ----------
        single_forward_output
            Non-batched forward output containing numpy arrays
        instance
            The instance containing the input data
        n_steps
            The number of steps used when calculating the attribution of each token.

        Returns
        -------
        attributions
            A list of lists of attributions, one inner list per `TextField` in the `ListField`
        """
        # captum needs `torch.Tensor`s and we need a batch dimension (-> unsqueeze)
        embeddings = torch.from_numpy(
            single_forward_output["embeddings"]).unsqueeze(0)
        mask = torch.from_numpy(single_forward_output["mask"]).unsqueeze(0)
        logits = torch.from_numpy(single_forward_output["logits"]).unsqueeze(0)

        ig = IntegratedGradients(self._encoder_and_head_forward)
        attributions, delta = ig.attribute(
            embeddings,
            n_steps=n_steps,
            target=torch.argmax(logits),
            additional_forward_args=mask,
            return_convergence_delta=True,
        )
        attributions = attributions.sum(dim=3).squeeze(0)
        attributions = attributions / torch.norm(attributions)
        attributions = attributions.detach().numpy()

        document_tokens = [
            cast(TextField, text_field).tokens for text_field in cast(
                ListField, instance.get(self.forward_arg_name))
        ]

        return [[
            Attribution(
                text=token.text,
                start=token.idx,
                end=self._get_token_end(token),
                field=self.forward_arg_name,
                attribution=attribution,
            ) for token, attribution in zip(sentence_tokens,
                                            sentence_attributions)
        ] for sentence_tokens, sentence_attributions in zip(
            document_tokens, attributions)]

    def explain_prediction(
        self, prediction: Dict[str, numpy.ndarray], instance: Instance, n_steps: int
    ) -> Dict[str, Any]:
        """Here, we must apply transformations for manage ListFields tensors shapes"""

        dataset = Batch([instance])
        input_tokens_ids = dataset.as_tensor_dict()
        ig = IntegratedGradients(self._explain_embeddings)

        num_wrapping_dims = 1

        document_tokens = [
            [token.text for token in cast(TextField, text_field).tokens]
            for text_field in cast(ListField, instance.get(self.forward_arg_name))
        ]
        document_tensors = input_tokens_ids.get(self.forward_arg_name)
        mask = get_text_field_mask(
            document_tensors, num_wrapping_dims=num_wrapping_dims
        )
        text_embeddings = self.backbone.embedder.forward(
            document_tensors, num_wrapping_dims=num_wrapping_dims
        )

        label_id = vocabulary.index_for_label(
            self.backbone.vocab, prediction.get(self.label_name)
        )
        attributions, delta = ig.attribute(
            text_embeddings,
            target=label_id,
            additional_forward_args=mask,
            return_convergence_delta=True,
            n_steps=n_steps,
        )
        attributions = attributions.sum(dim=3).squeeze(0)
        attributions = attributions / torch.norm(attributions)
        attributions = attributions.detach().numpy()

        return {
            **prediction,
            "explain": {
                self.forward_arg_name: [
                    [
                        {"token": token, "attribution": attribution}
                        for token, attribution in zip(
                            sentence_tokens, sentence_attribution
                        )
                    ]
                    for sentence_tokens, sentence_attribution in zip(
                        document_tokens, attributions
                    )
                ]
            },
        }
Example #3
    def explain_prediction(
        self, prediction: Dict[str, numpy.ndarray], instance: Instance, n_steps: int
    ) -> Dict[str, Any]:
        """Computes Integrated Gradients attributions for each input token and
        adds them under the "explain" key of the prediction."""

        dataset = Batch([instance])
        input_tokens_ids = dataset.as_tensor_dict()
        ig = IntegratedGradients(self._explain_embeddings)

        num_wrapping_dims = 0

        text_tokens = [
            token.text
            for token in cast(TextField, instance.get(self.forward_arg_name)).tokens
        ]
        text_tensor = input_tokens_ids.get(self.forward_arg_name)
        mask = get_text_field_mask(text_tensor, num_wrapping_dims=num_wrapping_dims)
        text_embeddings = self.backbone.embedder.forward(
            text_tensor, num_wrapping_dims=num_wrapping_dims
        )

        label_id = vocabulary.index_for_label(
            self.backbone.vocab, prediction["labels"][0]
        )
        attributions, delta = ig.attribute(
            text_embeddings,
            n_steps=n_steps,
            target=label_id,
            additional_forward_args=mask,
            return_convergence_delta=True,
        )
        attributions = attributions.sum(dim=2).squeeze(0)
        attributions = attributions / torch.norm(attributions)
        attributions = attributions.detach().numpy()

        return {
            **prediction,
            "explain": {
                self.forward_arg_name: [
                    {"token": token, "attribution": attribution}
                    for token, attribution in zip(text_tokens, attributions)
                ]
            },
        }
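The only substantial difference between this example and the `ListField` variants above is the extra wrapping dimension: the attribution tensor mirrors the embedding shape, so the reduction runs over a different axis. A small shape sketch with dummy tensors (sizes are assumed):

import torch

# ListField of TextFields: (batch, num_sentences, num_tokens, emb_dim)
listfield_attributions = torch.randn(1, 3, 5, 8)
per_sentence_token = listfield_attributions.sum(dim=3).squeeze(0)  # (num_sentences, num_tokens)

# Plain TextField: (batch, num_tokens, emb_dim)
textfield_attributions = torch.randn(1, 5, 8)
per_token = textfield_attributions.sum(dim=2).squeeze(0)  # (num_tokens,)

# In both cases a single L2 norm over all scores makes the token attributions
# comparable within one prediction.
per_token = per_token / torch.norm(per_token)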
Example #4
    def explain_prediction(self, prediction: Dict[str, np.ndarray],
                           instance: Instance, n_steps: int) -> Dict[str, Any]:
        """Calculates attributions for each data field in the record by integrating the gradients.

        IMPORTANT: The calculated attributions only make sense for a duplicate/not_duplicate binary classification task
        of the two records.

        Parameters
        ----------
        prediction
        instance
        n_steps

        Returns
        -------
        prediction_dict
            The prediction dictionary with a newly added "explain" key
        """
        batch = Batch([instance])
        tokens_ids = batch.as_tensor_dict()

        # 1. Get field encodings
        # TODO(dcfidalgo): optimize: for the prediction we already embedded and field encoded the records.
        #     Also, the forward passes here are always done on cpu!
        field_encoded_record1, record_mask_record1 = self._field_encoding(
            tokens_ids.get(self._RECORD1_ARG_NAME_IN_FORWARD))
        field_encoded_record2, record_mask_record2 = self._field_encoding(
            tokens_ids.get(self._RECORD2_ARG_NAME_IN_FORWARD))
        if not field_encoded_record1.size() == field_encoded_record2.size():
            raise RuntimeError(
                "Both records must have the same number of data fields!")

        # 2. Get attributes
        ig = IntegratedGradients(self._bimpm_forward)

        prediction_target = int(np.argmax(prediction["probs"]))
        ig_attribute_record1 = ig.attribute(
            inputs=(field_encoded_record1, field_encoded_record2),
            baselines=(field_encoded_record2, field_encoded_record2),
            additional_forward_args=(record_mask_record1, record_mask_record2),
            target=prediction_target,
            return_convergence_delta=True,
            n_steps=n_steps,
        )

        ig_attribute_record2 = ig.attribute(
            inputs=(field_encoded_record1, field_encoded_record2),
            baselines=(field_encoded_record1, field_encoded_record1),
            additional_forward_args=(record_mask_record1, record_mask_record2),
            target=prediction_target,
            return_convergence_delta=True,
            n_steps=n_steps,
        )
        # The code below was an attempt to make attributions for the "duplicate case" more meaningful ... did not work
        # # duplicate case:
        # # Here we integrate each record along the path from the null vector -> record1/2
        # # assuming that the null vector provides the highest "not duplicate" score.
        # if prediction_target == 0:
        #     ig_attribute_record1 = ig.attribute(
        #         inputs=(field_encoded_record1, field_encoded_record2),
        #         baselines=(torch.zeros_like(field_encoded_record1), field_encoded_record2),
        #         additional_forward_args=(record_mask_record1, record_mask_record2),
        #         # we fix the target since we want negative integrals always to be associated
        #         # to the "not_duplicate" case and positive ones to the "duplicate" case
        #         target=0,
        #         return_convergence_delta=True,
        #     )
        #
        #     ig_attribute_record2 = ig.attribute(
        #         inputs=(field_encoded_record1, field_encoded_record2),
        #         baselines=(field_encoded_record1, torch.zeros_like(field_encoded_record2)),
        #         additional_forward_args=(record_mask_record1, record_mask_record2),
        #         # we fix the target since we want negative integrals always to be associated
        #         # to the "not_duplicate" case and positive ones to the "duplicate" case
        #         target=0,
        #         return_convergence_delta=True,
        #     )
        #
        # # not duplicate case:
        # # Here we integrate each record along the path from record2/1 -> record1/2
        # # assuming that the same record provides the highest "duplicate" score.
        # elif prediction_target == 1:
        #     ...
        # else:
        #     raise RuntimeError("The `explain` method is only implemented for a binary classification task: "
        #                        "[duplicate, not_duplicate]")

        attributions_record1, delta_record1 = self._get_attributions_and_delta(
            ig_attribute_record1, 0)
        attributions_record2, delta_record2 = self._get_attributions_and_delta(
            ig_attribute_record2, 1)

        # 3. Get tokens corresponding to the attributions
        field_tokens_record1 = []
        for textfield in instance.get(self._RECORD1_ARG_NAME_IN_FORWARD):
            field_tokens_record1.append(" ".join(
                [token.text for token in textfield.tokens]))
        field_tokens_record2 = []
        for textfield in instance.get(self._RECORD2_ARG_NAME_IN_FORWARD):
            field_tokens_record2.append(" ".join(
                [token.text for token in textfield.tokens]))

        return {
            **prediction,
            "explain": {
                self._RECORD1_ARG_NAME_IN_FORWARD: [{
                    "token": token,
                    "attribution": attribution
                } for token, attribution in zip(field_tokens_record1,
                                                attributions_record1)],
                self._RECORD2_ARG_NAME_IN_FORWARD: [{
                    "token": token,
                    "attribution": attribution
                } for token, attribution in zip(field_tokens_record2,
                                                attributions_record2)],
            },
        }
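The helper `_get_attributions_and_delta` is not part of this listing. Judging from how it is called (the second argument selects record 1 or record 2) and from how the other examples reduce their attribution tensors, a plausible reconstruction looks like the sketch below; this is an assumption, not the actual implementation.

from typing import Tuple

import numpy as np
import torch

def _get_attributions_and_delta(
    ig_attribute_output: Tuple[Tuple[torch.Tensor, torch.Tensor], torch.Tensor],
    record_index: int,  # hypothetical: 0 -> record1, 1 -> record2
) -> Tuple[np.ndarray, torch.Tensor]:
    """Hypothetical reconstruction: reduce the attribution tensor of one record
    to a single normalized score per data field."""
    attributions_per_record, delta = ig_attribute_output
    attributions = attributions_per_record[record_index]
    # assumed shape (batch, num_fields, encoding_dim) -> one score per field
    attributions = attributions.sum(dim=2).squeeze(0)
    attributions = attributions / torch.norm(attributions)
    return attributions.detach().numpy(), delta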
    def _compute_attributions(
        self,
        single_forward_output: Dict[str, numpy.ndarray],
        instance: Instance,
        n_steps: int = 50,
    ) -> List[Attribution]:
        """Computes attributions for each data field in the record by means of the
        [Integrated Gradients](https://arxiv.org/abs/1703.01365) method.

        IMPORTANT: The calculated attributions only make sense for a duplicate/not_duplicate binary classification task
        of the two records.

        Parameters
        ----------
        single_forward_output
            Non-batched forward output containing numpy arrays
        instance
            The instance containing the input data
        n_steps
            The number of steps used when calculating the attribution of each token.

        Returns
        -------
        attributions
            A list of attributions, one per data field of each record
        """
        # captum needs `torch.Tensor`s and we need a batch dimension (-> unsqueeze)
        field_encoded_record1 = torch.from_numpy(
            single_forward_output["field_encoded_record1"]).unsqueeze(0)
        record_mask_record1 = torch.from_numpy(
            single_forward_output["record_mask_record1"]).unsqueeze(0)

        field_encoded_record2 = torch.from_numpy(
            single_forward_output["field_encoded_record2"]).unsqueeze(0)
        record_mask_record2 = torch.from_numpy(
            single_forward_output["record_mask_record2"]).unsqueeze(0)

        logits = torch.from_numpy(single_forward_output["logits"]).unsqueeze(0)

        if not field_encoded_record1.size() == field_encoded_record2.size():
            raise RuntimeError(
                "Both records must have the same number of data fields!")

        # 2. Get attributes
        ig = IntegratedGradients(self._bimpm_forward)

        prediction_target = torch.argmax(logits)

        ig_attribute_record1 = ig.attribute(
            inputs=(field_encoded_record1, field_encoded_record2),
            baselines=(field_encoded_record2, field_encoded_record2),
            additional_forward_args=(record_mask_record1, record_mask_record2),
            target=prediction_target,
            return_convergence_delta=True,
            n_steps=n_steps,
        )

        ig_attribute_record2 = ig.attribute(
            inputs=(field_encoded_record1, field_encoded_record2),
            baselines=(field_encoded_record1, field_encoded_record1),
            additional_forward_args=(record_mask_record1, record_mask_record2),
            target=prediction_target,
            return_convergence_delta=True,
            n_steps=n_steps,
        )
        # The code below was an attempt to make attributions for the "duplicate case" more meaningful ... did not work
        # # duplicate case:
        # # Here we integrate each record along the path from the null vector -> record1/2
        # # assuming that the null vector provides the highest "not duplicate" score.
        # if prediction_target == 0:
        #     ig_attribute_record1 = ig.attribute(
        #         inputs=(field_encoded_record1, field_encoded_record2),
        #         baselines=(torch.zeros_like(field_encoded_record1), field_encoded_record2),
        #         additional_forward_args=(record_mask_record1, record_mask_record2),
        #         # we fix the target since we want negative integrals always to be associated
        #         # to the "not_duplicate" case and positive ones to the "duplicate" case
        #         target=0,
        #         return_convergence_delta=True,
        #     )
        #
        #     ig_attribute_record2 = ig.attribute(
        #         inputs=(field_encoded_record1, field_encoded_record2),
        #         baselines=(field_encoded_record1, torch.zeros_like(field_encoded_record2)),
        #         additional_forward_args=(record_mask_record1, record_mask_record2),
        #         # we fix the target since we want negative integrals always to be associated
        #         # to the "not_duplicate" case and positive ones to the "duplicate" case
        #         target=0,
        #         return_convergence_delta=True,
        #     )
        #
        # # not duplicate case:
        # # Here we integrate each record along the path from record2/1 -> record1/2
        # # assuming that the same record provides the highest "duplicate" score.
        # elif prediction_target == 1:
        #     ...
        # else:
        #     raise RuntimeError("The `compute_attributions` method is only implemented for a binary classification task: "
        #                        "[duplicate, not_duplicate]")

        attributions_record1, delta_record1 = self._get_attributions_and_delta(
            ig_attribute_record1, 0)
        attributions_record2, delta_record2 = self._get_attributions_and_delta(
            ig_attribute_record2, 1)

        # 3. Get tokens corresponding to the attributions
        field_text_record1 = []
        for textfield in instance.get(self._RECORD1_ARG_NAME_IN_FORWARD):
            field_text_record1.append(" ".join(
                [token.text for token in textfield.tokens]))
        field_text_record2 = []
        for textfield in instance.get(self._RECORD2_ARG_NAME_IN_FORWARD):
            field_text_record2.append(" ".join(
                [token.text for token in textfield.tokens]))

        output_record1 = [
            Attribution(
                text=field_text,
                start=0,
                end=len(field_text),
                field=self._RECORD1_ARG_NAME_IN_FORWARD,
                attribution=attribution,
            ) for field_text, attribution in zip(field_text_record1,
                                                 attributions_record1)
        ]
        output_record2 = [
            Attribution(
                text=field_text,
                start=0,
                end=len(field_text),
                field=self._RECORD2_ARG_NAME_IN_FORWARD,
                attribution=attribution,
            ) for field_text, attribution in zip(field_text_record2,
                                                 attributions_record2)
        ]

        return output_record1 + output_record2
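The baseline choice is what makes this example work: when attributing record 1, record 2 serves as the baseline, so Integrated Gradients integrates along the path record2 -> record1 and the scores measure what record 1 contributes relative to record 2 (and symmetrically for record 2). A toy sketch of that pattern with tuple inputs; the model and shapes are assumptions.

import torch
from captum.attr import IntegratedGradients

weight = torch.randn(8, 2)

def toy_matching_forward(rec1, rec2, mask1, mask2):
    # Toy stand-in for the BiMPM forward: pool each record and "match" them.
    pooled1 = (rec1 * mask1.unsqueeze(-1)).mean(dim=1)
    pooled2 = (rec2 * mask2.unsqueeze(-1)).mean(dim=1)
    return (pooled1 * pooled2) @ weight  # (batch, 2) logits

rec1, rec2 = torch.randn(1, 4, 8), torch.randn(1, 4, 8)  # (batch, num_fields, dim)
mask1 = mask2 = torch.ones(1, 4)

ig = IntegratedGradients(toy_matching_forward)
(attr_rec1, attr_rec2), delta = ig.attribute(
    inputs=(rec1, rec2),
    baselines=(rec2, rec2),  # integrate rec1 along the path rec2 -> rec1
    additional_forward_args=(mask1, mask2),
    target=0,
    return_convergence_delta=True,
)
# attr_rec2 is exactly zero here: its integration path (rec2 -> rec2) is constant,
# which is why the original code reads only index 0 from this `attribute` call.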