def run_test(self, testcase, **kwargs):
        """Run one extraction test case and compare against the expected output.

        Parameters
        ----------
        testcase : tuple
            Six-element tuple ``(_, sentence, model_mock_output,
            first_expected_output, last_expected_output,
            average_expected_output)``. The first element is ignored.
            ``sentence`` is joined with spaces before extraction, so it is
            presumably a list of words.
        kwargs : dict
            Keyword arguments forwarded unchanged to
            ``transformers_extractor.extract_sentence_representations``. Must
            contain ``"aggregation"`` set to ``"first"``, ``"last"`` or
            ``"average"``; it selects which expected output is compared.

        Raises
        ------
        KeyError
            If ``"aggregation"`` is not supplied.
        ValueError
            If ``"aggregation"`` is not one of the supported values.
        """
        (
            _,
            sentence,
            model_mock_output,
            first_expected_output,
            last_expected_output,
            average_expected_output,
        ) = testcase

        expected_by_aggregation = {
            "first": first_expected_output,
            "last": last_expected_output,
            "average": average_expected_output,
        }
        aggregation = kwargs["aggregation"]
        # Fail fast with a clear message: otherwise an unsupported value would
        # only surface as an UnboundLocalError in the comparison loop below.
        if aggregation not in expected_by_aggregation:
            raise ValueError(
                f"Unsupported aggregation {aggregation!r}; "
                f"expected one of {sorted(expected_by_aggregation)}"
            )
        expected_output = expected_by_aggregation[aggregation]

        # The mocked model returns a 2-tuple whose second element is the
        # prepared mock output; the first element is a placeholder.
        self.model.return_value = ("placeholder", model_mock_output)

        words = sentence
        (
            hidden_states,
            extracted_words,
        ) = transformers_extractor.extract_sentence_representations(
            " ".join(words), self.model, self.tokenizer, **kwargs)
        self.assertEqual(len(extracted_words), len(words))
        self.assertEqual(hidden_states.shape[1], len(words))

        # Test output from all layers
        for layer in range(self.num_layers):
            np.testing.assert_array_almost_equal(
                hidden_states[layer, :, :],
                expected_output[layer][:, :].numpy())
# Example #2
# 0
    def test_extract_sentence_representations_long_input_dropped_token(self, mock_stdout):
        """Input longer than tokenizer's limit with dropped token

        Runs test case 17 through the extractor without extra keyword
        arguments, checks that the truncation notice shows up in the text
        captured by ``mock_stdout``, and compares layers 1 and above with the
        expected output. Layer 0 is not compared.

        ``mock_stdout`` only needs to expose ``getvalue()``; presumably it is
        a patched ``sys.stdout`` injected by a decorator not visible here.
        """
        _, words, mocked_output, _, expected_layers, _ = self.tests_data[17]
        self.model.return_value = ("placeholder", mocked_output)

        representations, _ = transformers_extractor.extract_sentence_representations(
            " ".join(words),
            self.model,
            self.tokenizer,
        )

        captured_text = mock_stdout.getvalue()
        self.assertIn("Input truncated because of length", captured_text)

        for layer_idx in range(1, self.num_layers):
            actual_layer = representations[layer_idx, :, :]
            expected_layer = expected_layers[layer_idx][:, :].numpy()
            np.testing.assert_array_almost_equal(actual_layer, expected_layer)
# Example #3
# 0
    def test_extract_sentence_representations_exclude_embeddings(self):
        """Extraction without embedding layer

        Runs test case 1 with ``include_embeddings=False`` and checks that the
        result has one layer fewer than ``self.num_layers``. Output layer
        ``i - 1`` is then compared with expected layer ``i`` for every
        ``i`` from 1 up to ``self.num_layers - 1``.
        """
        _, words, mocked_output, _, expected_layers, _ = self.tests_data[1]
        self.model.return_value = ("placeholder", mocked_output)

        representations, _ = transformers_extractor.extract_sentence_representations(
            " ".join(words),
            self.model,
            self.tokenizer,
            include_embeddings=False,
        )

        self.assertEqual(representations.shape[0], self.num_layers - 1)

        # Expected index i corresponds to output index i - 1 because the
        # first expected layer has no counterpart in the output.
        for expected_idx in range(1, self.num_layers):
            actual_layer = representations[expected_idx - 1, :, :]
            expected_layer = expected_layers[expected_idx][:, :].numpy()
            np.testing.assert_array_almost_equal(actual_layer, expected_layer)
# Example #4
# 0
    def __call__(self, tokens, layer, neuron, **kwargs):
        """
        Render the activations of a single neuron over a sequence of tokens.

        The tokens are joined with spaces and passed through
        ``extract_sentence_representations`` using ``self.model`` and
        ``self.tokenizer``. The slice for the requested ``layer`` and
        ``neuron`` is then handed to ``visualize_activations``.

        Parameters
        ----------
        tokens : list of str
            Tokens to compute and visualize the activations for
        layer : int
            Index of the layer, in the loaded model, holding the neuron
        neuron : int
            Index of the neuron within that layer
        kwargs : dict
            Keyword arguments passed through to ``visualize_activations``

        Returns
        -------
        object
            Whatever ``visualize_activations`` returns for these inputs
        """

        # Extraction runs inside the HiddenPrints context (presumably to
        # suppress anything it prints); visualization runs outside it.
        with self.__HiddenPrints__():
            sentence = " ".join(tokens)
            representations, _ = extract_sentence_representations(
                sentence, self.model, self.tokenizer)

        neuron_activations = representations[layer, :, neuron]
        return visualize_activations(tokens, neuron_activations, **kwargs)