def run_test(self, testcase, **kwargs):
    """Run one extraction test case against the mocked model.

    Unpacks ``testcase``, selects the expected output matching the
    requested ``aggregation`` strategy, runs
    ``extract_sentence_representations`` on the joined sentence, and
    asserts word counts and per-layer activations match.

    Parameters
    ----------
    testcase : tuple
        ``(_, sentence, model_mock_output, first_expected_output,
        last_expected_output, average_expected_output)``.
    kwargs : dict
        Forwarded to ``extract_sentence_representations``; must contain
        an ``"aggregation"`` key of ``"first"``, ``"last"`` or
        ``"average"``.

    Raises
    ------
    ValueError
        If ``kwargs["aggregation"]`` is not a recognized strategy.
    """
    (
        _,
        sentence,
        model_mock_output,
        first_expected_output,
        last_expected_output,
        average_expected_output,
    ) = testcase

    # Map each aggregation strategy to its expected output. The original
    # chain of independent `if` statements left `expected_output` unbound
    # (a NameError much later) for an unrecognized strategy; fail loudly
    # here instead.
    expected_by_aggregation = {
        "first": first_expected_output,
        "last": last_expected_output,
        "average": average_expected_output,
    }
    aggregation = kwargs["aggregation"]
    if aggregation not in expected_by_aggregation:
        raise ValueError(f"Unknown aggregation: {aggregation!r}")
    expected_output = expected_by_aggregation[aggregation]

    # Mock returns a 2-tuple; the extractor reads the hidden states from
    # the second element.
    self.model.return_value = ("placeholder", model_mock_output)

    words = sentence
    (
        hidden_states,
        extracted_words,
    ) = transformers_extractor.extract_sentence_representations(
        " ".join(words), self.model, self.tokenizer, **kwargs)

    self.assertEqual(len(extracted_words), len(words))
    self.assertEqual(hidden_states.shape[1], len(words))

    # Test output from all layers
    for l in range(self.num_layers):
        np.testing.assert_array_almost_equal(
            hidden_states[l, :, :], expected_output[l][:, :].numpy())
def test_extract_sentence_representations_long_input_dropped_token(self, mock_stdout):
    "Input longer than tokenizer's limit with dropped token"
    # Test fixture 17 holds an over-length input; only the "last"
    # aggregation column of the fixture is relevant here.
    _, sentence, model_mock_output, _, expected_output, _ = self.tests_data[17]
    self.model.return_value = ("placeholder", model_mock_output)

    joined_input = " ".join(sentence)
    hidden_states, extracted_words = (
        transformers_extractor.extract_sentence_representations(
            joined_input, self.model, self.tokenizer))

    # The extractor should have warned about truncating the input.
    self.assertIn("Input truncated because of length", mock_stdout.getvalue())

    # NOTE(review): layer 0 is deliberately skipped here, unlike other
    # tests — presumably the embedding layer differs under truncation;
    # confirm against the extractor's behavior.
    for layer_idx in range(1, self.num_layers):
        np.testing.assert_array_almost_equal(
            hidden_states[layer_idx, :, :],
            expected_output[layer_idx][:, :].numpy())
def test_extract_sentence_representations_exclude_embeddings(self):
    "Extraction without embedding layer"
    _, sentence, model_mock_output, _, expected_output, _ = self.tests_data[1]
    self.model.return_value = ("placeholder", model_mock_output)

    joined_input = " ".join(sentence)
    hidden_states, extracted_words = (
        transformers_extractor.extract_sentence_representations(
            joined_input, self.model, self.tokenizer,
            include_embeddings=False))

    # With the embedding layer excluded, one fewer layer is returned.
    self.assertEqual(hidden_states.shape[0], self.num_layers - 1)

    # Returned layer k corresponds to expected layer k+1 (embeddings at
    # index 0 of the fixture are dropped from the output).
    for layer_idx in range(1, self.num_layers):
        np.testing.assert_array_almost_equal(
            hidden_states[layer_idx - 1, :, :],
            expected_output[layer_idx][:, :].numpy())
def __call__(self, tokens, layer, neuron, **kwargs):
    """
    Visualize the activations of ``neuron`` from ``layer`` in the
    loaded model on the given tokens.

    Parameters
    ----------
    tokens : list of str
        List of tokens to compute and visualize the activations for
    layer : int
        Layer index of the chosen neuron to visualize in the loaded model
    neuron : int
        Neuron index of the chosen neuron
    kwargs : dict
        Dictionary of keyword arguments to pass to ``visualize_activations``
    """
    sentence = " ".join(tokens)

    # Suppress any extractor console output while computing activations.
    with self.__HiddenPrints__():
        activations, _ = extract_sentence_representations(
            sentence, self.model, self.tokenizer)

    # Slice out the single neuron's activation over all tokens.
    neuron_activations = activations[layer, :, neuron]
    return visualize_activations(tokens, neuron_activations, **kwargs)