def tag(self, text_field: TextField) -> Dict[str, Any]:
    """
    Run the model over a single ``TextField`` and decode per-token tags.

    Parameters
    ----------
    text_field : ``TextField``, required.
        A ``TextField`` containing the text to be tagged.

    Returns
    -------
    A Dict containing:

    tags : List[str]
        One predicted (argmax) tag per input token.
    class_probabilities : numpy.Array
        Shape (text_input_length, num_classes); each row is a
        distribution over tag classes for one token.
    """
    text_field.index(self.vocab)
    padding_lengths = text_field.get_padding_lengths()
    array_input = text_field.as_array(padding_lengths)
    # TODO(Mark): Generalise how the array is transformed into a variable after settling the data API.
    # Unsqueeze to create a singleton batch dimension, since pytorch
    # only accepts batched inputs.
    token_tensor = torch.LongTensor(array_input["tokens"])
    array_input = {"tokens": torch.autograd.Variable(token_tensor).unsqueeze(0)}
    output_dict = self.forward(tokens=array_input)

    # Drop the singleton batch dimension again before decoding.
    predictions = output_dict["class_probabilities"].data.squeeze(0)
    _, argmax = predictions.max(-1)
    indices = argmax.squeeze(1).numpy()
    tags = [self.vocab.get_token_from_index(index, namespace="tags")
            for index in indices]
    return {"tags": tags, "class_probabilities": predictions.numpy()}
def test_as_array_produces_token_array(self):
    # Tokenise a short sentence with spacy and index it via the
    # SpacyTokenIndexer.
    indexer = SpacyTokenIndexer()
    nlp = get_spacy_model("en_core_web_sm", parse=False, ner=False)
    tokens = list(nlp("This is a sentence."))
    field = TextField(tokens, token_indexers={"spacy": indexer})
    vocab = Vocabulary()
    field.index(vocab)

    # The indexer alone should yield one 96-dimensional vector per token.
    array_dict = indexer.tokens_to_indices(tokens, vocab)
    assert len(array_dict["tokens"]) == 5
    assert len(array_dict["tokens"][0]) == 96

    # The same shape should come back through the TextField tensor API.
    lengths = field.get_padding_lengths()
    array_dict = field.as_tensor(lengths)
    assert list(array_dict["spacy"]["tokens"].shape) == [5, 96]