def tokens_to_indices(self,
                      tokens: List[Token],
                      vocabulary: Dictionary,
                      index_name: str) -> Dict[str, List[int]]:
    # pylint: disable=unused-argument
    return {
        "token_ids": ([10, 15] +
                      [vocabulary.get_token_index(token.text, 'words')
                       for token in tokens] +
                      [25]),
        "additional_key": [22, 29]
    }
def tokens_to_indices(self,
                      tokens: List[Token],
                      vocabulary: Dictionary,
                      index_name: str) -> Dict[str, List[int]]:
    indices: List[int] = []
    for token in itertools.chain(self.start_tokens, tokens, self.end_tokens):
        if getattr(token, 'text_id', None) is not None:
            # `text_id` being set on the token means that we aren't using the
            # vocab; we just use this id instead.
            indices.append(token.text_id)
        else:
            text = token.text
            if self.lowercase_tokens:
                text = text.lower()
            indices.append(vocabulary.get_token_index(text, self.namespace))
    return {index_name: indices}
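# A runnable sketch (not the library's real classes) of the per-token logic
# above: `Token` and `Dictionary` here are minimal illustrative stand-ins that
# model only `text`, `text_id`, and `get_token_index`.
from typing import Dict, Optional

class Token:
    def __init__(self, text: str, text_id: Optional[int] = None) -> None:
        self.text = text
        self.text_id = text_id

class Dictionary:
    """Assigns each (namespace, token) pair a stable integer id on first use."""
    def __init__(self) -> None:
        self._namespaces: Dict[str, Dict[str, int]] = {}

    def get_token_index(self, text: str, namespace: str) -> int:
        mapping = self._namespaces.setdefault(namespace, {})
        return mapping.setdefault(text, len(mapping))

vocab = Dictionary()
tokens = [Token('The'), Token('cat'), Token('<unk>', text_id=0)]
# Mirror the indexer's branch: honor text_id when present, otherwise lowercase
# the text (as lowercase_tokens=True would) and look it up in the namespace.
ids = [t.text_id if t.text_id is not None
       else vocab.get_token_index(t.text.lower(), 'tokens')
       for t in tokens]
print(ids)  # [0, 1, 0]: '<unk>' bypasses the vocabulary via text_id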
def tokens_to_indices(self,
                      tokens: List[Token],
                      vocabulary: Dictionary,
                      index_name: str) -> Dict[str, List[List[int]]]:
    indices: List[List[int]] = []
    # Combine the tokens with the start and end tokens.
    for token in itertools.chain(self.start_tokens, tokens, self.end_tokens):
        token_indices: List[int] = []
        if token.text is None:
            # A token with no text cannot be split into characters.
            raise ValueError('Cannot index characters for a token with no text')
        for character in self.character_tokenizer.tokenize(token.text):
            if getattr(character, 'text_id', None) is not None:
                # `text_id` being set means the character already carries its
                # own id, so we bypass the vocabulary.
                index = character.text_id
            else:
                index = vocabulary.get_token_index(character.text, self.namespace)
            token_indices.append(index)
        indices.append(token_indices)
    return {index_name: indices}
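# Shape check for the character-level indexer above: the output is one inner
# list of character ids per token. This standalone sketch assumes nothing from
# the library; `char_index` is a hypothetical stand-in for get_token_index.
from typing import Dict

char_vocab: Dict[str, int] = {}

def char_index(ch: str) -> int:
    # Assign each new character the next free id, like a growing vocabulary.
    return char_vocab.setdefault(ch, len(char_vocab))

words = ['cat', 'sat']
indices = [[char_index(ch) for ch in word] for word in words]
print(indices)  # [[0, 1, 2], [3, 1, 2]]: 'a' and 't' share ids across tokens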
def index(self, vocab: Dictionary):
    if self.indexed_labels is None:
        self.indexed_labels = [vocab.get_token_index(label, self.label_namespace)
                               for label in self.labels]
def index(self, vocab: Dictionary):
    if self.label_id is None:
        self.label_id = vocab.get_token_index(self.label, self.label_namespace)  # type: ignore
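# A hedged end-to-end sketch of the label-indexing pattern above. `Dictionary`
# and `LabelField` are illustrative stand-ins, not the library's classes; they
# model only the attributes that index() touches.
from typing import Dict, Optional

class Dictionary:
    def __init__(self, mapping: Dict[str, int]) -> None:
        self._mapping = mapping

    def get_token_index(self, text: str, namespace: str) -> int:
        return self._mapping[text]  # this stub ignores the namespace

class LabelField:
    def __init__(self, label: str, label_namespace: str = 'labels') -> None:
        self.label = label
        self.label_namespace = label_namespace
        self.label_id: Optional[int] = None

    def index(self, vocab: Dictionary) -> None:
        if self.label_id is None:  # same guard as above: convert at most once
            self.label_id = vocab.get_token_index(self.label, self.label_namespace)

field = LabelField('negative')
field.index(Dictionary({'positive': 0, 'negative': 1}))
print(field.label_id)  # 1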