Beispiel #1
0
 def tokens_to_indices(
     self, tokens: List[Token], vocabulary: Dictionary, index_name: str
 ) -> Dict[str, List[int]]:  # pylint: disable=unused-argument
     """Return a two-key mapping built around fixed sentinel ids.

     ``"token_ids"`` holds the constants 10 and 15, followed by the index of
     each token's text looked up in the ``'words'`` namespace of
     ``vocabulary``, followed by the constant 25.  ``"additional_key"`` is
     always ``[22, 29]``.  ``index_name`` is accepted but not used.
     """
     word_ids = [
         vocabulary.get_token_index(entry.text, 'words') for entry in tokens
     ]
     return {
         "token_ids": [10, 15, *word_ids, 25],
         "additional_key": [22, 29],
     }
    def tokens_to_indices(self, tokens: List[Token], vocabulary: Dictionary,
                          index_name: str):
        """Map every token (framed by the start/end tokens) to a single id.

        The sequence processed is ``self.start_tokens``, then ``tokens``,
        then ``self.end_tokens``.  A token carrying a non-``None``
        ``text_id`` contributes that id directly; any other token has its
        text (lower-cased when ``self.lowercase_tokens`` is truthy) looked
        up in ``vocabulary`` under ``self.namespace``.

        Returns a dict with the single key ``index_name`` mapping to the
        list of ids.
        """
        def resolve(entry):
            # A preset `text_id` bypasses the vocabulary entirely.
            preset_id = getattr(entry, 'text_id', None)
            if preset_id is not None:
                return preset_id
            surface = entry.text
            if self.lowercase_tokens:
                surface = surface.lower()
            return vocabulary.get_token_index(surface, self.namespace)

        framed = itertools.chain(self.start_tokens, tokens, self.end_tokens)
        return {index_name: [resolve(entry) for entry in framed]}
Beispiel #3
0
    def tokens_to_indices(self, tokens: List[Token], vocabulary: Dictionary,
                          index_name: str) -> Dict[str, List[List[int]]]:
        """Map every token to the list of ids of its characters.

        The sequence processed is ``self.start_tokens``, then ``tokens``,
        then ``self.end_tokens``.  Each token's text is split with
        ``self.character_tokenizer.tokenize``; a character carrying a
        non-``None`` ``text_id`` contributes that id directly, otherwise its
        text is looked up in ``vocabulary`` under ``self.namespace``.

        Returns:
            A dict with the single key ``index_name`` mapping to one list of
            character ids per processed token, in order.

        Raises:
            ValueError: if any token's ``text`` is ``None``.  Such tokens
                used to be skipped silently, which left the output with
                fewer entries than the input and so misaligned with it.
        """
        indices: List[List[int]] = []

        # Combine the tokens with start and end tokens
        for token in itertools.chain(self.start_tokens, tokens,
                                     self.end_tokens):
            if token.text is None:
                raise ValueError(
                    "Cannot index the characters of a token whose text is "
                    "None; the tokenizer must retain token text.")

            token_indices: List[int] = []
            for c in self.character_tokenizer.tokenize(token.text):
                # A preset `text_id` bypasses the vocabulary entirely.
                preset_id = getattr(c, 'text_id', None)
                if preset_id is not None:
                    token_indices.append(preset_id)
                else:
                    token_indices.append(
                        vocabulary.get_token_index(c.text, self.namespace))
            indices.append(token_indices)

        return {index_name: indices}
 def index(self, vocab: Dictionary):
     """Fill ``self.indexed_labels`` from ``self.labels`` if not yet set.

     When ``self.indexed_labels`` is ``None``, each label is looked up in
     ``vocab`` under ``self.label_namespace`` and the resulting list is
     stored on the instance.  An already-populated value is left untouched.
     """
     if self.indexed_labels is not None:
         return

     label_ids = []
     for label in self.labels:
         label_ids.append(
             vocab.get_token_index(label, self.label_namespace))
     self.indexed_labels = label_ids
Beispiel #5
0
 def index(self, vocab: Dictionary):
     """Resolve ``self.label`` to ``self.label_id`` if not yet set.

     When ``self.label_id`` is ``None``, the label is looked up in ``vocab``
     under ``self.label_namespace`` and the result is stored on the
     instance.  An existing id is left untouched.
     """
     if self.label_id is not None:
         return
     resolved = vocab.get_token_index(self.label, self.label_namespace)  # type: ignore
     self.label_id = resolved