Example #1
 def index(self, vocab: Vocabulary):
     # Resolve each string label to its integer id, then cache the
     # total number of labels in the namespace.
     if self._label_ids is None:
         self._label_ids = [
             vocab.get_token_index(label, self._label_namespace)  # type: ignore
             for label in self.labels
         ]
     if not self._num_labels:
         self._num_labels = vocab.get_vocab_size(self._label_namespace)
 def index(self, vocab: Vocabulary):
     if not self._skip_indexing:
         self._indexed_labels = [
             vocab.get_token_index(label,
                                   self._label_namespace)  # type: ignore
             for label in self.labels
         ]
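Both index() variants above do the same job: resolve the field's string labels to integer ids through a shared Vocabulary. Below is a minimal usage sketch, assuming the AllenNLP API these snippets appear to be drawn from (Vocabulary, MultiLabelField); the exact ids depend on how the vocabulary was built.

    from allennlp.data import Vocabulary
    from allennlp.data.fields import MultiLabelField

    # Build a tiny vocabulary with two labels in the "labels" namespace.
    vocab = Vocabulary()
    vocab.add_token_to_namespace("sports", namespace="labels")
    vocab.add_token_to_namespace("politics", namespace="labels")

    field = MultiLabelField(["sports"], label_namespace="labels")
    field.index(vocab)  # resolves label ids against the vocabulary, as above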
Example #3
    def indices_to_tokens(self, indexed_tokens: IndexedTokenList,
                          vocabulary: Vocabulary) -> List[Token]:
        # Invert indexing: rebuild Token objects from stored token ids
        # (and type ids, when present).
        self._add_encoding_to_vocabulary_if_needed(vocabulary)

        token_ids = indexed_tokens["token_ids"]
        type_ids = indexed_tokens.get("type_ids")

        return [
            Token(
                text=vocabulary.get_token_from_index(token_ids[i],
                                                     self._namespace),
                text_id=token_ids[i],
                type_id=type_ids[i] if type_ids is not None else None,
            ) for i in range(len(token_ids))
        ]
    def tokens_to_indices(self, tokens: List[Token],
                          vocabulary: Vocabulary) -> Dict[str, List[int]]:
        indices: List[int] = []

        for token in itertools.chain(self._start_tokens, tokens,
                                     self._end_tokens):
            text = self._get_feature_value(token)
            if self.namespace is None:
                # We could have a check here that `text` is an int; not sure it's worth it.
                indices.append(text)  # type: ignore
            else:
                if self.lowercase_tokens:
                    text = text.lower()
                indices.append(vocabulary.get_token_index(
                    text, self.namespace))

        return {"tokens": indices}
 def tokens_to_indices(
         self, tokens: List[Token],
         vocabulary: Vocabulary) -> Dict[str, List[List[int]]]:
     indices: List[List[int]] = []
     for token in itertools.chain(self._start_tokens, tokens,
                                  self._end_tokens):
         token_indices: List[int] = []
         if token.text is None:
             raise ConfigurationError(
                 "TokenCharactersIndexer needs a tokenizer that retains text"
             )
         for character in self._character_tokenizer.tokenize(token.text):
             if getattr(character, "text_id", None) is not None:
                 # `text_id` being set on the token means that we aren't using the vocab, we just
                 # use this id instead.
                 index = character.text_id
             else:
                 index = vocabulary.get_token_index(character.text,
                                                    self._namespace)
             token_indices.append(index)
         indices.append(token_indices)
     return {"token_characters": indices}
 def index(self, vocab: Vocabulary):
     # Labels may be absent (e.g. at prediction time); index only
     # when they were provided.
     if self.labels is not None:
         self._indexed_labels = [
             vocab.get_token_index(label, self._label_namespace)
             for label in self.labels
         ]
Example #7
 def index(self, vocab: Vocabulary):
     if not self._skip_indexing:
         self._label_id = vocab.get_token_index(
             self.label, self._label_namespace  # type: ignore
         )
 def index(self, vocab: Vocabulary):
     # Look up each source token's text in the *target* namespace,
     # building a source-to-target id mapping.
     self._mapping_array = [
         vocab.get_token_index(x.ensure_text(), self._target_namespace)
         for x in self._source_tokens
     ]
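For the single-label case, a sketch assuming AllenNLP's LabelField; passing skip_indexing=True with an integer label bypasses the vocabulary lookup entirely.

    from allennlp.data import Vocabulary
    from allennlp.data.fields import LabelField

    vocab = Vocabulary()
    vocab.add_token_to_namespace("positive", namespace="labels")

    field = LabelField("positive", label_namespace="labels")
    field.index(vocab)  # caches the integer id for "positive"

    pre_indexed = LabelField(0, skip_indexing=True)  # no index() lookup needed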