Exemple #1
0
 def _create_bert_tokenizer(self):
   """Builds a Bert tokenizer process unit backed by one vocabulary file.

   Returns:
     A tuple `(tokenizer, file_names)`: the `ProcessUnitT` configured with
     `BertTokenizerOptionsT`, and a single-element list holding the name of
     the vocabulary file the options refer to.
   """
   vocab_name = "bert_vocab"

   # Associated-file entry that the tokenizer options point at.
   vocab_entry = _metadata_fb.AssociatedFileT()
   vocab_entry.type = _metadata_fb.AssociatedFileType.VOCABULARY
   vocab_entry.name = vocab_name

   process_unit = _metadata_fb.ProcessUnitT()
   bert_options = _metadata_fb.BertTokenizerOptionsT()
   bert_options.vocabFile = [vocab_entry]

   # optionsType tags which options table is stored in `options`.
   process_unit.optionsType = (
       _metadata_fb.ProcessUnitOptions.BertTokenizerOptions)
   process_unit.options = bert_options
   return process_unit, [vocab_name]
    def create_metadata(self) -> _metadata_fb.ProcessUnitT:
        """Builds the Flatbuffers metadata object for the Bert tokenizer.

        The vocabulary file is registered as an associated file whose name is
        taken from `self._vocab_file_path`.

        Returns:
          A `ProcessUnitT` whose options are a `BertTokenizerOptionsT` that
          references the vocabulary file.
        """
        # Describe the vocabulary file that the tokenizer depends on.
        vocab_entry = _metadata_fb.AssociatedFileT()
        vocab_entry.type = _metadata_fb.AssociatedFileType.VOCABULARY
        vocab_entry.name = self._vocab_file_path
        vocab_entry.description = _VOCAB_FILE_DESCRIPTION

        process_unit = _metadata_fb.ProcessUnitT()
        bert_options = _metadata_fb.BertTokenizerOptionsT()
        bert_options.vocabFile = [vocab_entry]

        # optionsType tags which options table is stored in `options`.
        process_unit.optionsType = (
            _metadata_fb.ProcessUnitOptions.BertTokenizerOptions)
        process_unit.options = bert_options
        return process_unit