Example #1
0
 def _create_bert_tokenizer(self):
   """Builds a Bert tokenizer process unit that references one vocab file.

   Returns:
     A tuple `(tokenizer, file_names)`: the `ProcessUnitT` configured with
     `BertTokenizerOptionsT`, and a list holding the name given to the
     vocabulary associated file.
   """
   file_name = "bert_vocab"

   # Vocabulary file entry referenced by the tokenizer options.
   vocab_entry = _metadata_fb.AssociatedFileT()
   vocab_entry.type = _metadata_fb.AssociatedFileType.VOCABULARY
   vocab_entry.name = file_name

   bert_options = _metadata_fb.BertTokenizerOptionsT()
   bert_options.vocabFile = [vocab_entry]

   process_unit = _metadata_fb.ProcessUnitT()
   process_unit.optionsType = _metadata_fb.ProcessUnitOptions.BertTokenizerOptions
   process_unit.options = bert_options
   return process_unit, [file_name]
Example #2
0
    def create_metadata(self) -> _metadata_fb.ProcessUnitT:
        """Builds the score calibration process unit from this object's state.

        The options carry the stored score transformation type and default
        score.

        Returns:
          A Flatbuffers Python object of the score calibration metadata.
        """
        calibration_options = _metadata_fb.ScoreCalibrationOptionsT()
        calibration_options.scoreTransformation = self._score_transformation_type
        calibration_options.defaultScore = self._default_score

        # Wrap the options in a process unit tagged with the matching type.
        process_unit = _metadata_fb.ProcessUnitT()
        process_unit.optionsType = (
            _metadata_fb.ProcessUnitOptions.ScoreCalibrationOptions)
        process_unit.options = calibration_options
        return process_unit
Example #3
0
    def create_metadata(self) -> _metadata_fb.ProcessUnitT:
        """Builds the Bert tokenizer process unit from this object's state.

        The tokenizer options reference a single vocabulary associated file
        named after the stored vocab file path.

        Returns:
          A Flatbuffers Python object of the Bert tokenizer metadata.
        """
        # Vocabulary file entry referenced by the tokenizer options.
        vocab_entry = _metadata_fb.AssociatedFileT()
        vocab_entry.name = self._vocab_file_path
        vocab_entry.type = _metadata_fb.AssociatedFileType.VOCABULARY
        vocab_entry.description = _VOCAB_FILE_DESCRIPTION

        bert_options = _metadata_fb.BertTokenizerOptionsT()
        bert_options.vocabFile = [vocab_entry]

        process_unit = _metadata_fb.ProcessUnitT()
        process_unit.optionsType = (
            _metadata_fb.ProcessUnitOptions.BertTokenizerOptions)
        process_unit.options = bert_options
        return process_unit
Example #4
0
 def _create_sentence_piece_tokenizer(self):
   """Builds a sentence piece tokenizer process unit with two associated files.

   Returns:
     A tuple `(tokenizer, file_names)`: the `ProcessUnitT` configured with
     `SentencePieceTokenizerOptionsT`, and a list with the sentence piece
     model name followed by the vocabulary file name.
   """
   model_name = "sp_model"
   vocab_name = "sp_vocab"

   # The model file entry only gets a name; no type is assigned to it.
   model_entry = _metadata_fb.AssociatedFileT()
   model_entry.name = model_name

   vocab_entry = _metadata_fb.AssociatedFileT()
   vocab_entry.type = _metadata_fb.AssociatedFileType.VOCABULARY
   vocab_entry.name = vocab_name

   sp_options = _metadata_fb.SentencePieceTokenizerOptionsT()
   sp_options.sentencePieceModel = [model_entry]
   sp_options.vocabFile = [vocab_entry]

   process_unit = _metadata_fb.ProcessUnitT()
   process_unit.optionsType = (
       _metadata_fb.ProcessUnitOptions.SentencePieceTokenizerOptions)
   process_unit.options = sp_options
   return process_unit, [model_name, vocab_name]
Example #5
0
    def create_metadata(self) -> _metadata_fb.TensorMetadataT:
        """Builds the input image tensor metadata from this object's state.

        Starts from the metadata produced by the parent class, sets the color
        space on its content properties, and attaches a normalization process
        unit when both `norm_mean` and `norm_std` are truthy.

        Returns:
          A Flatbuffers Python object of the input image metadata.
        """
        metadata = super().create_metadata()
        metadata.content.contentProperties.colorSpace = self.color_space_type

        # Without both normalization parameters there is nothing more to add.
        if not (self.norm_mean and self.norm_std):
            return metadata

        norm_options = _metadata_fb.NormalizationOptionsT()
        norm_options.mean = self.norm_mean
        norm_options.std = self.norm_std

        norm_unit = _metadata_fb.ProcessUnitT()
        norm_unit.optionsType = (
            _metadata_fb.ProcessUnitOptions.NormalizationOptions)
        norm_unit.options = norm_options

        # Normalization becomes the only process unit on the tensor.
        metadata.processUnits = [norm_unit]
        return metadata
Example #6
0
    def create_metadata(self) -> _metadata_fb.ProcessUnitT:
        """Builds the sentence piece tokenizer process unit from this object.

        The sentence piece model file is always referenced; a vocabulary file
        is referenced only when a vocab file path is set (truthy).

        Returns:
          A Flatbuffers Python object of the sentence piece tokenizer metadata.
        """
        sp_options = _metadata_fb.SentencePieceTokenizerOptionsT()

        model_entry = _metadata_fb.AssociatedFileT()
        model_entry.name = self._sentence_piece_model_path
        model_entry.description = self._SP_MODEL_DESCRIPTION
        sp_options.sentencePieceModel = [model_entry]

        # The vocabulary file is optional.
        if self._vocab_file_path:
            vocab_entry = _metadata_fb.AssociatedFileT()
            vocab_entry.name = self._vocab_file_path
            vocab_entry.description = self._SP_VOCAB_FILE_DESCRIPTION
            vocab_entry.type = _metadata_fb.AssociatedFileType.VOCABULARY
            sp_options.vocabFile = [vocab_entry]

        process_unit = _metadata_fb.ProcessUnitT()
        process_unit.optionsType = (
            _metadata_fb.ProcessUnitOptions.SentencePieceTokenizerOptions)
        process_unit.options = sp_options
        return process_unit