예제 #1
0
    def _convert_to_ragged_inputs(self, inputs):
        """Return `inputs` in ragged form, whitespace-tokenizing if needed.

        Args:
            inputs: The text batch to convert.

        Returns:
            `inputs` unchanged when `self.input_data` is a `RaggedTensor`;
            otherwise the result of running a `WhitespaceTokenizer` over
            `inputs`.
        """
        # NOTE(review): the type check inspects `self.input_data`, not the
        # `inputs` argument -- confirm this is intentional.
        needs_tokenizing = not isinstance(self.input_data,
                                          ragged_tensor.RaggedTensor)
        if needs_tokenizing:
            return text_ops.WhitespaceTokenizer().tokenize(inputs)
        return inputs
예제 #2
0
  def benchmark_pad_along_dimension(self):
    """Run `text_ops.pad_along_dimension` over whitespace tokens.

    Replaces `self.input_data` with its whitespace-tokenized form, then
    hands the op and its keyword arguments (last axis, "LP"/"RP" padding)
    to `self._run`.
    """
    whitespace_tokenizer = text_ops.WhitespaceTokenizer()
    self.input_data = whitespace_tokenizer.tokenize(self.input_data)

    op = text_ops.pad_along_dimension
    op_kwargs = {"axis": -1, "right_pad": ["RP"], "left_pad": ["LP"]}
    self._run(op, op_kwargs)
예제 #3
0
    def benchmark_wordpiece_tokenizer(self):
        """Run a `WordpieceTokenizer` over whitespace tokens.

        Replaces `self.input_data` with its whitespace-tokenized form, builds
        a `WordpieceTokenizer` whose lookup table comes from
        `self._create_table(_BERT_VOCAB_PATH)`, and passes it to `self._run`.
        """
        whitespace_tokens = text_ops.WhitespaceTokenizer().tokenize(
            self.input_data)
        self.input_data = whitespace_tokens

        tokenizer_cls = text_ops.WordpieceTokenizer
        vocab_table = self._create_table(_BERT_VOCAB_PATH)
        wordpiece = tokenizer_cls(
            vocab_lookup_table=vocab_table,
            unknown_token=None,
            token_out_type=dtypes.int64)
        self._run(wordpiece)
예제 #4
0
  def benchmark_ngrams(self):
    """Run `text_ops.ngrams` over whitespace tokens.

    Replaces `self.input_data` with its whitespace-tokenized form, then
    hands the op and its keyword arguments (width 2 along the last axis,
    string-joined with "|") to `self._run`.
    """
    whitespace_tokenizer = text_ops.WhitespaceTokenizer()
    self.input_data = whitespace_tokenizer.tokenize(self.input_data)

    op = text_ops.ngrams
    op_kwargs = {
        "width": 2,
        "axis": -1,
        "reduction_type": text_ops.Reduction.STRING_JOIN,
        "string_separator": "|",
    }
    self._run(op, op_kwargs)
예제 #5
0
    def benchmark_sliding_window(self):
        """Run `text_ops.sliding_window` over whitespace tokens.

        Replaces `self.input_data` with its whitespace-tokenized form, then
        hands the op and its keyword arguments (width 3 along the last axis)
        to `self._run`.
        """
        whitespace_tokenizer = text_ops.WhitespaceTokenizer()
        self.input_data = whitespace_tokenizer.tokenize(self.input_data)

        op = text_ops.sliding_window
        op_kwargs = {"width": 3, "axis": -1}
        self._run(op, op_kwargs)