def _convert_to_ragged_inputs(self, inputs):
    """Returns `inputs` in ragged form, whitespace-tokenizing them if needed.

    Args:
      inputs: The text batch to convert.

    Returns:
      `inputs` unchanged when `self.input_data` is already a
      `RaggedTensor`; otherwise the result of running
      `text_ops.WhitespaceTokenizer().tokenize` on `inputs`.
    """
    # NOTE(review): the raggedness check inspects `self.input_data`, not the
    # `inputs` argument -- confirm against callers that this is intended.
    already_ragged = isinstance(self.input_data, ragged_tensor.RaggedTensor)
    if not already_ragged:
        inputs = text_ops.WhitespaceTokenizer().tokenize(inputs)
    return inputs
def benchmark_pad_along_dimension(self):
    """Runs `text_ops.pad_along_dimension` over whitespace-tokenized input.

    Replaces `self.input_data` with its whitespace-tokenized form, then hands
    the op and its arguments to `self._run` (presumably the shared benchmark
    driver -- verify against its definition).
    """
    whitespace_tokenizer = text_ops.WhitespaceTokenizer()
    self.input_data = whitespace_tokenizer.tokenize(self.input_data)

    pad_op = text_ops.pad_along_dimension
    pad_args = {
        "axis": -1,
        "right_pad": ["RP"],
        "left_pad": ["LP"],
    }
    self._run(pad_op, pad_args)
def benchmark_wordpiece_tokenizer(self):
    """Runs a `WordpieceTokenizer` over whitespace-tokenized input.

    Replaces `self.input_data` with its whitespace-tokenized form, builds a
    wordpiece tokenizer from the table created for `_BERT_VOCAB_PATH`
    (no unknown token, int64 outputs), and hands it to `self._run`.
    """
    whitespace_tokenizer = text_ops.WhitespaceTokenizer()
    self.input_data = whitespace_tokenizer.tokenize(self.input_data)

    vocab_table = self._create_table(_BERT_VOCAB_PATH)
    wordpiece = text_ops.WordpieceTokenizer(
        vocab_lookup_table=vocab_table,
        unknown_token=None,
        token_out_type=dtypes.int64,
    )
    self._run(wordpiece)
def benchmark_ngrams(self):
    """Runs `text_ops.ngrams` over whitespace-tokenized input.

    Replaces `self.input_data` with its whitespace-tokenized form, then hands
    the op to `self._run` with width-2 n-grams along the last axis, joined as
    strings with "|" as the separator.
    """
    whitespace_tokenizer = text_ops.WhitespaceTokenizer()
    self.input_data = whitespace_tokenizer.tokenize(self.input_data)

    ngram_op = text_ops.ngrams
    ngram_args = {
        "width": 2,
        "axis": -1,
        "reduction_type": text_ops.Reduction.STRING_JOIN,
        "string_separator": "|",
    }
    self._run(ngram_op, ngram_args)
def benchmark_sliding_window(self):
    """Runs `text_ops.sliding_window` over whitespace-tokenized input.

    Replaces `self.input_data` with its whitespace-tokenized form, then hands
    the op to `self._run` with a window width of 3 along the last axis.
    """
    whitespace_tokenizer = text_ops.WhitespaceTokenizer()
    self.input_data = whitespace_tokenizer.tokenize(self.input_data)

    window_op = text_ops.sliding_window
    window_args = {"width": 3, "axis": -1}
    self._run(window_op, window_args)