    def test_tftext_sentencepiece_tokenizer_bos_eos(self):
        """Check that the new tokenizer produces the same result as the tftext one with bos and eos."""
        tftext_sp = tensorflow_text.SentencepieceTokenizer(
            self.sentencepiece_model, add_bos=True, add_eos=True)
        opt_sp = sentencepiece_tokenizer.SentencepieceTokenizer(
            self.sentencepiece_model, add_bos=True, add_eos=True)
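
        # Both tokenizers are configured to prepend the model's BOS id and
        # append its EOS id to each row of token ids, so their outputs
        # should match element for element.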

        input_text = [
            u" ", u"to be or not to be", u"ignored by length text1",
            u"ignored by length text2"
        ]
        tftext_tokenized = tftext_sp.tokenize(input_text)
        opt_tokenized = opt_sp.tokenize(input_text)
        self.assertAllEqual(tftext_tokenized, opt_tokenized)

    def test_tftext_sentencepiece_detokenizer(self):
        """Check that the new tokenizer produces the same result as the tftext one."""
        tftext_sp = tensorflow_text.SentencepieceTokenizer(
            self.sentencepiece_model)
        opt_sp = sentencepiece_tokenizer.SentencepieceTokenizer(
            self.sentencepiece_model)

        input_text = [
            u" ", u"to be or not to be", u"ignored by length text1",
            u"ignored by length text2"
        ]
        tftext_tokenized = tftext_sp.tokenize(input_text)

        # Detokenize the tf.text token ids with both implementations; the
        # optimized detokenizer should reconstruct identical strings.
        tftext_detokenized = tftext_sp.detokenize(tftext_tokenized)
        opt_detokenized = opt_sp.detokenize(tftext_tokenized)
        self.assertAllEqual(tftext_detokenized, opt_detokenized)

    def benchmarkTokenizer(self):
        sp_model = _GetSentencepieceModel()
        test_text = [
            "This week we celebrate the casts and creatives who have come together"
            " to bring us our favorite.",
            "More Stacks products demonstrated commitment to excellent support.",
            "Test, test, test."
        ]

        tftext_sp = tensorflow_text.SentencepieceTokenizer(sp_model)
        opt_sp = sentencepiece_tokenizer.SentencepieceTokenizer(sp_model)
        iter_number = 1000

        # Time the optimized tokenizer.
        start = time.time()
        for _ in range(iter_number):
            _ = opt_sp.tokenize(test_text)
        self.report_benchmark(iters=iter_number,
                              wall_time=time.time() - start,
                              name="opt")

        # Time the reference tf.text tokenizer for comparison.
        start = time.time()
        for _ in range(iter_number):
            _ = tftext_sp.tokenize(test_text)
        self.report_benchmark(iters=iter_number,
                              wall_time=time.time() - start,
                              name="tf.text")

# The original fragment omits the class declaration; tf.keras.layers.Layer is assumed.
class TokenizerLayer(tf.keras.layers.Layer):

    def __init__(self, sentencepiece_model, **kwargs):
        super(TokenizerLayer, self).__init__(**kwargs)
        self.sp = sentencepiece_tokenizer.SentencepieceTokenizer(
            sentencepiece_model)
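
    # A minimal `call` sketch, assuming the layer simply tokenizes its string
    # inputs with the wrapped SentencepieceTokenizer; the original fragment
    # ends at __init__.
    def call(self, inputs):
        return self.sp.tokenize(inputs)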