def test_subword_decode_numpy_int32(self):
     """Decode ids that originate from an int32 NumPy array (subword encoder).

     The ids are built as an ``np.int32`` array, converted to a plain Python
     list, decoded, and the result is compared against the expected text.
     """
     subword_encoder = text_encoder_utils.create_text_encoder(
         "subword", _SUBWORD_VOCAB)
     token_ids = np.array([9, 10, 11, 12, 1, 0], dtype=np.int32)
     # Original author's note: without tolist(), the test will not pass for
     # NumPy array dtypes other than int64, so the array is converted to a
     # Python list before decoding.
     decoded_text = subword_encoder.decode(token_ids.tolist())
     self.assertEqual(decoded_text, "quick brown fox")
Ejemplo n.º 2
0
 def test_py_decode(self, encoder_type):
   """Check that both encoder factories decode the same ids to equal text.

   An encoder is created through ``text_encoder_utils`` and another through
   ``public_parsing_ops`` for the given ``encoder_type``; ids produced by the
   first one are decoded by both and the two results must be equal.
   """
   sample_text = "the quick brown fox jumps \n over the lazy dog."
   utils_encoder = text_encoder_utils.create_text_encoder(
       encoder_type, _SPM_VOCAB)
   ops_encoder = public_parsing_ops.create_text_encoder(
       encoder_type, _SPM_VOCAB)
   token_ids = utils_encoder.encode(sample_text)
   utils_decoded = utils_encoder.decode(token_ids)
   ops_decoded = ops_encoder.decode(token_ids)
   self.assertEqual(utils_decoded, ops_decoded)
Ejemplo n.º 3
0
 def test_py_encode(self, encoder_type):
   """Check that both encoder factories encode the same text to equal ids.

   An encoder is created through ``text_encoder_utils`` and another through
   ``public_parsing_ops`` for the given ``encoder_type``; the same sample text
   is encoded by each and the two results must be equal.
   """
   sample_text = "the quick brown fox\n jumps over the lazy dog.\n"
   utils_encoder = text_encoder_utils.create_text_encoder(
       encoder_type, _SPM_VOCAB)
   ops_encoder = public_parsing_ops.create_text_encoder(
       encoder_type, _SPM_VOCAB)
   utils_ids = utils_encoder.encode(sample_text)
   ops_ids = ops_encoder.encode(sample_text)
   self.assertEqual(utils_ids, ops_ids)
Ejemplo n.º 4
0
 def test_vocab(self, encoder_type):
   """Check that both encoder factories report the same ``vocab_size``."""
   utils_encoder = text_encoder_utils.create_text_encoder(
       encoder_type, _SPM_VOCAB)
   ops_encoder = public_parsing_ops.create_text_encoder(
       encoder_type, _SPM_VOCAB)
   utils_vocab_size = utils_encoder.vocab_size
   ops_vocab_size = ops_encoder.vocab_size
   self.assertEqual(utils_vocab_size, ops_vocab_size)
 def test_subword_decode(self):
     """Decode a fixed list of ids with the subword encoder and check the text."""
     subword_encoder = text_encoder_utils.create_text_encoder(
         "subword", _SUBWORD_VOCAB)
     token_ids = [9, 10, 11, 12, 1, 0]
     decoded_text = subword_encoder.decode(token_ids)
     self.assertEqual(decoded_text, "quick brown fox")
 def test_sentencepiece_offset(self):
     """Round-trip text through the "sentencepiece_newline" encoder with an extra id.

     The id 25 is prepended to the encoded ids, and decoding is still expected
     to return the original text unchanged.
     """
     # NOTE(review): the meaning of id 25 is not visible here; the test only
     # shows that prepending it must not alter the decoded text.
     newline_encoder = text_encoder_utils.create_text_encoder(
         "sentencepiece_newline", _SPM_VOCAB)
     original_text = "the quick brown fox jumps over the lazy dog"
     encoded_ids = newline_encoder.encode(original_text)
     padded_ids = [25] + encoded_ids
     decoded_text = newline_encoder.decode(padded_ids)
     self.assertEqual(original_text, decoded_text)
 def test_sentencepiece(self):
     """Round-trip text through the "sentencepiece" encoder (encode, then decode)."""
     spm_encoder = text_encoder_utils.create_text_encoder(
         "sentencepiece", _SPM_VOCAB)
     original_text = "the quick brown fox jumps over the lazy dog"
     encoded_ids = spm_encoder.encode(original_text)
     round_tripped = spm_encoder.decode(encoded_ids)
     self.assertEqual(original_text, round_tripped)