Example #1
0
 def test_encode_length(self):
     """Subword-encode inputs with `has_length_token=True` and check the ids.

     Each input starts with a number ("99", "97"); the assertion expects that
     number as the first id of its row, followed by the remaining ids and
     zeros, for ten entries per row.
     """
     expected_ids = [
         [99, 8, 9, 10, 11, 12, 38, 1, 0, 0],
         [97, 8, 9, 10, 11, 1, 0, 0, 0, 0],
     ]
     inputs = ["99 the quick brown fox.", "97 the quick brown"]
     encoded = parsing_ops.encode(
         inputs, 10, _SUBWORDS, "subword", has_length_token=True)
     self.assertAllEqual(expected_ids, encoded)
Example #2
0
 def test_spm_prefix(self):
     """Round-trip "sentencepiece_newline" encoding with a length token.

     The leading number of each input ("25", "23") is expected as the first
     id of its row, and decoding the ids is expected to return the text
     without that leading number.
     """
     inputs = ["25 the quick brown fox.", "23 the quick brown"]
     encoded = parsing_ops.encode(
         inputs, 10, _SPM, "sentencepiece_newline", has_length_token=True)

     # The first id of every row should be the numeric prefix of its input.
     self.assertAllEqual(25, encoded[0][0])
     self.assertAllEqual(23, encoded[1][0])

     expected_text = ["the quick brown fox.", "the quick brown"]
     decoded = parsing_ops.decode(encoded, _SPM, "sentencepiece_newline")
     self.assertAllEqual(expected_text, decoded)
Example #3
0
 def test_encode(self):
     """Subword-encode two strings and compare against the expected ids.

     Both expected rows hold ten entries and end in zeros.
     """
     expected_ids = [
         [8, 9, 10, 11, 12, 38, 1, 0, 0, 0],
         [8, 9, 10, 11, 1, 0, 0, 0, 0, 0],
     ]
     inputs = ["the quick brown fox.", "the quick brown"]
     encoded = parsing_ops.encode(inputs, 10, _SUBWORDS, "subword")
     self.assertAllEqual(expected_ids, encoded)
Example #4
0
 def test_tf_decode(self, encoder_type):
   """Check `parsing_ops.decode` against `public_parsing_ops.decode`.

   Both decode calls receive the same ids, vocab and `encoder_type`, and
   their outputs are asserted equal.

   Args:
     encoder_type: Encoder type forwarded to the encode and decode calls.
   """
   text = tf.constant(["the quick brown fox.", "the quick brown\n"])
   encoded = parsing_ops.encode(text, 10, _SPM_VOCAB, encoder_type)
   decoded = parsing_ops.decode(encoded, _SPM_VOCAB, encoder_type)
   public_decoded = public_parsing_ops.decode(
       encoded, _SPM_VOCAB, encoder_type)
   self.assertAllEqual(decoded, public_decoded)