Ejemplo n.º 1
0
    def decode(self, ids, strip_extraneous=False):
        """Turns a sequence of subtoken IDs back into a native string.

        Args:
          ids: a list of integers in the range [0, vocab_size)
          strip_extraneous: bool, whether to strip off extraneous tokens
            (EOS and PAD) before decoding. When set, every id below
            `self._num_reserved_ids` is handed to `strip_ids` as an id to
            strip.

        Returns:
          a native string
        """
        if strip_extraneous:
            # A missing/falsy reserved-id count is treated as zero, i.e. an
            # empty list of ids to strip.
            reserved_count = self._num_reserved_ids or 0
            ids = strip_ids(ids, list(range(reserved_count)))
        subtokens = self._subtoken_ids_to_tokens(ids)
        return tokenizer.decode(subtokens)
Ejemplo n.º 2
0
 def test_invertibility_on_random_strings(self):
   """Checks that encode followed by decode reproduces random strings.

   Runs 1000 trials, each on a 10-character unicode string whose code
   points are drawn uniformly from [0, 65535].
   """
   for _ in range(1000):
     chars = (six.unichr(random.randint(0, 65535)) for _ in range(10))
     original = u"".join(chars)
     round_tripped = tokenizer.decode(tokenizer.encode(original))
     self.assertEqual(original, round_tripped)
Ejemplo n.º 3
0
 def test_decode(self):
   """Checks that decoding a fixed token list yields the expected sentence."""
   tokens = [u"Dude", u" - ", u"that", u"'", u"s", u"so", u"cool", u"."]
   expected = u"Dude - that's so cool."
   self.assertEqual(expected, tokenizer.decode(tokens))