def test_to_ids(self):
   """Checks the ids produced by the tokenize fn for two short snippets.

   The first snippet fills its split exactly once BOS/EOS are added; the
   second is one id short of its split, so the expected output ends with
   the pad id.
   """
   pad_id, _, bos_id, eos_id = shakespeare_dataset.get_special_tokens()

   # Case 1: three characters with split_length=5 -> no padding expected.
   tokenize_len5 = shakespeare_dataset._build_tokenize_fn(split_length=5)
   short_example = {'snippets': tf.constant('abc')}
   short_ids = self.evaluate(tokenize_len5(short_example))
   expected_short = [bos_id, 64, 42, 21, eos_id]
   self.assertAllEqual(short_ids, expected_short)

   # Case 2: nine characters with split_length=12 -> one trailing pad id.
   tokenize_len12 = shakespeare_dataset._build_tokenize_fn(split_length=12)
   padded_example = {'snippets': tf.constant('star wars')}
   padded_ids = self.evaluate(tokenize_len12(padded_example))
   expected_padded = [
       bos_id, 25, 5, 64, 46, 14, 26, 64, 46, 25, eos_id, pad_id,
   ]
   self.assertAllEqual(padded_ids, expected_padded)
 def test_last_id_not_oov(self):
   """Checks that a carriage return gets a regular id while '~' gets OOV.

   The expected output maps the carriage return to id 86 (presumably the
   last vocabulary id, going by the test name) and only the tilde to the
   out-of-vocabulary id.
   """
   _, oov_id, bos_id, eos_id = shakespeare_dataset.get_special_tokens()
   tokenize_len5 = shakespeare_dataset._build_tokenize_fn(split_length=5)

   example = {'snippets': tf.constant('a\r~')}
   # The resulting tensor is handed to assertAllEqual directly, without an
   # explicit self.evaluate call.
   token_ids = tokenize_len5(example)

   expected_ids = [bos_id, 64, 86, oov_id, eos_id]
   self.assertAllEqual(token_ids, expected_ids)