def testTokenInVocab(self):
  """Checks token_in_vocab on scalar and list queries against a small vocab."""
  # Special symbols, a piece containing spaces, and several non-ASCII entries.
  vocab = [
      '<S>',
      '</S>',
      '<UNK>',
      '<epsilon>',
      'a',
      'b c d e',
      'øut',
      'über',
      '♣',
      '愤青',
      '←',
  ]

  def _InVocab(tokens):
    """Builds the token_in_vocab op for `tokens` and returns its value."""
    return py_x_ops.token_in_vocab(tokens, vocab=vocab).eval()

  with self.session(use_gpu=False):
    # Scalar queries for entries that are present.
    self.assertTrue(_InVocab('a'))
    self.assertTrue(_InVocab('<UNK>'))
    # A list query: every element must be reported as present.
    self.assertTrue(_InVocab(['b c d e', '♣']).all())
    # A string that is not one of the vocab entries.
    self.assertFalse(_InVocab('unknown'))
def _StringToToken(self, tokstr):
  """Looks up ids for `tokstr`, using NO_TOKEN where a string is not a piece.

  Args:
    tokstr: The token string(s) to look up in `self._pieces`.

  Returns:
    The result of `tf.where`: the `vocab_token_to_id` value where
    `token_in_vocab` is true, and `NO_TOKEN` (broadcast to the shape of
    `tokstr`) elsewhere.
  """
  is_known = py_x_ops.token_in_vocab(tokstr, vocab=self._pieces)
  known_ids = py_x_ops.vocab_token_to_id(tokstr, vocab=self._pieces)
  # Expand the NO_TOKEN constant so it lines up elementwise with the lookup.
  fallback_ids = tf.broadcast_to(NO_TOKEN, tf.shape(tokstr))
  return tf.where(is_known, known_ids, fallback_ids)