def testTokenInVocab(self):
    """Checks `ops.token_in_vocab` for present and absent tokens.

    The vocabulary mixes ASCII entries, an entry containing spaces, and
    several non-ASCII entries. Lookups are issued both with a single string
    and with a list of strings.
    """
    pieces = [
        '<S>',
        '</S>',
        '<UNK>',
        '<epsilon>',
        'a',
        'b c d e',
        'øut',
        'über',
        '♣',
        '愤青',
        '←',
    ]

    def is_member(query):
        # Builds the membership op and evaluates it in the default session,
        # so this must only be called inside the session block below.
        return ops.token_in_vocab(query, vocab=pieces).eval()

    with self.session(use_gpu=False):
        # Single tokens that are in the vocabulary.
        self.assertTrue(is_member('a'))
        self.assertTrue(is_member('<UNK>'))
        # A list query: every element must be reported as present.
        self.assertTrue(is_member(['b c d e', '♣']).all())
        # A token that is not in the vocabulary.
        self.assertFalse(is_member('unknown'))
Example #2
0
 def _StringToToken(self, tokstr):
     """Maps token strings to vocabulary ids, with NO_TOKEN for misses.

     Args:
       tokstr: Token string(s) to look up in `self._pieces`.

     Returns:
       The result of `tf.where`: the id from `ops.vocab_token_to_id` where
       `ops.token_in_vocab` reports the token as present, and `NO_TOKEN`
       (broadcast to the shape of `tokstr`) elsewhere.
     """
     is_known = ops.token_in_vocab(tokstr, vocab=self._pieces)
     known_ids = ops.vocab_token_to_id(tokstr, vocab=self._pieces)
     # Fill value with the same shape as the input so tf.where can select
     # between it and the looked-up ids.
     missing_ids = tf.broadcast_to(NO_TOKEN, tf.shape(tokstr))
     return tf.where(is_known, known_ids, missing_ids)