def test_vocab_token_to_load_id(self):
    """Check token lookup when ids are loaded from the vocab entries.

    Every vocab entry here is written as ``'<token> <id>'`` and the op is
    called with ``load_token_ids_from_vocab=True``. The assertions expect
    the id written in the entry, not the entry's position in the list.
    """
    vocab_entries = [
        '<s> 3',
        '</s> 5',
        '<unk> 7',
        '<epsilon> 9',
        'a 2',
        'b c d e 4',
        'øut 8',
        'über 10',
        '♣ -1',
        '愤青 -3',
        '← -5',
    ]

    def lookup(tokens):
        # Build the lookup op against the shared vocab and evaluate it in
        # the session opened below.
        return py_x_ops.vocab_token_to_id(
            tokens,
            vocab=vocab_entries,
            load_token_ids_from_vocab=True).eval()

    with self.cached_session(use_gpu=False, force_gpu=False):
        # Single tokens resolve to the id written next to them.
        self.assertEqual(3, lookup('<s>'))
        self.assertEqual(2, lookup('a'))

        # Batched lookup; covers a token containing spaces and a negative id.
        self.assertAllEqual([4, -1], lookup(['b c d e', '♣']))

        # A token absent from the vocab is expected to yield the id
        # listed for '<unk>'.
        self.assertEqual(7, lookup('unknown'))
def test_vocab_token_to_id(self):
    """Check token lookup when ids are not loaded from the vocab entries.

    The vocab is a plain list of tokens and the op is called without
    ``load_token_ids_from_vocab``. The assertions expect each token's
    zero-based position in the list as its id.
    """
    vocab_entries = [
        '<s>',
        '</s>',
        '<unk>',
        '<epsilon>',
        'a',
        'b c d e',
        'øut',
        'über',
        '♣',
        '愤青',
        '←',
    ]

    def lookup(tokens):
        # Build the lookup op against the shared vocab and evaluate it in
        # the session opened below.
        return py_x_ops.vocab_token_to_id(tokens, vocab=vocab_entries).eval()

    with self.cached_session(use_gpu=False, force_gpu=False):
        # Single tokens resolve to their position in the list.
        self.assertEqual(0, lookup('<s>'))
        self.assertEqual(4, lookup('a'))

        # Batched lookup; covers a token containing spaces and a
        # non-ASCII token.
        self.assertAllEqual([5, 8], lookup(['b c d e', '♣']))

        # A token absent from the vocab is expected to yield the
        # position of '<unk>'.
        self.assertEqual(2, lookup('unknown'))