def test_convert_tokens_to_ids(self):
    """Check that `convert_by_vocab` turns a token list into the expected ids."""
    vocab_tokens = [
        "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn",
        "##ing",
    ]
    # Each token's id is simply its position in `vocab_tokens`.
    vocab = {token: index for index, token in enumerate(vocab_tokens)}

    tokens = ["un", "##want", "##ed", "runn", "##ing"]
    self.assertAllEqual(
        tokenization.convert_by_vocab(vocab, tokens),
        [7, 4, 5, 8, 9],
    )
def convert_ids_to_tokens(self, ids):
    """Run `ids` through `tokenization.convert_by_vocab` using `self.inv_vocab`.

    NOTE(review): `inv_vocab` is presumably an id -> token mapping; it is
    defined outside this block, so confirm against the class constructor.
    """
    convert = tokenization.convert_by_vocab
    return convert(self.inv_vocab, ids)
def convert_tokens_to_ids(self, tokens):
    """Run `tokens` through `tokenization.convert_by_vocab` using `self.vocab`.

    NOTE(review): `vocab` is presumably a token -> id mapping; it is defined
    outside this block, so confirm against the class constructor.
    """
    convert = tokenization.convert_by_vocab
    return convert(self.vocab, tokens)