def test_ids_to_text(self, test_data_dir):
    """Round-trip a string through text_to_ids / ids_to_text.

    The decoded text must equal the original input exactly, including the
    model's native special markers (<cls>, <sep>, </s>).
    """
    tokenizer = SentencePieceTokenizer(test_data_dir + self.model_name)
    original = "<cls> a b c <sep> e f g h i </s>"
    round_trip = tokenizer.ids_to_text(tokenizer.text_to_ids(original))
    assert round_trip == original
def test_ids_to_text(self, test_data_dir):
    """Round-trip text containing registered special tokens.

    After add_special_tokens, the [CLS]/[MASK]/[SEP] markers must survive
    the encode/decode cycle byte-for-byte.
    """
    # NOTE(review): this method has the same name as the preceding
    # test_ids_to_text — if both are defined in the same class, this later
    # definition shadows the earlier one and only this test runs; confirm.
    tokenizer = SentencePieceTokenizer(test_data_dir + self.model_name)
    tokenizer.add_special_tokens(MODEL_SPECIAL_TOKENS)
    original = "[CLS] a b c [MASK] e f [SEP] g h i [SEP]"
    round_trip = tokenizer.ids_to_text(tokenizer.text_to_ids(original))
    assert round_trip == original