def test_ids_to_text(self, test_data_dir):
    """Round-trip check: text -> ids -> text must reproduce the original string."""
    tokenizer = YouTokenToMeTokenizer(test_data_dir + self.model_name)
    sample = "a b c e f g h i"
    encoded = tokenizer.text_to_ids(sample)
    decoded = tokenizer.ids_to_text(encoded)
    assert decoded == sample
def test_text_to_ids(self, test_data_dir):
    """Literal special-token markers in raw text must not be encoded as special ids."""
    tokenizer = YouTokenToMeTokenizer(test_data_dir + self.model_name)
    sample = "<BOS> a b c <UNK> e f g h i <EOS>"
    ids = tokenizer.text_to_ids(sample)
    # Plain-text "<BOS>"/"<UNK>"/"<EOS>" should tokenize as ordinary text,
    # never as the reserved special-token ids.
    for special_id in (tokenizer.bos_id, tokenizer.unk_id, tokenizer.eos_id):
        assert ids.count(special_id) == 0