def test_tokens_to_text(self, test_data_dir):
    """Round-trip check: detokenizing the tokens of *text* recovers *text* exactly."""
    tokenizer = YouTokenToMeTokenizer(test_data_dir + self.model_name)
    text = "a b c e f g h i"
    # text -> tokens -> text must be lossless for plain in-vocabulary input
    round_tripped = tokenizer.tokens_to_text(tokenizer.text_to_tokens(text))
    assert text == round_tripped
def test_text_to_tokens(self, test_data_dir):
    """Special markers embedded in the input text must not appear among the produced tokens."""
    tokenizer = YouTokenToMeTokenizer(test_data_dir + self.model_name)
    text = "<BOS> a b c e <UNK> f g h i <EOS>"
    tokens = tokenizer.text_to_tokens(text)
    # the tokenizer should strip/ignore special-token strings rather than emit them
    for special in ("<BOS>", "<UNK>", "<EOS>"):
        assert tokens.count(special) == 0