Example #1
    def test_tokens_to_text(self, test_data_dir):
        # Load the BPE model shipped with the test data.
        tokenizer = YouTokenToMeTokenizer(test_data_dir + self.model_name)

        # Round trip: text -> tokens -> text should reproduce the input exactly.
        text = "a b c e f g h i"
        tokens = tokenizer.text_to_tokens(text)
        result = tokenizer.tokens_to_text(tokens)

        assert text == result
Example #2
    def test_text_to_tokens(self, test_data_dir):
        # Load the BPE model shipped with the test data.
        tokenizer = YouTokenToMeTokenizer(test_data_dir + self.model_name)

        # Special-token markers written into the raw text must not survive
        # tokenization: text_to_tokens should strip <BOS>, <UNK>, and <EOS>.
        text = "<BOS> a b c e <UNK> f g h i <EOS>"
        tokens = tokenizer.text_to_tokens(text)

        assert tokens.count("<BOS>") == 0
        assert tokens.count("<UNK>") == 0
        assert tokens.count("<EOS>") == 0
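Both examples are methods of a pytest test class: `test_data_dir` is a fixture and `self.model_name` is a class attribute, neither of which is shown above. A minimal sketch of the scaffolding they assume follows; the import path, fixture directory, and model filename are assumptions for illustration, not taken from the snippets:

import pytest

# Assumed import path for NeMo's YouTokenToMe tokenizer wrapper.
from nemo.collections.common.tokenizers import YouTokenToMeTokenizer


class TestYouTokenToMeTokenizer:
    # Hypothetical filename of a pre-trained youtokentome BPE model; the
    # leading slash matters because the tests build the path by string
    # concatenation (test_data_dir + self.model_name).
    model_name = "/yttm.model"

    @pytest.fixture
    def test_data_dir(self):
        # Hypothetical directory containing the model file.
        return "tests/data"

    # ... the test methods above go here ...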