Example #1
    def test_ids_to_text(self, test_data_dir):
        tokenizer = YouTokenToMeTokenizer(test_data_dir + self.model_name)

        text = "a b c e f g h i"
        ids = tokenizer.text_to_ids(text)
        result = tokenizer.ids_to_text(ids)

        # encoding followed by decoding should reproduce the original text
        assert text == result
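
Example #1 exercises the encode/decode round trip of text_to_ids and ids_to_text. Outside of the pytest fixture, the same round trip looks roughly like the sketch below; the import path and the "bpe.model" file name are assumptions, not taken from the example.

from nemo.collections.common.tokenizers import YouTokenToMeTokenizer  # assumed import path

# load a previously trained YouTokenToMe BPE model (placeholder file name)
tokenizer = YouTokenToMeTokenizer("bpe.model")

ids = tokenizer.text_to_ids("a b c e f g h i")   # text -> list of BPE ids
text = tokenizer.ids_to_text(ids)                # ids -> text
assert text == "a b c e f g h i"                 # round trip is lossless here
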
Example #2
    def test_text_to_ids(self, test_data_dir):
        tokenizer = YouTokenToMeTokenizer(test_data_dir + self.model_name)

        text = "<BOS> a b c <UNK> e f g h i <EOS>"
        tokens = tokenizer.text_to_ids(text)

        # the special-token strings typed into the text are tokenized as plain
        # text, so none of the reserved special ids may appear in the output
        assert tokens.count(tokenizer.bos_id) == 0
        assert tokens.count(tokenizer.unk_id) == 0
        assert tokens.count(tokenizer.eos_id) == 0
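
Example #2 documents that special-token strings embedded in the input are tokenized as ordinary text rather than mapped to the reserved ids. With the underlying youtokentome library that the wrapper builds on, BOS/EOS ids are requested through encode flags instead; the sketch below assumes a placeholder "bpe.model" path.

import youtokentome as yttm

bpe = yttm.BPE(model="bpe.model")  # placeholder model path

# the "<BOS>"/"<EOS>" tags are split into ordinary subwords ...
as_plain_text = bpe.encode(["<BOS> a b c <EOS>"], output_type=yttm.OutputType.ID)

# ... while the reserved bos/eos ids are added explicitly at encode time
with_special = bpe.encode(["a b c"], output_type=yttm.OutputType.ID, bos=True, eos=True)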