def test_ids_to_text(self):
        """Check that text survives a text -> ids -> text round trip.

        Loads the SentencePiece model at ``./tests/data/m_common.model``,
        registers the special tokens stored under the ``'bert'`` key of
        ``MODEL_SPECIAL_TOKENS``, encodes a sentence containing ``[CLS]``,
        ``[MASK]`` and ``[SEP]`` with ``text_to_ids``, decodes the ids with
        ``ids_to_text`` and asserts the decoded string equals the input.
        """
        tokenizer = SentencePieceTokenizer("./tests/data/m_common.model")
        # Special tokens are added before encoding so the bracketed markers
        # in the sample text are known to the tokenizer.
        special_tokens = nemo_nlp.data.tokenizers.MODEL_SPECIAL_TOKENS['bert']
        tokenizer.add_special_tokens(special_tokens)

        text = "[CLS] a b c [MASK] e f [SEP] g h i [SEP]"
        ids = tokenizer.text_to_ids(text)
        result = tokenizer.ids_to_text(ids)

        # assertEqual reports both strings (and a diff) on failure, whereas
        # assertTrue(text == result) would only say "False is not true".
        self.assertEqual(text, result)
コード例 #2
0
ファイル: test_spc_tokenizer.py プロジェクト: phymucs/NeMo
    def test_ids_to_text(self):
        """Check that text survives a text -> ids -> text round trip.

        Loads the SentencePiece model at ``./tests/data/m_common.model``,
        registers ``[CLS]``, ``[MASK]`` and ``[SEP]`` as special tokens,
        encodes a sentence containing them with ``text_to_ids``, decodes the
        ids with ``ids_to_text`` and asserts the decoded string equals the
        input.
        """
        tokenizer = SentencePieceTokenizer("./tests/data/m_common.model")

        # Special tokens are added before encoding so the bracketed markers
        # in the sample text are known to the tokenizer.
        special_tokens = ["[CLS]", "[MASK]", "[SEP]"]
        tokenizer.add_special_tokens(special_tokens)

        text = "[CLS] a b c [MASK] e f [SEP] g h i [SEP]"
        ids = tokenizer.text_to_ids(text)
        result = tokenizer.ids_to_text(ids)

        # assertEqual reports both strings (and a diff) on failure, whereas
        # assertTrue(text == result) would only say "False is not true".
        self.assertEqual(text, result)