def test_load_no_merges_file(self):
        """Check that the tokenizer loads from a directory with no ``merges.txt``.

        The tokenizer loaded from ``self.tmpdirname`` is saved to a temporary
        directory, ``merges.txt`` is deleted from it, and the tokenizer is
        loaded again from that directory. The resulting tokenizer is then
        saved to and reloaded from a second temporary directory, and the final
        instance is asserted to be non-``None``.
        """
        base_tokenizer = Speech2Text2Tokenizer.from_pretrained(self.tmpdirname)

        with tempfile.TemporaryDirectory() as stripped_dir:
            base_tokenizer.save_pretrained(stripped_dir)
            merges_path = os.path.join(stripped_dir, "merges.txt")
            os.remove(merges_path)

            # Loading from a directory without a merges file must not raise.
            merges_free_tokenizer = Speech2Text2Tokenizer.from_pretrained(stripped_dir)

        with tempfile.TemporaryDirectory() as roundtrip_dir:
            # Round-trip the tokenizer that was loaded without merges:
            # save it to a fresh directory and load it back.
            merges_free_tokenizer.save_pretrained(roundtrip_dir)
            reloaded_tokenizer = Speech2Text2Tokenizer.from_pretrained(roundtrip_dir)

        self.assertIsNotNone(reloaded_tokenizer)
    def test_tokenizer_decode(self):
        """Check that ``decode`` joins tokens ending in ``@@`` to the next token.

        Decodes a fixed list of token ids with the tokenizer loaded from
        ``self.tmpdirname`` and compares the result to the expected string.
        """
        tokenizer = Speech2Text2Tokenizer.from_pretrained(self.tmpdirname)

        # make sure @@ is correctly concatenated
        token_ids = [4, 6, 8, 7, 10]  # ["here@@", "couple", "words", "of@@", "the"]
        output_string = tokenizer.decode(token_ids)

        # assertEqual (rather than assertTrue on `==`) reports both strings
        # when the comparison fails, instead of just "False is not true".
        self.assertEqual(output_string, "herecouple words ofthe")