def test_full_tokenizer(self):
        tokenizer = TransfoXLTokenizer(vocab_file=self.vocab_file, lower_case=True)

        tokens = tokenizer.tokenize("<unk> UNwanted , running")
        self.assertListEqual(tokens, ["<unk>", "unwanted", ",", "running"])

        self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [0, 4, 8, 7])
    def test_full_tokenizer_moses_numbers(self):
        tokenizer = TransfoXLTokenizer(lower_case=False)
        text_in = "Hello (bracket) and side-scrolled [and] Henry's $5,000 with 3.34 m. What's up!?"
        tokens_out = [
            "Hello",
            "(",
            "bracket",
            ")",
            "and",
            "side",
            "@-@",
            "scrolled",
            "[",
            "and",
            "]",
            "Henry",
            "'s",
            "$",
            "5",
            "@,@",
            "000",
            "with",
            "3",
            "@.@",
            "34",
            "m",
            ".",
            "What",
            "'s",
            "up",
            "!",
            "?",
        ]

        self.assertListEqual(tokenizer.tokenize(text_in), tokens_out)

        self.assertEqual(tokenizer.convert_tokens_to_string(tokens_out),
                         text_in)
    def test_full_tokenizer_no_lower(self):
        tokenizer = TransfoXLTokenizer(lower_case=False)

        self.assertListEqual(
            tokenizer.tokenize(" \tHeLLo ! how  \n Are yoU ?  "), ["HeLLo", "!", "how", "Are", "yoU", "?"]
        )
    def test_full_tokenizer_lower(self):
        tokenizer = TransfoXLTokenizer(lower_case=True)

        self.assertListEqual(
            tokenizer.tokenize(" \tHeLLo ! how  \n Are yoU ?  "), ["hello", "!", "how", "are", "you", "?"]
        )