def test_lowercase(self):
    """Check that enabling only ``lowercase`` lowercases text and keeps accents."""
    raw_text = "Héllò"
    expected = "héllò"

    # Every other option is switched off so only lowercasing is exercised.
    bert_normalizer = BertNormalizer(
        clean_text=False,
        handle_chinese_chars=False,
        strip_accents=False,
        lowercase=True,
    )

    assert bert_normalizer.normalize_str(raw_text) == expected
def test_clean_text(self):
    """Check that enabling only ``clean_text`` drops a leading U+FEFF character."""
    raw_text = "\ufeffHello"
    expected = "Hello"

    # Every other option is switched off so only text cleaning is exercised.
    bert_normalizer = BertNormalizer(
        clean_text=True,
        handle_chinese_chars=False,
        strip_accents=False,
        lowercase=False,
    )

    assert bert_normalizer.normalize_str(raw_text) == expected