def test_non_ascii_chars_replaced(self):
    """Check clean() against a sample containing accented Latin letters.

    The expected value is the same sentence with each accented letter
    replaced by its unaccented ASCII letter; note that the trailing
    "ß" has no replacement in the expected text ("große" -> "groe").
    """
    accented = u"Klüft skräms inför på fédéral électoral große"
    ascii_only = "Kluft skrams infor pa federal electoral groe"
    self.assertEqual(clean(accented), ascii_only)
def test_html_characters_restored(self):
    """Check that clean() returns an HTML snippet with its markup intact.

    The input and the expected output are the same string, so the
    assertion passes only if clean() leaves the tags, the quoted
    attribute value and the surrounding text exactly as given.
    """
    # Adjacent string literals are joined by implicit concatenation.
    markup = (
        'The quick brown fox <b>jumps</b> over'
        ' the <a href="http://lazy.com">lazy</a> dog.'
    )
    expected = (
        'The quick brown fox <b>jumps</b> over'
        ' the <a href="http://lazy.com">lazy</a> dog.'
    )
    self.assertEqual(clean(markup), expected)