def test_utf8_width_non_utf8(self):
    '''Check that utf8_width() copes with non-utf8 bytes instead of raising'''
    # utf8_width() treats byte sequences that are not valid utf8 as
    # undecodable, so the reported width is smaller than it would be for
    # the properly encoded text.
    #
    # How the undecodable bytes get replaced depends on the interpreter:
    #   Python >= 2.7:
    #     El veloz murci�lago salt� sobre el perro perezoso.
    #   Python < 2.7:
    #     El veloz murci�go salt�bre el perro perezoso.
    #
    # Decode a small latin1 sample with the 'replace' handler to find out
    # which of the two behaviours this interpreter has: a result that is a
    # single character long indicates the older (< 2.7) behaviour.
    probe = str('\xe9la'.encode('latin1'), 'utf8', 'replace')
    expected_width = 45 if len(probe) == 1 else 50
    tools.ok_(utf8.utf8_width(self.latin1_spanish) == expected_width)
def test_utf8_width(self):
    '''Check the number of spaces utf8_width() reports for utf8 strings'''
    # (fixture attribute on self, expected width) pairs, checked in order.
    # Attributes are looked up one at a time so that each fixture is only
    # touched right before its own assertion.
    expectations = (
        ('utf8_japanese', 31),
        ('utf8_spanish', 50),
        ('utf8_mixed', 23),
    )
    for fixture_name, width in expectations:
        sample = getattr(self, fixture_name)
        tools.ok_(utf8.utf8_width(sample) == width)