def test_circled_latin(self):
    # One run of 26 circled letters starting at U+24D0; each must
    # transliterate to the matching plain lowercase letter.
    first_circled = 0x24d0
    for offset, expected in enumerate('abcdefghijklmnopqrstuvwxyz'):
        actual = unidecode(chr(first_circled + offset))
        self.assertEqual(actual, expected)
def test_mathematical_digits(self):
    # The range holds five back-to-back runs of the digits 0-9, so the
    # expected digit is the offset into the range modulo 10.
    range_start = 0x1d7ce
    range_end = 0x1d800
    for codepoint in range(range_start, range_end):
        expected_digit = (codepoint - range_start) % 10
        self.assertEqual(unidecode(chr(codepoint)), str(expected_digit))
def handle_charref(self, name):
    """Append the character for a numeric character reference.

    ``name`` is expected to be either decimal digits or ``x``/``X``
    followed by hexadecimal digits (presumably the text between ``&#``
    and ``;`` as delivered by an HTML parser -- confirm against caller).

    The decoded character is passed to ``self._append``.  A reference
    that cannot be decoded (malformed digits, or a code point outside
    the range ``chr()`` accepts) is appended as U+FFFD REPLACEMENT
    CHARACTER instead of raising, mirroring how the HTML specification
    treats out-of-range numeric references.
    """
    if name.startswith(('x', 'X')):
        digits, base = name[1:], 16
    else:
        digits, base = name, 10
    try:
        char = chr(int(digits, base))
    except (ValueError, OverflowError):
        # int() raises ValueError on malformed digits; chr() raises
        # ValueError above U+10FFFF and OverflowError for values that do
        # not fit a C int.
        char = '\ufffd'
    # chr() never returns None, so no None check is needed before appending.
    self._append(char)
def ENTITIES_REPLACEMENT(matchobj):
    """Return the replacement text for one matched entity reference.

    The match object is expected to carry three groups:

    * group 1 -- hexadecimal digits of a numeric reference,
    * group 2 -- decimal digits of a numeric reference,
    * group 3 -- an entity name, looked up in ``ENTITIES_TO_CHARACTERS``.

    Whenever the reference cannot be resolved -- an unknown name, or a
    numeric value that ``chr()`` rejects -- the whole matched text
    (``group(0)``) is returned unchanged so the input is left intact
    rather than raising in the middle of a substitution.
    """
    try:
        if matchobj.group(1):
            return chr(int(matchobj.group(1), 16))
        if matchobj.group(2):
            return chr(int(matchobj.group(2)))
    except (ValueError, OverflowError):
        # chr() raises ValueError above U+10FFFF and OverflowError for
        # values that do not fit a C int; treat both like an unknown
        # entity and leave the text as it was.
        return matchobj.group(0)
    try:
        return ENTITIES_TO_CHARACTERS[matchobj.group(3)]
    except KeyError:
        return matchobj.group(0)
def test_mathematical_latin(self):
    # The range covers 13 consecutive A-Z, a-z alphabets in which some
    # code points are undefined.  Undefined ones (empty transliteration)
    # are only counted, not checked by position.
    undefined = 0
    for codepoint in range(0x1d400, 0x1d6a4):
        actual = unidecode(chr(codepoint))
        if not actual:
            undefined += 1
            continue
        # First half of every 52-code-point alphabet is upper case,
        # second half is lower case.
        first_letter = 'A' if codepoint % 52 < 26 else 'a'
        expected = chr(ord(first_letter) + codepoint % 26)
        self.assertEqual(actual, expected)
    self.assertEqual(undefined, 24)
def fixup(m):
    """Return the character for one matched HTML/XML reference.

    ``m`` is a match object whose full match (``group(0)``) is a
    reference such as ``&#65;``, ``&#x41;`` or ``&amp;``.  Numeric
    references are decoded with ``chr()``; named ones are looked up in
    ``name2codepoint``.  Anything that cannot be decoded -- malformed
    digits, a code point ``chr()`` rejects, or an unknown name -- is
    returned unchanged.
    """
    text = m.group(0)
    if text[:2] == "&#":
        # character reference
        try:
            # The hex marker may be written in either case.
            if text[:3] in ("&#x", "&#X"):
                return chr(int(text[3:-1], 16))
            return chr(int(text[2:-1]))
        except (ValueError, OverflowError):
            # ValueError: bad digits or code point above U+10FFFF.
            # OverflowError: value too large for chr() to accept at all.
            pass
    else:
        # named entity
        try:
            text = chr(name2codepoint[text[1:-1]])
        except KeyError:
            pass
    return text  # leave as is
def fixup(m):
    """Decode a single matched entity or character reference.

    The full match of ``m`` is treated as a reference: ``&#NNN;``
    (decimal), ``&#xHHH;`` / ``&#XHHH;`` (hexadecimal) or ``&name;``
    (resolved through ``name2codepoint``).  The decoded character is
    returned; a reference that cannot be decoded is returned verbatim so
    the surrounding text is left untouched.
    """
    text = m.group(0)
    if text[:2] != "&#":
        # named entity
        try:
            return chr(name2codepoint[text[1:-1]])
        except KeyError:
            return text  # leave as is
    # character reference
    is_hex = text[:3] in ("&#x", "&#X")
    try:
        if is_hex:
            return chr(int(text[3:-1], 16))
        return chr(int(text[2:-1]))
    except (ValueError, OverflowError):
        # int() rejects malformed digits; chr() raises ValueError above
        # U+10FFFF and OverflowError when the value does not fit a C int.
        return text  # leave as is
def test_bmp(self):
    # Smoke test only: every code point below 0x10000 must go through
    # unidecode() without raising; the results are not inspected.
    for codepoint in range(0x10000):
        unidecode(chr(codepoint))
def test_ascii(self):
    # Every code point below 128 must come back unchanged.
    for code in range(128):
        char = chr(code)
        self.assertEqual(unidecode(char), char)