def _replace_codepoint(match):
    """
    Return the unicode character for the Unicode hex code matched by ``match``.

    The match is converted to an integer with ``self._match_to_int``;
    when that conversion yields -1, the empty string is returned instead.
    """
    codepoint = self._match_to_int(match)
    return u"" if codepoint == -1 else gf.safe_unichr(codepoint)
def test_safe_unichr(self):
    """
    Check that ``gf.safe_unichr`` returns the expected character
    for a handful of code points.
    """
    # (code point, expected character) pairs
    expectations = [
        (65, u"A"),
        (90, u"Z"),
        (0x20, u"\u0020"),
        (0x200, u"\u0200"),
        (0x2000, u"\u2000"),
    ]
    for codepoint, expected in expectations:
        self.assertEqual(gf.safe_unichr(codepoint), expected)
def test_safe_unichr(self):
    """
    Check that ``gf.safe_unichr`` returns the expected character
    for a handful of code points, including U+20000.
    """
    # The expected value for U+20000 is spelled differently
    # depending on whether gf.PY2 is set.
    if gf.PY2:
        expected_u20000 = "\\U00020000".decode("unicode-escape")
    else:
        expected_u20000 = "\U00020000"
    # (code point, expected character) pairs
    expectations = [
        (65, u"A"),
        (90, u"Z"),
        (0x20, u"\u0020"),
        (0x200, u"\u0200"),
        (0x2000, u"\u2000"),
        (0x20000, expected_u20000),
    ]
    for codepoint, expected in expectations:
        self.assertEqual(gf.safe_unichr(codepoint), expected)
def test_filter_transliterate_replace_codepoint_length(self):
    """
    Run ``self.filter_transliterate`` on u"x" + <char> + u"z" for code points
    written with an increasing number of hex digits, expecting u"xaz" each time.
    """
    expected = [u"xaz"]
    for codepoint in (0x0008, 0x0088, 0x0888, 0x8888):
        self.filter_transliterate([u"x" + gf.safe_unichr(codepoint) + u"z"], expected)
    # NOTE Python 2 narrow builds cannot handle codepoints above 0x10000 correctly
    if not gf.is_py2_narrow_build():
        for codepoint in (0x88888, 0x108888):
            self.filter_transliterate([u"x" + gf.safe_unichr(codepoint) + u"z"], expected)
def _process_first_group(self, group): """ Process the first group of a rule. """ if "-" in group: # range if len(group.split("-")) == 2: arr = group.split("-") start = self._parse_codepoint(arr[0]) end = self._parse_codepoint(arr[1]) else: # single char/U+xxxx start = self._parse_codepoint(group) end = start result = [] if (start > -1) and (end >= start): for index in range(start, end + 1): result.append(gf.safe_unichr(index)) return result
def test_safe_unichr(self):
    """
    Check that ``gf.safe_unichr`` returns the expected character
    for a handful of code points.
    """
    codepoints = (65, 90, 0x20, 0x200, 0x2000)
    characters = (u"A", u"Z", u"\u0020", u"\u0200", u"\u2000")
    # Pair each code point with the character it should produce.
    for codepoint, character in zip(codepoints, characters):
        self.assertEqual(gf.safe_unichr(codepoint), character)