def ucnstring_to_python(ucn_string):
    """Replace every Unicode UCN in a string with the character it names.

    A UCN is ``U+`` followed by one or more hexadecimal digits
    (e.g. "U+4E00"). Each one found in ``ucn_string`` is converted with
    ``ucn_to_unicode`` and substituted in place; all other text is left
    untouched.

    :param ucn_string: text that may contain UCN sequences.
    :returns: the converted text, encoded as UTF-8.
    :rtype: bytes
    """

    def _ucn_to_char(match):
        # Convert exactly the text that was matched, nothing around it.
        return text_type(ucn_to_unicode(match.group(0)))

    # One substitution pass instead of findall + str.replace: replacing a
    # short UCN such as "U+4E0" can no longer clobber the prefix of a longer
    # one such as "U+4E00".  The pattern requires at least one hex digit so
    # a bare "U+" is never handed to ucn_to_unicode.
    converted = re.sub(r"U\+[0-9a-fA-F]+", _ucn_to_char, ucn_string)
    return converted.encode('utf-8')
def python_to_ucn(uni_char, as_bytes=False):
    """Return the Unicode UCN for a single Python Unicode character.

    Converts a one character Python unicode string (e.g. u'\\u4e00') to the
    corresponding Unicode UCN ('U+4E00'). The code point is written in
    uppercase hexadecimal, zero-padded to at least four digits.

    :param uni_char: a string holding exactly one character.
    :param as_bytes: when true, return the UCN as latin1-encoded bytes
        instead of text.
    :returns: the UCN, e.g. ``'U+4E00'`` (or ``b'U+4E00'``).
    :raises TypeError: if ``uni_char`` is not exactly one character
        (raised by ``ord``).
    """
    # NOTE(review): an identical definition of python_to_ucn follows this one
    # in the file and replaces it at import time; consider removing one.

    # Read the code point directly rather than parsing the unicode_escape
    # text: characters that escape to themselves ('a') or to "\xNN" ('\xe9')
    # previously produced malformed results such as "U+A" or "U+XE9".
    ucn = u"U+{:04X}".format(ord(uni_char))
    if as_bytes:
        ucn = ucn.encode('latin1')
    return ucn
def python_to_ucn(uni_char, as_bytes=False):
    """Return UCN character from Python Unicode character.

    Converts a one character Python unicode string (e.g. u'\\u4e00') to the
    corresponding Unicode UCN ('U+4E00'). The code point is rendered as
    uppercase hexadecimal, zero-padded to a minimum of four digits.

    :param uni_char: a string holding exactly one character.
    :param as_bytes: when true, return the UCN as latin1-encoded bytes
        instead of text.
    :returns: the UCN, e.g. ``'U+4E00'`` (or ``b'U+4E00'``).
    :raises TypeError: if ``uni_char`` is not exactly one character
        (raised by ``ord``).
    """
    # NOTE(review): an identical definition of python_to_ucn precedes this
    # one in the file; this later definition is the one that takes effect.

    # Use the code point itself instead of massaging the unicode_escape
    # output, which is only "\uXXXX"/"\UXXXXXXXX" for code points >= U+0100;
    # ASCII and Latin-1 characters used to yield values like "U+A" or
    # "U+XE9".
    code_point = ord(uni_char)
    ucn = u"U+{:04X}".format(code_point)
    if as_bytes:
        ucn = ucn.encode('latin1')
    return ucn
def test_text_type():
    """Check a string literal against string_types and text_type() against text_type."""
    # A literal mixing ASCII, a UCN reference and a CJK character.
    sample_literal = '(same as U+7A69 穩) firm; stable; secure'
    assert isinstance(sample_literal, string_types)

    # Calling text_type with no arguments must yield a text_type instance.
    empty_text = text_type()
    assert isinstance(empty_text, text_type)