def ucn_to_unicode(ucn):
    """Convert a Unicode Universal Character Number to a unicode character.

    Accepts a hexadecimal string such as "U+4E00" or "4E00" (a lower-case
    "u+" prefix and surrounding whitespace are tolerated too), or an
    integer code point, and returns the matching one-character text
    string (u'\\u4e00').

    :param ucn: hexadecimal code point string, optionally "U+"-prefixed,
        or an integer code point.
    :returns: the character for that code point, as ``text_type``.
    :raises ValueError: if the string is not valid hexadecimal or the code
        point is outside the Unicode range.
    """
    if isinstance(ucn, string_types):
        # str.strip(chars) removes a *set* of characters rather than a
        # prefix; that is safe here because none of "U", "u" or "+" is a
        # hexadecimal digit, so no part of the number can be eaten.
        codepoint = int(ucn.strip().strip("Uu+"), 16)
    else:
        codepoint = ucn

    if codepoint > 0xFFFF:
        # Build an 8-digit escape and decode it instead of calling unichr():
        # unichr() rejects code points beyond the BMP on narrow Python 2
        # builds. The backslash is doubled so the bytes literal holds no
        # invalid escape sequence.
        escaped = b"\\U" + format(codepoint, "08x").encode("latin1")
        char = escaped.decode("unicode_escape")
    else:
        char = unichr(codepoint)

    # Internal sanity check only; both branches always yield text.
    assert isinstance(char, text_type)
    return char
def chars():
    """Insert three distinct random character rows and return them.

    Each row is a dict with the keys ``hex`` (the integer code point),
    ``char`` (the character itself) and ``ucn`` (the result of
    ``conversion.python_to_ucn`` for that character). Code points are
    drawn from 0x4E00 plus a random offset between 1 and 333 inclusive.
    The rows are written in one ``unicode_table.insert()`` statement
    executed via ``metadata.bind``.

    :returns: list of the three inserted row dicts.
    """
    rows = []
    while len(rows) < 3:
        codepoint = 0x4E00 + random.randint(1, 333)
        glyph = unichr(codepoint)
        row = {
            'hex': codepoint,
            'char': glyph,
            'ucn': conversion.python_to_ucn(glyph),
        }
        # Redraw when the same code point comes up again so that the
        # three rows are distinct.
        if row in rows:
            continue
        rows.append(row)

    metadata.bind.execute(unicode_table.insert(), rows)
    return rows