def getdetails(self, text):
    """Collect Unicode properties for every character in ``text``.

    :param text: The unicode string to be examined.
    :returns: A dictionary mapping each distinct character to a dictionary
        of its properties, plus a ``'Characters'`` entry holding
        ``list(text)``.

    Per-character keys: ``'Name'`` (empty string when the character has no
    Unicode name, e.g. control characters), ``'HTML Entity'``,
    ``'Code point'``, ``'Alphabet'``, ``'Digit'``, ``'AlphaNumeric'`` and
    ``'Canonical Decomposition'``.  ``'Numeric Value'`` and
    ``'Decimal Value'`` are present only for characters that have one.
    """
    chardetails = {}
    for character in text:
        details = {
            # Unnamed characters (newline, tab, ...) must not abort the scan.
            'Name': unicodedata.name(character, ''),
            'HTML Entity': str(ord(character)),
            'Code point': repr(character),
        }

        # Passing a default avoids using exceptions (and a bare ``except``)
        # for the common "character is not a number" case.
        numeric_value = unicodedata.numeric(character, None)
        if numeric_value is not None:
            details['Numeric Value'] = numeric_value
        decimal_value = unicodedata.decimal(character, None)
        if decimal_value is not None:
            details['Decimal Value'] = decimal_value

        # 'Digit' has always ended up as the isdigit() flag; the former
        # unicodedata.digit() lookup was overwritten here and is dropped.
        details['Alphabet'] = str(character.isalpha())
        details['Digit'] = str(character.isdigit())
        details['AlphaNumeric'] = str(character.isalnum())
        details['Canonical Decomposition'] = \
            unicodedata.decomposition(character)
        chardetails[character] = details
    chardetails['Characters'] = list(text)
    return chardetails
def test_numeric_chars_contains_all_valid_unicode_numeric_and_digit_characters(
):
    """Every numeric/digit/decimal character known to ``unicodedata`` must be
    listed in the pre-computed tables, and the derived "no decimals" strings
    must relate to those tables as expected."""
    known_hex = set(numeric_hex)
    known_numeric = set(numeric_chars)
    known_digit = set(digit_chars)
    known_decimal = set(decimal_chars)
    ascii_digits = set('0123456789')

    # Checked in this order for every code point: numeric, digit, decimal.
    checks = (
        (unicodedata.numeric, known_numeric),
        (unicodedata.digit, known_digit),
        (unicodedata.decimal, known_decimal),
    )

    for code_point in py23_range(0X110000):
        try:
            char = py23_unichr(code_point)
        except ValueError:
            # Code point not representable on this build; stop scanning.
            break
        if char in ascii_digits:
            continue
        for lookup, expected_chars in checks:
            if lookup(char, None) is not None:
                assert code_point in known_hex
                assert char in expected_chars

    assert known_decimal.isdisjoint(digits_no_decimals)
    assert known_digit.issuperset(digits_no_decimals)
    assert known_decimal.isdisjoint(numeric_no_decimals)
    assert known_numeric.issuperset(numeric_no_decimals)
def get_type_numeric(pred, db):
    """Compute summary statistics over the integer objects of a predicate.

    :param pred: Predicate value bound to the query's ``%s`` placeholder.
    :param db: Object exposing ``conn`` (with a ``cursor()`` method) and
        ``spot_tb`` (the table name spliced into the query).
    :returns: dict with keys ``'max'``, ``'min'``, ``'avg'``, ``'p10'`` and
        ``'p90'`` computed over the absolute values of all parsable objects.
    :raises ValueError: if no row yields a parsable integer (``max`` of an
        empty list).

    Values that ``int()`` rejects are re-parsed digit by digit through
    ``unicodedata.decimal``; rows that still cannot be parsed are printed
    and skipped.
    """
    # The table name is taken from ``db`` and cannot be a bound parameter;
    # ``pred`` is passed as a bound parameter.
    query = "select obj from " + db.spot_tb + " where pred=(%s) and obj_type='int' "

    obj_list = []
    cur = db.conn.cursor()
    try:
        cur.execute(query, [pred])
        for row in cur:
            raw = row[0]
            try:
                val = int(raw)
            except ValueError:
                try:
                    # Accept both byte strings and already-decoded text.
                    text = raw.decode('utf8') if isinstance(raw, bytes) else raw
                    val = int(''.join(
                        str(unicodedata.decimal(d, -1)) for d in text))
                except Exception as e:
                    print(pred, raw, e)
                    continue
            # Reached for values parsed by either path.
            obj_list.append(abs(val))
    finally:
        cur.close()

    types = {}
    types['max'] = max(obj_list)
    types['min'] = min(obj_list)
    types['avg'] = sum(obj_list) / float(len(obj_list))
    types['p10'] = np.percentile(obj_list, 10)
    types['p90'] = np.percentile(obj_list, 90)
    return types
def conv(unicode_arabic_date):
    """Return the date/time string with every digit replaced by its ASCII
    decimal value; ':' and '/' separators are copied through unchanged.

    Any other character without a decimal value makes
    ``unicodedata.decimal`` raise ``ValueError``.
    """
    separators = (':', '/')
    return ''.join(
        symbol if symbol in separators else str(unicodedata.decimal(symbol))
        for symbol in unicode_arabic_date
    )
def _explain_char(self, ch, further): try: name = unicodedata.name(ch) except ValueError: name = f'[U+{hex(ord(ch))[2:]}]' if not further: return name + f'({ch})' infos = { 'category': unicodedata.category(ch), 'direction': unicodedata.bidirectional(ch), 'east asian width': unicodedata.east_asian_width(ch) } decomposition = unicodedata.decomposition(ch) if decomposition: infos['decomposition'] = decomposition try: infos['digit value'] = unicodedata.digit(ch) except ValueError: pass try: infos['decimal value'] = unicodedata.decimal(ch) except ValueError: pass try: infos['numeric value'] = unicodedata.numeric(ch) except ValueError: pass comb = unicodedata.combining(ch) if comb != 0: infos['combining class'] = str(comb) mirrored = unicodedata.mirrored(ch) if mirrored: infos['mirrored'] = 'yes' if hasattr(unicodedata, 'is_normalized'): forms = [] for form in ('NFC', 'NFD', 'NFKC', 'NFKD'): if unicodedata.is_normalized(form, ch): forms.append(form) if forms: infos['normalized'] = f'yes: {", ".join(forms)}' else: infos['normalized'] = 'no' else: infos['normalized'] = 'unavailable' info = ', '.join([f'{k}: {v}' for k, v in infos.items()]) return f'{name}: {ch!r} ({info})'
def overview(tree_item):
    """ Returns an overview of the character """
    char = tree_item.obj
    # Positional fields, in the order TEMPLATE consumes them.
    fields = (
        unicodedata.name(char, '<NO NAME AVAILABLE>'),
        char,
        unicodedata.decimal(char, ''),
        unicodedata.digit(char, ''),
        unicodedata.numeric(char, ''),
        unicodedata.category(char),
        unicodedata.bidirectional(char),
        unicodedata.combining(char),
        unicodedata.east_asian_width(char),
        unicodedata.mirrored(char),
        unicodedata.decomposition(char),
    )
    return TEMPLATE.format(*fields)
def char2info(ch):
    """Return a dict of Unicode properties for the single character ``ch``.

    Keys: ``ch``, ``name``, ``decimal``, ``digit``, ``numeric`` (``None``
    when absent), ``category``, ``bidirectional``, ``combining``,
    ``east_asian_width``, ``mirrored``, ``decomposition``, ``unicode``
    (the code point) and ``unicode_hex``.
    """
    return {
        'ch': ch,
        'name': U.name(ch, None),
        'decimal': U.decimal(ch, None),
        'digit': U.digit(ch, None),
        'numeric': U.numeric(ch, None),
        'category': U.category(ch),
        'bidirectional': U.bidirectional(ch),
        'combining': U.combining(ch),
        'east_asian_width': U.east_asian_width(ch),
        'mirrored': U.mirrored(ch),
        'decomposition': U.decomposition(ch),
        'unicode': ord(ch),
        'unicode_hex': hex(ord(ch)),
    }
def _parse_int(raw):
    """Parse ``raw`` as an int, falling back to per-character Unicode decimal digits."""
    try:
        return int(raw)
    except ValueError:
        # Accept both byte strings and already-decoded text.
        text = raw.decode('utf8') if isinstance(raw, bytes) else raw
        return int(''.join(str(unicodedata.decimal(d, -1)) for d in text))


def _append_rows(path, rows):
    """Append ``rows`` to the CSV file at ``path``."""
    with open(path, 'a') as fout:
        writer = csv.writer(fout, quoting=csv.QUOTE_MINIMAL)
        writer.writerows(rows)


def main():
    """Average the integer values of each consecutive (subject, predicate) run.

    Reads ``integer_per_pred_per_sub_<kb>.csv`` (columns: subject,
    predicate, value) and appends one ``[subject, predicate, average]`` row
    per run of consecutive identical (subject, predicate) pairs to
    ``avg_integer_per_pred_per_sub_<kb>.csv``.  Averages are taken over
    absolute values and truncated to int.  Rows whose value cannot be
    parsed are printed and skipped.
    """
    kb_name = 'dbp_map'
    filein = './count_information/integer_per_pred_per_sub_'
    fileout = './count_information/avg_integer_per_pred_per_sub_'
    out_path = fileout + kb_name + '.csv'

    prev_pred = None
    prev_sub = None
    count_val = []
    bufferout = []

    with open(filein + kb_name + '.csv') as fin:
        reader = csv.reader(fin)
        for row in tqdm(reader):
            sub = row[0]
            pred = row[1]
            try:
                val = _parse_int(row[2])
            except Exception as e:
                print(sub, pred, row[2], e)
                continue

            if sub == prev_sub and pred == prev_pred:
                count_val.append(abs(val))
                continue

            if prev_sub is not None and prev_pred is not None:
                # A new run starts: emit the finished one.
                bufferout.append([
                    prev_sub, prev_pred,
                    int(sum(count_val) / len(count_val))
                ])
                count_val = [abs(val)]
            else:
                # Very first parsable row.
                count_val.append(abs(val))
            prev_sub = sub
            prev_pred = pred

            if len(bufferout) >= 1000:
                _append_rows(out_path, bufferout)
                bufferout = []

    # Emit the final run and whatever is still buffered; previously both
    # were silently dropped when the input ended.
    if count_val:
        bufferout.append([
            prev_sub, prev_pred,
            int(sum(count_val) / len(count_val))
        ])
    if bufferout:
        _append_rows(out_path, bufferout)
def test_compare_functions(self):
    """Compare ``unicodedata`` with ``unicodedb_5_2_0`` for every BMP code point."""
    database = unicodedb_5_2_0

    def lookup_or_minus_one(prop, code_point):
        # The database raises KeyError where unicodedata falls back to the
        # default; map that to the same -1 default.
        try:
            return getattr(database, prop)(code_point)
        except KeyError:
            return -1

    defaulted = ('digit', 'numeric', 'decimal')
    direct = ('category', 'bidirectional', 'decomposition', 'mirrored',
              'combining')

    for code_point in range(0x10000):
        char = unichr(code_point)
        for prop in defaulted:
            assert getattr(unicodedata, prop)(char, -1) == \
                lookup_or_minus_one(prop, code_point)
        for prop in direct:
            assert getattr(unicodedata, prop)(char) == \
                getattr(database, prop)(code_point)
def main():
    """Decode the hex byte values given on the command line as UTF-8 and
    print the Unicode properties of the resulting character.

    Any failure (bad hex, invalid UTF-8, not exactly one character) is
    reported as ``ERROR: <message>``; lines printed before the failure
    remain on stdout.
    """
    try:
        raw = bytes(int(arg, 16) for arg in sys.argv[1:])
        c = raw.decode('utf8')
        # (format, producer) pairs; producers run lazily so output stops at
        # the first property that fails.
        report = (
            ('gryph: %s', lambda: c),
            ('codepoint: U+%x', lambda: ord(c)),
            ('name: %s', lambda: unicodedata.name(c, 'Unknown')),
            ('decimal: %s', lambda: unicodedata.decimal(c, 'Unknown')),
            ('digit: %s', lambda: unicodedata.digit(c, 'Unknown')),
            ('numeric: %s', lambda: unicodedata.numeric(c, 'Unknown')),
            ('category: %s', lambda: unicodedata.category(c)),
            ('bidirectional: %s', lambda: unicodedata.bidirectional(c)),
            ('combining: %s', lambda: unicodedata.combining(c)),
            ('east_asian_width: %s',
             lambda: unicodedata.east_asian_width(c)),
            ('mirrored: %s', lambda: unicodedata.mirrored(c)),
            ('decomposition: %s', lambda: unicodedata.decomposition(c)),
        )
        for template, produce in report:
            print(template % produce())
    except Exception as ex:
        print('ERROR: %s' % ex)
def __init__(self, symbol):
    """Store ``symbol`` and cache its Unicode properties as attributes.

    Numeric properties default to ``-1`` and the name to
    ``'NO_NAME_FOUND'`` when the character has none.  When
    ``Config.debug['unicode']`` is truthy, ``print_debug()`` is called.
    """
    self.symbol = symbol
    self.name = u.name(symbol, 'NO_NAME_FOUND')

    # Numeric lookups that take a fallback value.
    for prop in ('decimal', 'digit', 'numeric'):
        setattr(self, prop, getattr(u, prop)(self.symbol, -1))

    # Lookups that always return a value.
    for prop in ('category', 'bidirectional', 'combining',
                 'east_asian_width', 'mirrored', 'decomposition'):
        setattr(self, prop, getattr(u, prop)(self.symbol))

    # Attribute name -> normalization form (attribute spellings are part of
    # the existing interface and kept as-is).
    for attr, form in (('normalize_nfc', 'NFC'),
                       ('normalize_nkfc', 'NFKC'),
                       ('normalize_nfd', 'NFD'),
                       ('normalize_nkfd', 'NFKD')):
        setattr(self, attr, u.normalize(form, self.symbol))

    if Config.debug['unicode']:
        self.print_debug()
def analyze(text):
    """Analyze lyric text with MeCab and return readings per section.

    The text is split into sections on ``'==='``; each section is split
    into phrases on spaces and every phrase is parsed by MeCab.  Returns a
    list with one dict per section: ``'lyric'`` is the section text and
    ``'phoneme'`` the collected readings joined with ``'/'``.
    """
    results = []
    mecab = MeCab.Tagger('-Ounidic -d %s -r %s' % (mecab_dicdir, os.path.join(dicrc_dir, 'dicrc')))
    # Split into sections (bars)
    text = text.strip()
    text = re.sub(phrase_split_chars_uni, ' ', text)
    text = text.encode('utf-8').replace('\r\n', '\n').replace('\n', ' ')
    text = text.split('===')
    text = map(lambda p: p.strip(), text)
    lyrics = map(lambda p: p.split(' '), text)
    # Analyze readings and accents
    for i, phrases in enumerate(lyrics):
        temp = []
        for phrase in phrases:
            for word in mecab.parse(phrase).decode('utf-8').split('\n'):
                # Only lines with exactly four tab-separated fields are used.
                features = word.split('\t')
                if len(features) == 4:
                    atypes = []
                    acons = []
                    try:
                        # Accent type
                        atypes = map(lambda n: unicodedata.decimal(n), features[2].split(','))
                        acons = features[3].split(',')
                    except TypeError:
                        # Accent is unknown
                        pass
                    prono = features[0]  # Reading
                    if not prono or not re.match(ok_chars, prono):
                        continue
                    if len(atypes) > 0:
                        prono = insert_accent(prono, atypes[0])
                    temp.append(prono)
            temp.append(' ')
        results.append({'lyric': text[i], 'phoneme': '/'.join(temp).rstrip()})
    return results
def test_compare_functions(self):
    """Compare CPython's ``unicodedata`` with ``unicodedb_4_1_0`` for every
    BMP code point, skipping numeric values listed in ``self.diff_numeric``."""
    import unicodedata  # CPython implementation

    database = unicodedb_4_1_0

    def lookup_or_minus_one(prop, code_point):
        # Known numeric differences are treated as "no value".
        if prop == 'numeric' and code_point in self.diff_numeric:
            return -1
        try:
            return getattr(database, prop)(code_point)
        except KeyError:
            return -1

    defaulted = ('digit', 'numeric', 'decimal')
    direct = ('category', 'bidirectional', 'decomposition', 'mirrored',
              'combining')

    for code_point in range(0x10000):
        char = unichr(code_point)
        for prop in defaulted:
            assert getattr(unicodedata, prop)(char, -1) == \
                lookup_or_minus_one(prop, code_point)
        for prop in direct:
            assert getattr(unicodedata, prop)(char) == \
                getattr(database, prop)(code_point)
def update_quantity(request, food_id):
    """Store the posted quantity on a food item and compute its total cost.

    :param request: The incoming request; ``request.POST['quantity']`` is
        read.
    :param food_id: Primary key of the ``FoodItems`` row (404 if missing).
    :returns: The re-rendered detail page with an ``error_message`` when
        the quantity is missing or not a finite number; otherwise a dict
        with ``'fooditems'`` and ``'total_cost'`` (``'$'`` + amount).

    NOTE(review): the success path returns a plain dict rather than an
    HTTP response, so presumably something else renders it -- confirm.
    """
    # Local import: the quantity arrives as a string of any length, so it
    # needs the arbitrary-precision constructor, not a per-character lookup.
    from decimal import Decimal, InvalidOperation

    food = get_object_or_404(FoodItems, id=food_id)

    def redisplay(message):
        # Redisplay the detail form with an error.
        return render(request, 'foods/detail.html', {
            'fooditems': food,
            'error_message': message,
        })

    try:
        selected_choice = request.POST['quantity']
    except (KeyError, food.DoesNotExist):
        return redisplay("You didn't select a quantity.")

    # Validate before saving so a bad value never reaches the database.
    try:
        quantity = Decimal(str(selected_choice))
    except InvalidOperation:
        quantity = None
    if quantity is None or not quantity.is_finite():
        return redisplay("Quantity must be a number.")

    food.quantity = selected_choice
    food.save()
    # str() keeps this working whether item_cost is a Decimal or a float.
    total_cost = '$' + str(quantity * Decimal(str(food.item_cost)))
    return {
        'fooditems': food,
        'total_cost': total_cost,
    }
def analyze(text):
    """Analyze lyric text with MeCab and return readings per section.

    The text is split into sections on ``'==='``; each section is split
    into phrases on spaces and every phrase is parsed by MeCab.  Returns a
    list with one dict per section: ``'lyric'`` is the section text and
    ``'phoneme'`` the collected readings joined with ``'/'``.
    """
    results = []
    mecab = MeCab.Tagger('-Ounidic -d %s -r dicrc' % mecab_dicdir)
    # Split into sections (bars)
    text = text.strip()
    text = re.sub(phrase_split_chars_uni, ' ', text)
    text = text.encode('utf-8').replace('\r\n', '\n').replace('\n', ' ')
    text = text.split('===')
    text = map(lambda p: p.strip(), text)
    lyrics = map(lambda p: p.split(' '), text)
    # Analyze readings and accents
    for i, phrases in enumerate(lyrics):
        temp = []
        for phrase in phrases:
            for word in mecab.parse(phrase).decode('utf-8').split('\n'):
                # Only lines with exactly four tab-separated fields are used.
                features = word.split('\t')
                if len(features) == 4:
                    atypes = []
                    acons = []
                    try:
                        # Accent type
                        atypes = map(lambda n: unicodedata.decimal(n), features[2].split(','))
                        acons = features[3].split(',')
                    except TypeError:
                        # Accent is unknown
                        pass
                    prono = features[0]  # Reading
                    if not prono or not re.match(ok_chars, prono):
                        continue
                    if len(atypes) > 0:
                        prono = insert_accent(prono, atypes[0])
                    temp.append(prono)
            temp.append(' ')
        results.append({'lyric': text[i], 'phoneme': '/'.join(temp).rstrip()})
    return results
def test_ipy2_gh357(self):
    """https://github.com/IronLanguages/ironpython2/issues/357"""
    import unicodedata

    ideograph = u'\u4e2d'

    if is_cli:
        expected_name = '<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>'
    else:
        expected_name = 'CJK UNIFIED IDEOGRAPH-4E2D'
    self.assertEqual(unicodedata.name(ideograph), expected_name)

    # The numeric lookups raise without a default and return it otherwise.
    for lookup in (unicodedata.decimal, unicodedata.digit,
                   unicodedata.numeric):
        self.assertRaises(ValueError, lookup, ideograph)
        self.assertEqual(lookup(ideograph, 0), 0)

    expected_properties = (
        (unicodedata.category, 'Lo'),
        (unicodedata.bidirectional, 'L'),
        (unicodedata.combining, 0),
        (unicodedata.east_asian_width, 'W'),
        (unicodedata.mirrored, 0),
        (unicodedata.decomposition, ''),
    )
    for lookup, expected in expected_properties:
        self.assertEqual(lookup(ideograph), expected)
# Run the main test function and print what it returns.
print test_unicodedata()

# Some additional checks of the API:
print 'API:',

# digit(): a letter and U+215B have no digit value (the default is
# returned); '9' and U+2468 both map to 9.
verify(unicodedata.digit(u'A',None) is None)
verify(unicodedata.digit(u'9') == 9)
verify(unicodedata.digit(u'\u215b',None) is None)
verify(unicodedata.digit(u'\u2468') == 9)

# numeric(): U+215B has the fractional value 0.125.
verify(unicodedata.numeric(u'A',None) is None)
verify(unicodedata.numeric(u'9') == 9)
verify(unicodedata.numeric(u'\u215b') == 0.125)
verify(unicodedata.numeric(u'\u2468') == 9.0)

# decimal(): of these four characters only '9' has a decimal value.
verify(unicodedata.decimal(u'A',None) is None)
verify(unicodedata.decimal(u'9') == 9)
verify(unicodedata.decimal(u'\u215b',None) is None)
verify(unicodedata.decimal(u'\u2468',None) is None)

# category()
verify(unicodedata.category(u'\uFFFE') == 'Cn')
verify(unicodedata.category(u'a') == 'Ll')
verify(unicodedata.category(u'A') == 'Lu')

# bidirectional(): empty string for U+FFFE.
verify(unicodedata.bidirectional(u'\uFFFE') == '')
verify(unicodedata.bidirectional(u' ') == 'WS')
verify(unicodedata.bidirectional(u'A') == 'L')

# decomposition(): empty string when there is no mapping.
verify(unicodedata.decomposition(u'\uFFFE') == '')
verify(unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034')
def decimal(self, default=None):
    """Return the decimal value of ``self.char``, or ``default`` when the
    character has none."""
    value = ud.decimal(self.char, default)
    return value
unicode_digit = defaultdict(list) unicode_decimal = defaultdict(list) for c in map(chr, range(sys.maxunicode + 1)): unicode_category[unicodedata.category(c)].append(c) if unicodedata.bidirectional(c): unicode_bidirectional[unicodedata.bidirectional(c)].append(c) if unicodedata.numeric(c, None) is not None: unicode_numeric[unicodedata.numeric(c)].append(c) if unicodedata.digit(c, None) is not None: unicode_digit[unicodedata.digit(c)].append(c) if unicodedata.decimal(c, None) is not None: unicode_decimal[unicodedata.decimal(c)].append(c) # get all punctuation punctuation = set() for class_name in unicode_category.keys(): if class_name.startswith('P') or class_name.startswith('S'): print(class_name) for char in unicode_category[class_name]: punctuation.add(char) with open('punctuation_lookup.py', 'w', encoding='ascii') as f: f.write('PUNCTUATION = {\n') for p in sorted(punctuation): if p == '"':
# Characters whose ASCII form cannot be derived from decomposition or name.
_ASCII_SMASH_MAPPING = {
    u"\N{LATIN CAPITAL LETTER AE}": "AE",
    u"\N{LATIN SMALL LETTER AE}": "ae",
    u"\N{LATIN CAPITAL LETTER A WITH DIAERESIS}": "Ae",
    u"\N{LATIN SMALL LETTER A WITH DIAERESIS}": "ae",
    u"\N{LATIN CAPITAL LETTER O WITH DIAERESIS}": "Oe",
    u"\N{LATIN SMALL LETTER O WITH DIAERESIS}": "oe",
    u"\N{LATIN CAPITAL LETTER U WITH DIAERESIS}": "Ue",
    u"\N{LATIN SMALL LETTER U WITH DIAERESIS}": "ue",
    u"\N{LATIN SMALL LETTER SHARP S}": "ss",
    u"\N{LATIN CAPITAL LETTER THORN}": "TH",
    u"\N{LATIN SMALL LETTER THORN}": "th",
    u"\N{FRACTION SLASH}": "/",
    u"\N{MULTIPLICATION SIGN}": "x",
    u"\N{KATAKANA-HIRAGANA DOUBLE HYPHEN}": "=",
}


def ascii_char_smash(char):
    r"""Smash a single Unicode character into an ASCII representation.

    Always returns a string (possibly empty).

    >>> ascii_char_smash(u"\N{KATAKANA LETTER SMALL A}")
    'a'
    >>> ascii_char_smash(u"\N{KATAKANA LETTER A}")
    'A'
    >>> ascii_char_smash(u"\N{KATAKANA LETTER KA}")
    'KA'
    >>> ascii_char_smash(u"\N{HIRAGANA LETTER SMALL A}")
    'a'
    >>> ascii_char_smash(u"\N{HIRAGANA LETTER A}")
    'A'
    >>> ascii_char_smash(u"\N{BOPOMOFO LETTER ANG}")
    'ANG'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER H WITH STROKE}")
    'H'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER LONG S}")
    's'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER THORN}")
    'TH'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER THORN}")
    'th'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER I WITH OGONEK}")
    'I'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER AE}")
    'AE'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER A WITH DIAERESIS}")
    'Ae'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER A WITH DIAERESIS}")
    'ae'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER O WITH DIAERESIS}")
    'Oe'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER O WITH DIAERESIS}")
    'oe'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER U WITH DIAERESIS}")
    'Ue'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER U WITH DIAERESIS}")
    'ue'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER SHARP S}")
    'ss'

    Latin-1 and other symbols are lost

    >>> ascii_char_smash(u"\N{POUND SIGN}")
    ''

    Unless they also happen to be letters of some kind, such as greek

    >>> ascii_char_smash(u"\N{MICRO SIGN}")
    'mu'

    Fractions

    >>> ascii_char_smash(u"\N{VULGAR FRACTION ONE HALF}")
    '1/2'
    """
    # Pass through ASCII
    if ord(char) < 127:
        return char

    # Handle manual mappings
    if char in _ASCII_SMASH_MAPPING:
        return _ASCII_SMASH_MAPPING[char]

    # Regress to decomposed form and recurse if necessary.
    decomposed = unicodedata.normalize("NFKD", char)
    if decomposed != char:
        return "".join(ascii_char_smash(part) for part in decomposed)

    # Handle whitespace
    if char.isspace():
        return " "

    # Handle digits (returned as text, like every other branch)
    if char.isdigit():
        return str(unicodedata.digit(char))

    # Handle decimal (probably pointless given isdigit above)
    if char.isdecimal():
        return str(unicodedata.decimal(char))

    # Handle numerics that have no decomposition
    if char.isnumeric():
        formatted = "%f" % unicodedata.numeric(char)
        # Drop trailing fractional zeros and a dangling point first, so
        # whole numbers keep their own zeros (10.0 -> "10", not "1.").
        formatted = formatted.rstrip("0").rstrip(".")
        # Fractions keep the historic form without a leading zero (".5").
        return formatted.lstrip("0") or "0"

    # Ignore unprintables, such as the accents we denormalized
    if not char.isalnum():
        return ""

    # Return modified latin characters as just the latin part.
    name = unicodedata.name(char)

    match = re.search(r"LATIN CAPITAL LIGATURE (\w+)", name)
    if match is not None:
        return match.group(1)

    match = re.search(r"LATIN SMALL LIGATURE (\w+)", name)
    if match is not None:
        return match.group(1).lower()

    match = re.search(r"(?:LETTER SMALL|SMALL LETTER) (\w+)", name)
    if match is not None:
        return match.group(1).lower()

    match = re.search(r"LETTER (\w+)", name)
    if match is not None:
        return match.group(1)

    # Something we can't represent.  Return empty string.
    return ""
import unicodedata import tangled_up_in_unicode as unicode_data if __name__ == "__main__": basic = [ { "property": "Name", "standard": unicodedata.name, "new": unicode_data.name }, { "property": "Decimal", "standard": lambda x: unicodedata.decimal(x, -1), "new": lambda x: unicode_data.decimal(x, -1), }, { "property": "Digit", "standard": lambda x: unicodedata.digit(x, -1), "new": lambda x: unicode_data.digit(x, -1), }, { "property": "Numeric", "standard": lambda x: unicodedata.numeric(x, -1.0), "new": lambda x: unicode_data.numeric(x, -1.0), }, { "property": "Category", "standard": unicodedata.category, "new": unicode_data.category, },
'''
unicodedata module

The unicodedata module contains the properties of Unicode characters,
such as character category, decomposition data, and numeric values.
'''
import unicodedata

for char in [u'A', u'-', u'1', u'w']:
    # One output line per character:
    # char -> repr -> category -> decomposition -> decimal => numeric
    fields = (
        char,
        repr(char),
        unicodedata.category(char),
        repr(unicodedata.decomposition(char)),
    )
    line = ''.join('{} -> '.format(field) for field in fields)
    line += '{} => {}'.format(unicodedata.decimal(char, None),
                              unicodedata.numeric(char, None))
    print(line)
def decimal(self):
    """Return the decimal value of ``self.c``, or ``None`` if it has none."""
    # Supplying a default makes the lookup return it instead of raising
    # ValueError for characters without a decimal value.
    return unicodedata.decimal(self.c, None)
from natsort.compat.py23 import py23_unichr
from natsort.unicode_numeric_hex import numeric_hex

# Convert each hex into the literal Unicode character.
# Stop if a ValueError is raised in case of a narrow Unicode build.
# The extra check with unicodedata is in case this Python version
# does not support some characters.
numeric_chars = []
for a in numeric_hex:
    try:
        character = py23_unichr(a)
    except ValueError:  # pragma: no cover
        break
    if unicodedata.numeric(character, None) is None:
        continue  # pragma: no cover
    numeric_chars.append(character)

# The digit characters are a subset of the numerals.
digit_chars = [a for a in numeric_chars
               if unicodedata.digit(a, None) is not None]

# The decimal characters are a subset of the numerals
# (probably of the digits, but let's be safe).
decimal_chars = [a for a in numeric_chars
                 if unicodedata.decimal(a, None) is not None]

# Create a single string with the above data.
decimals = "".join(decimal_chars)
digits = "".join(digit_chars)
numeric = "".join(numeric_chars)
# Digit / numeric characters that are not also decimal characters.
digits_no_decimals = "".join([x for x in digits if x not in decimals])
numeric_no_decimals = "".join([x for x in numeric if x not in decimals])
def getdetails(self, text):
    """
    Gives details of all characters in the given string.

    :param text: The unicode string to be examined.
    :type text: str.
    :returns: dictionary with details.

    Each distinct character maps to a dictionary of its properties and the
    ``'Characters'`` entry holds ``list(text)``.  ``'Name'`` is an empty
    string for characters without a Unicode name (e.g. newline);
    ``'Numeric Value'`` and ``'Decimal Value'`` are present only for
    characters that have one.

    ::

    >>> import chardetails.getInstance
    >>> a = getInstance()
    >>> a.getdetails(u"run")
    {'Characters': [u'r', u'u', u'n'],
     u'n': {'AlphaNumeric': 'True',
            'Alphabet': 'True',
            'Canonical Decomposition': '',
            'Code point': "u'n'",
            'Digit': 'False',
            'HTML Entity': '110',
            'Name': 'LATIN SMALL LETTER N'},
     u'r': {'AlphaNumeric': 'True',
            'Alphabet': 'True',
            'Canonical Decomposition': '',
            'Code point': "u'r'",
            'Digit': 'False',
            'HTML Entity': '114',
            'Name': 'LATIN SMALL LETTER R'},
     u'u': {'AlphaNumeric': 'True',
            'Alphabet': 'True',
            'Canonical Decomposition': '',
            'Code point': "u'u'",
            'Digit': 'False',
            'HTML Entity': '117',
            'Name': 'LATIN SMALL LETTER U'}}
    """
    chardetails = {}
    for character in text:
        details = {
            # Unnamed characters (newline, tab, ...) must not abort the scan.
            'Name': unicodedata.name(character, ''),
            'HTML Entity': str(ord(character)),
            'Code point': repr(character),
        }

        # Passing a default avoids using exceptions (and a bare ``except``)
        # for the common "character is not a number" case.
        numeric_value = unicodedata.numeric(character, None)
        if numeric_value is not None:
            details['Numeric Value'] = numeric_value
        decimal_value = unicodedata.decimal(character, None)
        if decimal_value is not None:
            details['Decimal Value'] = decimal_value

        # 'Digit' has always ended up as the isdigit() flag; the former
        # unicodedata.digit() lookup was overwritten here and is dropped.
        details['Alphabet'] = str(character.isalpha())
        details['Digit'] = str(character.isdigit())
        details['AlphaNumeric'] = str(character.isalnum())
        details['Canonical Decomposition'] = \
            unicodedata.decomposition(character)
        chardetails[character] = details
    chardetails['Characters'] = list(text)
    return chardetails
# Run the main test function and print what it returns.
print test_unicodedata()

# Some additional checks of the API:
print "API:",

# digit(): a letter and U+215B have no digit value (the default is
# returned); "9" and U+2468 both map to 9.
verify(unicodedata.digit(u"A", None) is None)
verify(unicodedata.digit(u"9") == 9)
verify(unicodedata.digit(u"\u215b", None) is None)
verify(unicodedata.digit(u"\u2468") == 9)

# numeric(): U+215B has the fractional value 0.125.
verify(unicodedata.numeric(u"A", None) is None)
verify(unicodedata.numeric(u"9") == 9)
verify(unicodedata.numeric(u"\u215b") == 0.125)
verify(unicodedata.numeric(u"\u2468") == 9.0)

# decimal(): of these four characters only "9" has a decimal value.
verify(unicodedata.decimal(u"A", None) is None)
verify(unicodedata.decimal(u"9") == 9)
verify(unicodedata.decimal(u"\u215b", None) is None)
verify(unicodedata.decimal(u"\u2468", None) is None)

# category()
verify(unicodedata.category(u"\uFFFE") == "Cn")
verify(unicodedata.category(u"a") == "Ll")
verify(unicodedata.category(u"A") == "Lu")

# bidirectional(): empty string for U+FFFE.
verify(unicodedata.bidirectional(u"\uFFFE") == "")
verify(unicodedata.bidirectional(u" ") == "WS")
verify(unicodedata.bidirectional(u"A") == "L")

# decomposition(): empty string when there is no mapping.
verify(unicodedata.decomposition(u"\uFFFE") == "")
verify(unicodedata.decomposition(u"\u00bc") == "<fraction> 0031 2044 0034")
def test_against_unicodedata():
    '''
    Check against `unicodedata` or `unicodedata2` if available with the
    correct version of Unicode.

    Compares, for every code point present in the parsed data files, the
    name, numeric properties, general category, bidi class, combining
    class, mirrored flag and decomposition mapping; then cross-checks the
    derived bidi class and the East Asian width tables.
    '''
    if unicodedata is None:
        raise Exception(
            'Packages unicodedata and unicodedata2 are not available with the necessary version of Unicode ({0}); many consistency tests were omitted'
            .format(mdl.UNICODE_VERSION))
    ucdf = mdl.UCDFiles()
    ud = ucdf.unicodedata
    for cp in range(0, 0x10FFFF + 1):
        c = chr(cp)
        if cp in ud:
            name = unicodedata.name(c, None)
            if name is None:
                # Handle missing names in unicodedata
                # Compare Table 4-13 in Unicode Standard
                # http://www.unicode.org/versions/Unicode9.0.0/ch04.pdf
                if 0x17000 <= cp <= 0x187EC:
                    assert ud[cp]['Name'] == 'TANGUT IDEOGRAPH-{0:04X}'.format(
                        cp)
                else:
                    assert ud[cp]['Name'] == ''
            else:
                assert name == ud[cp]['Name']
            decimal, digit, numeric = (unicodedata.decimal(c, None),
                                       unicodedata.digit(c, None),
                                       unicodedata.numeric(c, None))
            if any(x is not None for x in (decimal, digit, numeric)):
                # The branches go from the most specific numeric type
                # (Decimal) to the least specific (Numeric).
                if decimal is not None:
                    # NOTE(review): the trailing `decimal is not None` is
                    # redundant in this branch; `numeric is not None` may
                    # have been intended -- confirm.
                    assert decimal == int(ud[cp]['Numeric_Value']) and ud[cp][
                        'Numeric_Type'] == 'Decimal' and digit is not None and decimal is not None
                elif digit is not None:
                    assert digit == int(ud[cp]['Numeric_Value']) and ud[cp][
                        'Numeric_Type'] == 'Digit' and decimal is None and numeric is not None
                elif numeric is not None:
                    try:
                        num = float(ud[cp]['Numeric_Value'])
                    except ValueError:
                        # Values containing '/' are fractions and are
                        # evaluated by hand.
                        if '/' in ud[cp]['Numeric_Value']:
                            numerator, denominator = ud[cp][
                                'Numeric_Value'].split('/')
                            num = float(numerator) / float(denominator)
                        else:
                            raise
                    assert numeric == num and ud[cp][
                        'Numeric_Type'] == 'Numeric' and digit is None and decimal is None
                else:
                    raise Exception
            else:
                assert ud[cp]['Numeric_Value'] == 'NaN' and ud[cp][
                    'Numeric_Type'] == 'None'
            assert unicodedata.category(c) == ud[cp]['General_Category']
            assert unicodedata.bidirectional(c) == ud[cp]['Bidi_Class']
            assert unicodedata.combining(c) == int(
                ud[cp]['Canonical_Combining_Class'])
            assert unicodedata.mirrored(c) == ud[cp]['Bidi_Mirrored']
            if unicodedata.decomposition(c) == '':
                if ud[cp]['Name'].startswith('HANGUL SYLLABLE'):
                    # The Hangul syllables lack decomposition mapping in
                    # unicodedata, so calculate with a full decomposition
                    # followed by a partial composition (Unicode Standard,
                    # chapter 3.12)
                    decomp = unicodedata.normalize('NFD', c)
                    if len(decomp) == 3:
                        decomp = unicodedata.normalize('NFC',
                                                       decomp[:2]) + decomp[-1]
                    decomp = tuple(ord(x) for x in decomp)
                    assert decomp == ud[cp]['Decomposition_Mapping']
                else:
                    # No decomposition: the expected mapping is the code
                    # point itself.
                    assert ud[cp]['Decomposition_Mapping'] == (cp, )
            else:
                x = unicodedata.decomposition(c)
                # Strip a leading '<tag>' before parsing the hex code
                # points.
                if '<' in x:
                    x = x.split('>', 1)[1].strip()
                x = tuple(int(y, 16) for y in x.split('\x20'))
                assert x == ud[cp]['Decomposition_Mapping']

    dbc = ucdf.derivedbidiclass
    for cp in range(0, 0x10FFFF + 1):
        c = chr(cp)
        # Only compare assigned code points, because unicodedata and
        # unicodedata2 lack correct defaults for unassigned
        if cp in dbc and cp in ud:
            assert unicodedata.bidirectional(c) == dbc[cp]['Bidi_Class']

    eaw = ucdf.eastasianwidth
    deaw = ucdf.derivedeastasianwidth
    for cp in range(0, 0x10FFFF + 1):
        c = chr(cp)
        # Only compare assigned code points, because unicodedata and
        # unicodedata2 lack correct defaults for unassigned
        if cp in eaw and cp in ud:
            assert unicodedata.east_asian_width(
                c) == eaw[cp]['East_Asian_Width']
        if cp in deaw and cp in ud:
            assert unicodedata.east_asian_width(
                c) == deaw[cp]['East_Asian_Width']
import unicodedata

# Look a character up by its Unicode name, and the reverse.
print(unicodedata.lookup('LEFT CURLY BRACKET'))
print(unicodedata.name('/'))
# Decimal value of a digit character.
print(unicodedata.decimal('9'))
# Left disabled: 'a' has no decimal value, so this call raises ValueError.
#unicodedata.decimal('a')
print(unicodedata.category('A'))  # 'L'etter, 'u'ppercase
print(unicodedata.bidirectional('\u0660'))  # 'A'rabic, 'N'umber
from codecs import StreamWriter
from datetime import timedelta
# A timedelta exposes its value as days, seconds and microseconds.
d = timedelta(hours=1)
print((d.days, d.seconds, d.microseconds))
# Stop if a ValueError is raised in case of a narrow Unicode build. # The extra check with unicodedata is in case this Python version # does not support some characters. numeric_chars = [] for a in numeric_hex: try: character = chr(a) except ValueError: # pragma: no cover break if unicodedata.numeric(character, None) is None: continue # pragma: no cover numeric_chars.append(character) # The digit characters are a subset of the numerals. digit_chars = [ a for a in numeric_chars if unicodedata.digit(a, None) is not None ] # The decimal characters are a subset of the numberals # (probably of the digits, but let's be safe). decimal_chars = [ a for a in numeric_chars if unicodedata.decimal(a, None) is not None ] # Create a single string with the above data. decimals = "".join(decimal_chars) digits = "".join(digit_chars) numeric = "".join(numeric_chars) digits_no_decimals = "".join([x for x in digits if x not in decimals]) numeric_no_decimals = "".join([x for x in numeric if x not in decimals])
def test_decimal_chars_contains_only_valid_unicode_decimal_characters():
    """Every entry of ``decimal_chars`` must have a Unicode decimal value."""
    for candidate in decimal_chars:
        decimal_value = unicodedata.decimal(candidate, None)
        assert decimal_value is not None
""" Test script for the unicodedata module. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. """#" from test_support import verify, verbose import sha encoding = 'utf-8' def test_methods(): h = sha.sha() for i in range(65536): char = unichr(i) data = [ # Predicates (single char) char.isalnum() and u'1' or u'0', char.isalpha() and u'1' or u'0', char.isdecimal() and u'1' or u'0', char.isdigit() and u'1' or u'0', char.islower() and u'1' or u'0', char.isnumeric() and u'1' or u'0', char.isspace() and u'1' or u'0', char.istitle() and u'1' or u'0', char.isupper() and u'1' or u'0', # Predicates (multiple chars) (char + u'abc').isalnum() and u'1' or u'0', (char + u'abc').isalpha() and u'1' or u'0', (char + u'123').isdecimal() and u'1' or u'0', (char + u'123').isdigit() and u'1' or u'0', (char + u'abc').islower() and u'1' or u'0', (char + u'123').isnumeric() and u'1' or u'0', (char + u' \t').isspace() and u'1' or u'0',
try: l = py23_unichr(a) except ValueError: # pragma: no cover break if unicodedata.numeric(l, None) is None: continue # pragma: no cover numeric_chars.append(l) # The digit characters are a subset of the numerals. digit_chars = [a for a in numeric_chars if unicodedata.digit(a, None) is not None] # The decimal characters are a subset of the numberals # (probably of the digits, but let's be safe). decimal_chars = [a for a in numeric_chars if unicodedata.decimal(a, None) is not None] # Create a single string with the above data. decimals = ''.join(decimal_chars) digits = ''.join(digit_chars) numeric = ''.join(numeric_chars) digits_no_decimals = ''.join([x for x in digits if x not in decimals]) numeric_no_decimals = ''.join([x for x in numeric if x not in decimals]) # Some code that can be used to create the above list of hex numbers. if __name__ == '__main__': import textwrap from natsort.compat.py23 import py23_range hex_chars = [] for i in py23_range(0X110000):
import unicodedata

if __name__ == "__main__":
    # Small tour of the unicodedata lookup functions.
    print(unicodedata.lookup('left curly bracket'))
    print(unicodedata.name('\\'))
    print(unicodedata.decimal("1"))
    print(unicodedata.digit("4"))
    print(unicodedata.numeric("9"))
    print(unicodedata.category("/"))
    print(unicodedata.bidirectional("b"))
    print(unicodedata.east_asian_width("b"))
    # mirrored() takes exactly one character; the former two-character
    # argument "{}" raised TypeError.
    print(unicodedata.mirrored("{"))
print("bidirectional ok") for category, cp in tests["categories"].items(): assert category == unicodedata.category(chr(int(cp, 16))) print("categories ok") for comb, cp in tests["combinings"].items(): assert int(comb) == unicodedata.combining(chr(int(cp, 16))) print("combining ok") for decimal, cp in tests["decimals"].items(): if decimal: assert eval(decimal) == unicodedata.decimal(chr(int(cp, 16))) print("decimals ok") for decomp, cp in tests["decompositions"].items(): assert decomp == unicodedata.decomposition(chr(int(cp, 16))) print("decomposition ok") for digit, cp in tests["digits"].items(): if digit: assert eval(digit) == unicodedata.digit(chr(int(cp, 16))) print("digits ok") for name, cp in tests["names"].items():
## Function 1-unicodedata.lookup(name) 2-unicodedata.name(chr[, default]) 3-unicodedata.decimal(chr[, default]) 4-unicodedata.digit(chr[, default]) 5-unicodedata.numeric(chr[, default]) 6-unicodedata.category(chr) 7-unicodedata.bidirectional(chr) 8-unicodedata.normalize(form, unistr) ''' import unicodedata print(unicodedata.lookup('LEFT CURLY BRACKET')) print(unicodedata.lookup('RIGHT CURLY BRACKET')) print(unicodedata.lookup('ASTERISK')) ############################# print(unicodedata.name(u'/')) print(unicodedata.name(u'|')) print(unicodedata.name(u':')) ################################ print(unicodedata.decimal(u'9')) print(unicodedata.decimal(u'5')) #################################### print(unicodedata.decimal(u'0')) print(unicodedata.decimal(u'1')) ################################### print(unicodedata.category(u'A')) print(unicodedata.category(u'b')) ################################### print(unicodedata.bidirectional(u'\u0660'))
# Test Unicode database APIs import unicodedata print 'Testing unicodedata module...', assert unicodedata.digit(u'A',None) is None assert unicodedata.digit(u'9') == 9 assert unicodedata.digit(u'\u215b',None) is None assert unicodedata.digit(u'\u2468') == 9 assert unicodedata.numeric(u'A',None) is None assert unicodedata.numeric(u'9') == 9 assert unicodedata.numeric(u'\u215b') == 0.125 assert unicodedata.numeric(u'\u2468') == 9.0 assert unicodedata.decimal(u'A',None) is None assert unicodedata.decimal(u'9') == 9 assert unicodedata.decimal(u'\u215b',None) is None assert unicodedata.decimal(u'\u2468',None) is None assert unicodedata.category(u'\uFFFE') == 'Cn' assert unicodedata.category(u'a') == 'Ll' assert unicodedata.category(u'A') == 'Lu' assert unicodedata.bidirectional(u'\uFFFE') == '' assert unicodedata.bidirectional(u' ') == 'WS' assert unicodedata.bidirectional(u'A') == 'L' assert unicodedata.decomposition(u'\uFFFE') == '' assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'
import unicodedata

# Print one space-separated row per sample character:
# repr, general category, decomposition, decimal value, numeric value.
# ``None`` is passed as the default so characters without a decimal/numeric
# value print "None" instead of raising ValueError.
for char in [u"A", u"-", u"1", u"\N{LATIN CAPITAL LETTER O WITH DIAERESIS}"]:
    print(
        repr(char),
        unicodedata.category(char),
        repr(unicodedata.decomposition(char)),
        unicodedata.decimal(char, None),
        unicodedata.numeric(char, None),
    )

## 'A' Lu '' None None
## '-' Pd '' None None
## '1' Nd '' 1 1.0
## 'Ö' Lu '004F 0308' None None
def setUpModule():
    """Dump Python's Unicode database to CSV and import it via EXAplus.

    One CSV row is written per code point in ``0..sys.maxunicode`` (skipping
    the range 5024-5119 and every character whose general category starts
    with 'C').  The file is then imported into a freshly created
    ``utest.unicodedata`` table by piping a SQL script into the EXAplus
    command-line client.

    The client binary is taken from the ``EXAPLUS`` environment variable
    (with a hard-coded fallback path) and the connection string from
    ``udf.opts.server``.  A non-zero exit code is logged but not raised.

    The loop uses ``xrange``/``unichr``, i.e. Python 2 idioms.
    """
    log = logging.getLogger('unicodedata')

    log.info('generating unicodedata CSV')
    with tempfile.NamedTemporaryFile(prefix='unicode-', suffix='.csv') as csvfile:
        c = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
        for i in xrange(sys.maxunicode + 1):
            if 5024 <= i <= 5119:
                # the Unicode Cherokee-Block is broken in Python 2.7 and
                # Python 3.4 (maybe also 3.5)
                continue
            u = unichr(i)
            if unicodedata.category(u).startswith('C'):
                # [Cc]Other, Control
                # [Cf]Other, Format
                # [Cn]Other, Not Assigned
                # [Co]Other, Private Use
                # [Cs]Other, Surrogate
                continue
            row = (
                i,                                          # INT 0-1114111
                unicodedata.name(u, 'UNICODE U+%08X' % i),  # VARCHAR(100) ASCII
                u,                                          # VARCHAR(1) UNICODE
                u.upper(),                                  # VARCHAR(1) UNICODE
                u.lower(),                                  # VARCHAR(1) UNICODE
                unicodedata.decimal(u, None),               # INT
                # NOTE(review): numeric() returns floats (e.g. 0.125) but the
                # table below declares numeric_value as INT - confirm whether
                # the column should be DOUBLE.
                unicodedata.numeric(u, None),               # DOUBLE
                unicodedata.category(u),                    # VARCHAR(3) ASCII
                unicodedata.bidirectional(u),               # VARCHAR(3) ASCII
                unicodedata.combining(u),                   # VARCHAR(3) ASCII
                unicodedata.east_asian_width(u),            # VARCHAR(1) ASCII
                # Call mirrored() for this character; bool() of the bare
                # function object would be True for every row.
                bool(unicodedata.mirrored(u)),              # BOOLEAN
                unicodedata.decomposition(u),               # VARCHAR(10) ASCII
                unicodedata.normalize('NFC', u),            # VARCHAR(3) UNICODE
                unicodedata.normalize('NFD', u),            # VARCHAR(3) UNICODE
                unicodedata.normalize('NFKC', u),           # VARCHAR(3) UNICODE
                unicodedata.normalize('NFKD', u),           # VARCHAR(3) UNICODE
            )
            c.writerow(utf8encoder(row))
        # Make sure every row is on disk before the external client reads it.
        csvfile.flush()

        log.info('loading CSV')
        sql = '''
            DROP SCHEMA utest CASCADE;
            CREATE SCHEMA utest;
            CREATE TABLE unicodedata (
                codepoint INT NOT NULL,
                name VARCHAR(100) ASCII,
                uchar VARCHAR(1) UTF8,
                to_upper VARCHAR(1) UTF8,
                to_lower VARCHAR(1) UTF8,
                decimal_value INT,
                numeric_value INT,
                category VARCHAR(3) ASCII,
                bidirectional VARCHAR(3) ASCII,
                combining VARCHAR(10) ASCII,
                east_asian_width VARCHAR(2) ASCII,
                mirrored BOOLEAN,
                decomposition VARCHAR(100) ASCII,
                NFC VARCHAR(10) UTF8,
                NFD VARCHAR(10) UTF8,
                NFKC VARCHAR(20) UTF8,
                NFKD VARCHAR(20) UTF8
            );
            IMPORT INTO unicodedata
            FROM LOCAL CSV FILE '%s'
            ROW SEPARATOR = 'CRLF';
            ''' % os.path.join(os.getcwd(), csvfile.name)
        cmd = '''%(exaplus)s -c %(conn)s -u sys -P exasol
                 -no-config -autocommit ON -L -pipe''' % {
            'exaplus': os.environ.get(
                'EXAPLUS',
                '/usr/opt/EXASuite-4/EXASolution-4.2.9/bin/Console/exaplus'),
            'conn': udf.opts.server,
        }
        env = os.environ.copy()
        env['PATH'] = '/usr/opt/jdk1.8.0_latest/bin:' + env['PATH']
        # The temporary file is deleted when the ``with`` block exits, so the
        # import has to finish inside it.
        exaplus = subprocess.Popen(
            cmd.split(),
            env=env,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT)
        out, _err = exaplus.communicate(sql)

    if exaplus.returncode != 0:
        log.critical('EXAplus error: %d', exaplus.returncode)
        log.error(out)
    else:
        log.debug(out)
def _food_order_line(request, item, qty_prefix, special_request_field=None):
    """Build the order line for one menu item from the posted form data.

    Returns ``(line, line_price)`` or ``None`` when no quantity was posted
    for the item (field ``qty_prefix + str(item.id)`` missing or empty).
    """
    raw_qty = request.POST.get(qty_prefix + str(item.id), None)
    special_request = None
    if special_request_field is not None:
        special_request = request.POST.get(special_request_field, None)
    if raw_qty is None or raw_qty == '':
        return None

    # NOTE(review): ``decimal`` is resolved from the enclosing module and must
    # be a callable that converts the posted string to a number - confirm.
    line_price = item.price * decimal(raw_qty)
    line = {
        'name': item.name,
        'total_price': str(line_price),
        'qty': raw_qty,
    }
    if special_request_field is not None:
        line['special_request'] = special_request
    return line, line_price


def check_food_qty_len(request):
    """Validate the posted food selection and price it.

    At least one pizza must be selected.  For every selected item of every
    category a quantity must be posted in ``<category>_qty_<id>``.

    Args:
        request: object exposing ``POST.getlist(key, default)`` and
            ``POST.get(key, default)``.

    Returns:
        A ``(ok, message, data)`` tuple.  On failure ``ok`` is False,
        ``message`` names the problem and ``data`` is ``{}``.  On success
        ``message`` is ``''`` and ``data`` holds ``'food_data'`` (list of
        dicts with ``name``, ``total_price``, ``qty`` and, for wing sauces,
        salads and desserts, ``special_request``) and ``'total_food_price'``.
    """
    data = {}
    if not request.POST.getlist('pizzas', None):
        return False, 'Please select at least one pizza and quantity first.', data

    # (model, id-list field, quantity field prefix, error message,
    #  special-request field or None) - processed in this order.
    categories = (
        (Pizza, 'pizzas', 'pizzas_qty_',
         'Pizza Quantity not given properly!', None),
        (GlutenCauliflower, 'gluten_cauliflower', 'gluten_cauliflower_qty_',
         'Gluten Free and Cauliflower Crust Quantity not given properly!', None),
        (WingSauce, 'wings_sauce', 'wings_sauce_qty_',
         'Wings Sauces Quantity not given properly!',
         'wings_sauce_special_request'),
        (Salad, 'salad', 'salad_qty_',
         'Salads Quantity not given properly!', 'salad_special_request'),
        (SaladDressing, 'salad_dressing', 'salad_dressing_qty_',
         'Salad Dressings Quantity not given properly!', None),
        (Dessert, 'dessert', 'dessert_qty_',
         'Desserts Quantity not given properly!', 'dessert_special_request'),
        (Bread, 'bread', 'bread_qty_',
         'Breads Quantity not given properly!', None),
        (Wing, 'wing', 'wing_qty_',
         'Wings Quantity not given properly!', None),
    )

    total_food_price = 0
    food_data = []
    for model, list_field, qty_prefix, error_message, special_field in categories:
        selected_ids = request.POST.getlist(list_field, None)
        if not selected_ids:
            continue
        for item in model.objects.filter(id__in=selected_ids):
            result = _food_order_line(request, item, qty_prefix, special_field)
            if result is None:
                return False, error_message, data
            line, line_price = result
            food_data.append(line)
            total_food_price += line_price

    data = {'food_data': food_data, 'total_food_price': total_food_price}
    return True, '', data
'''
The unicodedata module

The unicodedata module contains the properties of Unicode characters,
such as character category, decomposition data, and numeric values.
'''
import unicodedata

# One output line per sample character:
#   char -> repr -> category -> decomposition -> decimal => numeric
ROW_TEMPLATE = '{} -> {} -> {} -> {} -> {} => {}'

for sample in [u'A', u'-', u'1', u'w']:
    general_category = unicodedata.category(sample)
    decomposition = unicodedata.decomposition(sample)
    # A default of None yields "None" for characters that have no value.
    decimal_value = unicodedata.decimal(sample, None)
    numeric_value = unicodedata.numeric(sample, None)
    print(ROW_TEMPLATE.format(
        sample,
        repr(sample),
        general_category,
        repr(decomposition),
        decimal_value,
        numeric_value,
    ))