コード例 #1
0
ファイル: core.py プロジェクト: copyninja/chardetails
    def getdetails(self, text):
        """Collect Unicode properties for every character in ``text``.

        Returns a dict keyed by character. Each value is a dict holding
        'Name', 'HTML Entity', 'Code point', 'Alphabet', 'Digit',
        'AlphaNumeric' and 'Canonical Decomposition', plus 'Numeric Value',
        'Decimal Value' and 'Digit Value' for characters that have them.
        The extra top-level key 'Characters' holds ``list(text)``.
        """
        # Optional numeric properties: unicodedata raises ValueError when the
        # character has no such value, in which case the key is left out.
        optional_lookups = (
            ('Numeric Value', unicodedata.numeric),
            ('Decimal Value', unicodedata.decimal),
            # Stored under its own key: 'Digit' is reserved for the isdigit()
            # flag set further down.
            ('Digit Value', unicodedata.digit),
        )

        chardetails = {}
        for character in text:
            details = {
                # Unnamed code points (e.g. control characters) get an empty
                # name instead of aborting the whole call with ValueError.
                'Name': unicodedata.name(character, ''),
                'HTML Entity': str(ord(character)),
                'Code point': repr(character),
            }
            for key, lookup in optional_lookups:
                try:
                    details[key] = lookup(character)
                except ValueError:
                    pass
            details['Alphabet'] = str(character.isalpha())
            details['Digit'] = str(character.isdigit())
            details['AlphaNumeric'] = str(character.isalnum())
            details['Canonical Decomposition'] = \
                    unicodedata.decomposition(character)
            chardetails[character] = details

        chardetails['Characters'] = list(text)
        return chardetails
コード例 #2
0
def test_numeric_chars_contains_all_valid_unicode_numeric_and_digit_characters(
):
    """Check the module's character tables against ``unicodedata``.

    Every code point that ``unicodedata`` reports as numeric, digit or
    decimal (other than ASCII '0'-'9') must appear in ``numeric_hex`` and in
    the matching ``*_chars`` collection. The ``*_no_decimals`` collections
    must be disjoint from the decimal characters and contained in their
    parent collection.
    """
    set_numeric_hex = set(numeric_hex)
    set_numeric_chars = set(numeric_chars)
    set_digit_chars = set(digit_chars)
    set_decimal_chars = set(decimal_chars)
    # Built once up front; it is constant across the ~1.1 million iterations.
    ascii_digits = set('0123456789')
    for i in py23_range(0X110000):
        try:
            a = py23_unichr(i)
        except ValueError:
            # The interpreter cannot represent this code point; stop scanning.
            break
        if a in ascii_digits:
            continue
        if unicodedata.numeric(a, None) is not None:
            assert i in set_numeric_hex
            assert a in set_numeric_chars
        if unicodedata.digit(a, None) is not None:
            assert i in set_numeric_hex
            assert a in set_digit_chars
        if unicodedata.decimal(a, None) is not None:
            assert i in set_numeric_hex
            assert a in set_decimal_chars

    assert set_decimal_chars.isdisjoint(digits_no_decimals)
    assert set_digit_chars.issuperset(digits_no_decimals)

    assert set_decimal_chars.isdisjoint(numeric_no_decimals)
    assert set_numeric_chars.issuperset(numeric_no_decimals)
コード例 #3
0
def get_type_numeric(pred, db):
    """Summarise the integer objects stored for predicate ``pred``.

    Reads ``obj`` from the ``db.spot_tb`` table for rows whose ``pred``
    matches and whose ``obj_type`` is 'int', converts each value to an int
    and returns a dict with the 'max', 'min', 'avg', 'p10' and 'p90' of the
    absolute values.

    Values that ``int()`` rejects are retried by mapping every character to
    its Unicode decimal digit; rows that still cannot be converted are
    printed and skipped.

    Raises:
        ValueError: (from ``max``) when no row could be converted.
    """
    obj_list = []
    # The table name cannot be bound as a query parameter; ``pred`` is.
    query = "select obj from " + db.spot_tb + " where pred=(%s) and obj_type='int' "
    cur = db.conn.cursor()
    try:
        cur.execute(query, [pred])
        for row in cur:
            raw = row[0]
            try:
                val = int(raw)
            except ValueError:
                try:
                    # Only byte strings need decoding; text is used as-is.
                    text = raw.decode('utf8') if isinstance(raw, bytes) else raw
                    val = int(''.join(
                        str(unicodedata.decimal(d, -1)) for d in text
                    ))
                except Exception as e:
                    print(pred, raw, e)
                    continue
            # Reached for both the direct and the fallback conversion, so
            # recovered values are counted too.
            obj_list.append(abs(val))
    finally:
        cur.close()

    stats = {}
    stats['max'] = max(obj_list)
    stats['min'] = min(obj_list)
    stats['avg'] = sum(obj_list) / float(len(obj_list))
    stats['p10'] = np.percentile(obj_list, 10)
    stats['p90'] = np.percentile(obj_list, 90)
    return stats
コード例 #4
0
def conv(unicode_arabic_date):
    """Rewrite a date/time string so every digit is an ASCII digit.

    ':' and '/' are copied through unchanged; every other character is
    replaced by its Unicode decimal value, so a character without one makes
    ``unicodedata.decimal`` raise ValueError.
    """
    separators = (':', '/')
    return ''.join(
        ch if ch in separators else str(unicodedata.decimal(ch))
        for ch in unicode_arabic_date
    )
コード例 #5
0
    def _explain_char(self, ch, further):
        """Describe the single character ``ch``.

        With ``further`` false the result is ``<name>(<ch>)``. Otherwise it is
        ``<name>: <repr> (<key>: <value>, ...)`` listing the properties
        reported by ``unicodedata``. Characters without a name are shown as
        ``[U+<hex code point>]``.
        """
        name = unicodedata.name(ch, None)
        if name is None:
            name = f'[U+{ord(ch):x}]'
        if not further:
            return f'{name}({ch})'

        infos = {
            'category': unicodedata.category(ch),
            'direction': unicodedata.bidirectional(ch),
            'east asian width': unicodedata.east_asian_width(ch)
        }

        decomposition = unicodedata.decomposition(ch)
        if decomposition:
            infos['decomposition'] = decomposition

        # Each lookup raises ValueError when the character has no such value;
        # the entry is then simply left out.
        value_lookups = (
            ('digit value', unicodedata.digit),
            ('decimal value', unicodedata.decimal),
            ('numeric value', unicodedata.numeric),
        )
        for label, lookup in value_lookups:
            try:
                infos[label] = lookup(ch)
            except ValueError:
                continue

        combining_class = unicodedata.combining(ch)
        if combining_class != 0:
            infos['combining class'] = str(combining_class)

        if unicodedata.mirrored(ch):
            infos['mirrored'] = 'yes'

        # is_normalized is looked up dynamically because it may be missing.
        is_normalized = getattr(unicodedata, 'is_normalized', None)
        if is_normalized is None:
            infos['normalized'] = 'unavailable'
        else:
            forms = [
                form for form in ('NFC', 'NFD', 'NFKC', 'NFKD')
                if is_normalized(form, ch)
            ]
            if forms:
                infos['normalized'] = f'yes: {", ".join(forms)}'
            else:
                infos['normalized'] = 'no'

        info = ', '.join(f'{k}: {v}' for k, v in infos.items())
        return f'{name}: {ch!r} ({info})'
コード例 #6
0
def overview(tree_item):
    """Return TEMPLATE filled in with the properties of ``tree_item.obj``.

    The fields are passed positionally: name, the character itself, decimal,
    digit and numeric values ('' when absent), category, bidirectional class,
    combining class, East Asian width, mirrored flag and decomposition.
    """
    char = tree_item.obj
    fields = (
        unicodedata.name(char, '<NO NAME AVAILABLE>'),
        char,
        unicodedata.decimal(char, ''),
        unicodedata.digit(char, ''),
        unicodedata.numeric(char, ''),
        unicodedata.category(char),
        unicodedata.bidirectional(char),
        unicodedata.combining(char),
        unicodedata.east_asian_width(char),
        unicodedata.mirrored(char),
        unicodedata.decomposition(char),
    )
    return TEMPLATE.format(*fields)
コード例 #7
0
ファイル: unicode_browser.py プロジェクト: lebedov/objbrowser
def overview(tree_item):
    """Format the Unicode properties of ``tree_item.obj`` with TEMPLATE.

    Positional template arguments, in order: name, character, decimal, digit,
    numeric (each '' when the character has no such value), category,
    bidirectional, combining, east_asian_width, mirrored, decomposition.
    """
    char = tree_item.obj
    name = unicodedata.name(char, '<NO NAME AVAILABLE>')
    # Lookups that accept a fallback for characters without a value.
    values = [
        lookup(char, '')
        for lookup in (unicodedata.decimal,
                       unicodedata.digit,
                       unicodedata.numeric)
    ]
    # Properties that are defined for every character.
    properties = [
        prop(char)
        for prop in (unicodedata.category,
                     unicodedata.bidirectional,
                     unicodedata.combining,
                     unicodedata.east_asian_width,
                     unicodedata.mirrored,
                     unicodedata.decomposition)
    ]
    return TEMPLATE.format(name, char, *(values + properties))
コード例 #8
0
ファイル: char2name.py プロジェクト: edt-yxz-zzd/python3_src
def char2info(ch):
    """Return a dict of Unicode properties for the single character ``ch``.

    The dict holds the character itself under 'ch', its code point under
    'unicode' and 'unicode_hex', and one entry per property looked up below.
    'name', 'decimal', 'digit' and 'numeric' are None when the character has
    no such value.
    """
    return {
        'ch': ch,
        'name': U.name(ch, None),
        'decimal': U.decimal(ch, None),
        'digit': U.digit(ch, None),
        'numeric': U.numeric(ch, None),
        'category': U.category(ch),
        'bidirectional': U.bidirectional(ch),
        'combining': U.combining(ch),
        'east_asian_width': U.east_asian_width(ch),
        'mirrored': U.mirrored(ch),
        'decomposition': U.decomposition(ch),
        'unicode': ord(ch),
        'unicode_hex': hex(ord(ch)),
    }
コード例 #9
0
def main():
    """Average the integer values of consecutive (subject, predicate) rows.

    Reads ``./count_information/integer_per_pred_per_sub_dbp_map.csv``, whose
    rows are used as (subject, predicate, value), and appends
    [subject, predicate, truncated mean of abs(value)] rows to
    ``./count_information/avg_integer_per_pred_per_sub_dbp_map.csv`` in
    batches of 1000.

    NOTE(review): grouping only merges adjacent rows, so the input is
    presumably sorted by subject and predicate - confirm.
    NOTE(review): as shown here, the last group and any buffered rows left
    when the input ends are never written - confirm whether a final flush
    exists beyond this excerpt.
    """
    kb_name = 'dbp_map'
    filein = './count_information/integer_per_pred_per_sub_'
    fileout = './count_information/avg_integer_per_pred_per_sub_'

    with open(filein + kb_name + '.csv') as fin:
        reader = csv.reader(fin)
        # Key of the group currently being accumulated (None before the
        # first usable row).
        prev_pred = None
        prev_sub = None
        # Absolute values collected for the current group.
        count_val = []
        # Finished output rows waiting to be written.
        bufferout = []
        for row in tqdm(reader):
            sub = row[0]
            pred = row[1]
            try:
                val = int(row[2])
            except ValueError:
                # Fallback: map each character to its Unicode decimal digit
                # (-1 for characters without one) and parse the result.
                try:
                    unicode_char_list = ''.join([
                        str(unicodedata.decimal(d, -1))
                        for d in row[2].decode('utf8')
                    ])
                    val = int(unicode_char_list)
                except Exception as e:
                    # Unconvertible value: report it and skip the row.
                    print(sub, pred, row[2], e)
                    continue

            if sub == prev_sub and pred == prev_pred:
                # Same group as the previous row: keep accumulating.
                count_val.append(abs(val))
                continue
            elif prev_sub is not None and prev_pred is not None:
                # Group changed: emit the finished group, then start a new
                # one with the current row.
                bufferout.append([
                    prev_sub, prev_pred,
                    int(sum(count_val) / len(count_val))
                ])
                prev_sub = sub
                prev_pred = pred
                count_val = [abs(val)]
            else:
                # First usable row: start the first group.
                prev_sub = sub
                prev_pred = pred
                count_val.append(abs(val))
            # Only reached when a group boundary was just processed.
            if len(bufferout) == 1000:
                with open(fileout + kb_name + '.csv', 'a') as fout:
                    writer = csv.writer(fout, quoting=csv.QUOTE_MINIMAL)
                    writer.writerows(bufferout)
                bufferout = []
コード例 #10
0
    def test_compare_functions(self):
        """Compare unicodedata with unicodedb_5_2_0 for code points below 0x10000."""
        def db_value(prop, code_point):
            # unicodedata is asked for -1 when a value is missing, so a
            # KeyError from the database is mapped onto the same marker.
            lookup = getattr(unicodedb_5_2_0, prop)
            try:
                return lookup(code_point)
            except KeyError:
                return -1

        # Properties that may be absent for a character.
        optional_props = ('digit', 'numeric', 'decimal')
        # Properties both sides define for every code point.
        total_props = ('category', 'bidirectional', 'decomposition',
                       'mirrored', 'combining')

        for code_point in range(0x10000):
            char = unichr(code_point)
            for prop in optional_props:
                assert getattr(unicodedata, prop)(char, -1) == db_value(prop, code_point)
            for prop in total_props:
                assert getattr(unicodedata, prop)(char) == getattr(unicodedb_5_2_0, prop)(code_point)
コード例 #11
0
ファイル: test_unicodedata.py プロジェクト: mozillazg/pypy
    def test_compare_functions(self):
        """Check that unicodedata and unicodedb_5_2_0 agree below 0x10000."""
        db = unicodedb_5_2_0
        ucd = unicodedata

        def getX(fun, code):
            # A KeyError from the database means "no value"; report it as the
            # same -1 that unicodedata is given as its default.
            try:
                found = getattr(db, fun)(code)
            except KeyError:
                found = -1
            return found

        for cp in range(0x10000):
            ch = unichr(cp)
            # Properties that can be missing for a character.
            assert ucd.digit(ch, -1) == getX('digit', cp)
            assert ucd.numeric(ch, -1) == getX('numeric', cp)
            assert ucd.decimal(ch, -1) == getX('decimal', cp)
            # Properties defined for every code point.
            assert ucd.category(ch) == db.category(cp)
            assert ucd.bidirectional(ch) == db.bidirectional(cp)
            assert ucd.decomposition(ch) == db.decomposition(cp)
            assert ucd.mirrored(ch) == db.mirrored(cp)
            assert ucd.combining(ch) == db.combining(cp)
コード例 #12
0
def main():
    """Print the Unicode properties of the character given as hex bytes.

    The command-line arguments are hexadecimal byte values that together
    form the UTF-8 encoding of one character. Any failure is reported as an
    ``ERROR: ...`` line; lines printed before the failure stay printed.
    """
    try:
        raw = bytes(int(arg, 16) for arg in sys.argv[1:])
        c = raw.decode('utf8')
        # Values are computed lazily, one per line, so an error part-way
        # through leaves the earlier lines on the output.
        report = (
            ('gryph:            %s', lambda: c),
            ('codepoint:        U+%x', lambda: ord(c)),
            ('name:             %s', lambda: unicodedata.name(c, 'Unknown')),
            ('decimal:          %s', lambda: unicodedata.decimal(c, 'Unknown')),
            ('digit:            %s', lambda: unicodedata.digit(c, 'Unknown')),
            ('numeric:          %s', lambda: unicodedata.numeric(c, 'Unknown')),
            ('category:         %s', lambda: unicodedata.category(c)),
            ('bidirectional:    %s', lambda: unicodedata.bidirectional(c)),
            ('combining:        %s', lambda: unicodedata.combining(c)),
            ('east_asian_width: %s', lambda: unicodedata.east_asian_width(c)),
            ('mirrored:         %s', lambda: unicodedata.mirrored(c)),
            ('decomposition:    %s', lambda: unicodedata.decomposition(c)),
        )
        for template, compute in report:
            print(template % compute())
    except Exception as ex:
        print('ERROR: %s' % ex)
コード例 #13
0
ファイル: show_utf8_char.py プロジェクト: odashi/nlptools
def main():
  """Show the Unicode properties of one character passed as hex UTF-8 bytes.

  Each command-line argument is one byte in hexadecimal. Errors are caught
  and printed as ``ERROR: ...``; output produced before the error remains.
  """
  try:
    encoded = bytes(int(byte_hex, 16) for byte_hex in sys.argv[1:])
    c = encoded.decode('utf8')
    print('gryph:            %s' % c)
    print('codepoint:        U+%x' % ord(c))

    # Lookups that fall back to 'Unknown' for characters without a value.
    with_fallback = (
      ('name:             %s', unicodedata.name),
      ('decimal:          %s', unicodedata.decimal),
      ('digit:            %s', unicodedata.digit),
      ('numeric:          %s', unicodedata.numeric),
    )
    for line_format, lookup in with_fallback:
      print(line_format % lookup(c, 'Unknown'))

    # Properties that every character has.
    always_defined = (
      ('category:         %s', unicodedata.category),
      ('bidirectional:    %s', unicodedata.bidirectional),
      ('combining:        %s', unicodedata.combining),
      ('east_asian_width: %s', unicodedata.east_asian_width),
      ('mirrored:         %s', unicodedata.mirrored),
      ('decomposition:    %s', unicodedata.decomposition),
    )
    for line_format, lookup in always_defined:
      print(line_format % lookup(c))
  except Exception as ex:
    print('ERROR: %s' % ex)
コード例 #14
0
    def __init__(self, symbol):
        """Store ``symbol`` together with its Unicode properties.

        Missing values are recorded as 'NO_NAME_FOUND' for the name and -1
        for decimal, digit and numeric. The four normal forms of the symbol
        are stored as well. When ``Config.debug['unicode']`` is truthy the
        instance calls ``print_debug()``.
        """
        self.symbol = symbol
        self.name = u.name(symbol, 'NO_NAME_FOUND')

        # Value lookups that take -1 as the "no value" marker.
        for attribute in ('decimal', 'digit', 'numeric'):
            setattr(self, attribute, getattr(u, attribute)(self.symbol, -1))

        # Properties available for every character.
        for attribute in ('category', 'bidirectional', 'combining',
                          'east_asian_width', 'mirrored', 'decomposition'):
            setattr(self, attribute, getattr(u, attribute)(self.symbol))

        # Attribute name -> normalization form passed to u.normalize.
        normal_forms = (
            ('normalize_nfc', 'NFC'),
            ('normalize_nkfc', 'NFKC'),
            ('normalize_nfd', 'NFD'),
            ('normalize_nkfd', 'NFKD'),
        )
        for attribute, form in normal_forms:
            setattr(self, attribute, u.normalize(form, self.symbol))

        if Config.debug['unicode']:
            self.print_debug()
コード例 #15
0
def analyze(text):
    """Run MeCab over lyrics text and build a phoneme string per section.

    ``text`` is split into sections on '===' and each section into phrases
    on spaces. Returns a list with one dict per section:
    ``{'lyric': <section text>, 'phoneme': <'/'-joined readings>}``.

    NOTE(review): relies on Python 2 semantics - ``str.replace`` on the
    encoded text and list-returning ``map`` (``text[i]``, ``len(atypes)``);
    confirm before running on Python 3.
    """
    results = []
    mecab = MeCab.Tagger('-Ounidic -d %s -r %s' %
                         (mecab_dicdir, os.path.join(dicrc_dir, 'dicrc')))

    # Split the text into bars (measures)
    text = text.strip()
    text = re.sub(phrase_split_chars_uni, ' ', text)
    text = text.encode('utf-8').replace('\r\n', '\n').replace('\n', ' ')
    text = text.split('===')
    text = map(lambda p: p.strip(), text)
    lyrics = map(lambda p: p.split(' '), text)

    # Analyse readings and accents
    for i, phrases in enumerate(lyrics):
        temp = []
        for phrase in phrases:
            for word in mecab.parse(phrase).decode('utf-8').split('\n'):
                features = word.split('\t')
                # Only lines with exactly four tab-separated fields are used.
                if len(features) == 4:
                    atypes = []
                    acons = []
                    try:
                        # Accent type
                        atypes = map(lambda n: unicodedata.decimal(n),
                                     features[2].split(','))
                        acons = features[3].split(',')
                    except TypeError:
                        # Accent is unknown
                        pass

                    prono = features[0]  # Reading (pronunciation)
                    if not prono or not re.match(ok_chars, prono):
                        continue

                    if len(atypes) > 0:
                        prono = insert_accent(prono, atypes[0])
                    temp.append(prono)
            # A space entry marks the end of each phrase.
            temp.append(' ')
        results.append({'lyric': text[i], 'phoneme': '/'.join(temp).rstrip()})
    return results
コード例 #16
0
ファイル: test_unicodedata.py プロジェクト: xx312022850/pypy
    def test_compare_functions(self):
        """Compare unicodedata with unicodedb_4_1_0 for code points below 0x10000.

        Numeric values of the code points listed in ``self.diff_numeric`` are
        expected to be reported as missing (-1).
        """
        import unicodedata # CPython implementation

        def db_value(prop, code_point):
            if prop == 'numeric' and code_point in self.diff_numeric:
                return -1
            lookup = getattr(unicodedb_4_1_0, prop)
            try:
                return lookup(code_point)
            except KeyError:
                # The database has no value; -1 is the agreed marker.
                return -1

        # Properties that may be absent for a character.
        optional_props = ('digit', 'numeric', 'decimal')
        # Properties both sides define for every code point.
        total_props = ('category', 'bidirectional', 'decomposition',
                       'mirrored', 'combining')

        for code_point in range(0x10000):
            char = unichr(code_point)
            for prop in optional_props:
                assert getattr(unicodedata, prop)(char, -1) == db_value(prop, code_point)
            for prop in total_props:
                assert getattr(unicodedata, prop)(char) == getattr(unicodedb_4_1_0, prop)(code_point)
コード例 #17
0
ファイル: views.py プロジェクト: Tanmfran/foodstuffs
def update_quantity(request, food_id):
    """Store the posted quantity on a food item and compute its total cost.

    Looks up the ``FoodItems`` row with id ``food_id`` (404 when missing).
    Without a 'quantity' field in the POST data the detail page is
    re-rendered with an error message. Otherwise the quantity is saved and a
    dict with the item and a '$'-prefixed total cost is returned.

    NOTE(review): the success path returns a plain dict, not a response
    object - confirm a decorator or caller turns it into a response.
    NOTE(review): ``decimal`` is defined outside this block; confirm it
    accepts the posted quantity string.
    """
    food = get_object_or_404(FoodItems, id=food_id)
    try:
        quantity = request.POST['quantity']
    except (KeyError, food.DoesNotExist):
        # No quantity submitted: show the detail page again with an error.
        context = {
            'fooditems': food,
            'error_message': "You didn't select a quantity.",
        }
        return render(request, 'foods/detail.html', context)

    food.quantity = quantity
    food.save()
    total_cost = '$' + str(decimal(food.quantity) * food.item_cost)
    return {'fooditems': food, 'total_cost': total_cost}
コード例 #18
0
    def test_compare_functions(self):
        """Check that unicodedata and unicodedb_4_1_0 agree below 0x10000.

        For code points in ``self.diff_numeric`` the numeric value is
        expected to be missing (-1).
        """
        import unicodedata # CPython implementation

        db = unicodedb_4_1_0

        def getX(fun, code):
            if fun == 'numeric' and code in self.diff_numeric:
                return -1
            # A KeyError from the database means "no value"; report it as the
            # same -1 that unicodedata is given as its default.
            try:
                found = getattr(db, fun)(code)
            except KeyError:
                found = -1
            return found

        for cp in range(0x10000):
            ch = unichr(cp)
            # Properties that can be missing for a character.
            assert unicodedata.digit(ch, -1) == getX('digit', cp)
            assert unicodedata.numeric(ch, -1) == getX('numeric', cp)
            assert unicodedata.decimal(ch, -1) == getX('decimal', cp)
            # Properties defined for every code point.
            assert unicodedata.category(ch) == db.category(cp)
            assert unicodedata.bidirectional(ch) == db.bidirectional(cp)
            assert unicodedata.decomposition(ch) == db.decomposition(cp)
            assert unicodedata.mirrored(ch) == db.mirrored(cp)
            assert unicodedata.combining(ch) == db.combining(cp)
コード例 #19
0
ファイル: lyrics.py プロジェクト: kvvzr/Melete
def analyze(text):
    """Run MeCab over lyrics text and build a phoneme string per section.

    ``text`` is split into sections on '===' and each section into phrases
    on spaces. Returns a list with one dict per section:
    ``{'lyric': <section text>, 'phoneme': <'/'-joined readings>}``.

    NOTE(review): relies on Python 2 semantics - ``str.replace`` on the
    encoded text and list-returning ``map`` (``text[i]``, ``len(atypes)``);
    confirm before running on Python 3.
    """
    results = []
    mecab = MeCab.Tagger('-Ounidic -d %s -r dicrc' % mecab_dicdir)

    # Split the text into bars (measures)
    text = text.strip()
    text = re.sub(phrase_split_chars_uni, ' ', text)
    text = text.encode('utf-8').replace('\r\n', '\n').replace('\n', ' ')
    text = text.split('===')
    text = map(lambda p: p.strip(), text)
    lyrics = map(lambda p: p.split(' '), text)

    # Analyse readings and accents
    for i, phrases in enumerate(lyrics):
        temp = []
        for phrase in phrases:
            for word in mecab.parse(phrase).decode('utf-8').split('\n'):
                features = word.split('\t')
                # Only lines with exactly four tab-separated fields are used.
                if len(features) == 4:
                    atypes = []
                    acons = []
                    try:
                        # Accent type
                        atypes = map(lambda n: unicodedata.decimal(n), features[2].split(','))
                        acons = features[3].split(',')
                    except TypeError:
                        # Accent is unknown
                        pass

                    prono = features[0] # Reading (pronunciation)
                    if not prono or not re.match(ok_chars, prono):
                        continue

                    if len(atypes) > 0:
                        prono = insert_accent(prono, atypes[0])
                    temp.append(prono)
            # A space entry marks the end of each phrase.
            temp.append(' ')
        results.append({'lyric': text[i], 'phoneme': '/'.join(temp).rstrip()})
    return results
コード例 #20
0
    def test_ipy2_gh357(self):
        """https://github.com/IronLanguages/ironpython2/issues/357

        Checks the unicodedata properties reported for U+4E2D.
        """

        import unicodedata

        ideograph = u'\u4e2d'

        # The expected name differs when is_cli is set.
        expected_name = ('<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>'
                         if is_cli else 'CJK UNIFIED IDEOGRAPH-4E2D')
        self.assertEqual(unicodedata.name(ideograph), expected_name)

        # No decimal/digit/numeric value: raises without a default, returns
        # the default when one is supplied.
        for lookup in (unicodedata.decimal,
                       unicodedata.digit,
                       unicodedata.numeric):
            self.assertRaises(ValueError, lookup, ideograph)
            self.assertEqual(lookup(ideograph, 0), 0)

        self.assertEqual(unicodedata.category(ideograph), 'Lo')
        self.assertEqual(unicodedata.bidirectional(ideograph), 'L')
        self.assertEqual(unicodedata.combining(ideograph), 0)
        self.assertEqual(unicodedata.east_asian_width(ideograph), 'W')
        self.assertEqual(unicodedata.mirrored(ideograph), 0)
        self.assertEqual(unicodedata.decomposition(ideograph), '')
コード例 #21
0
    def test_ipy2_gh357(self):
        """https://github.com/IronLanguages/ironpython2/issues/357

        Verifies every unicodedata lookup for the character U+4E2D.
        """

        import unicodedata

        ch = u'\u4e2d'

        if is_cli:
            name = '<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>'
        else:
            name = 'CJK UNIFIED IDEOGRAPH-4E2D'
        self.assertEqual(unicodedata.name(ch), name)

        # Value lookups: ValueError without a default, the default otherwise.
        for prop in ('decimal', 'digit', 'numeric'):
            lookup = getattr(unicodedata, prop)
            self.assertRaises(ValueError, lookup, ch)
            self.assertEqual(lookup(ch, 0), 0)

        # Property name -> value expected for this character.
        expectations = (
            ('category', 'Lo'),
            ('bidirectional', 'L'),
            ('combining', 0),
            ('east_asian_width', 'W'),
            ('mirrored', 0),
            ('decomposition', ''),
        )
        for prop, expected in expectations:
            self.assertEqual(getattr(unicodedata, prop)(ch), expected)
コード例 #22
0
# Python 2 script: print the value returned by test_unicodedata().
print test_unicodedata()

# Some additional checks of the API:
print 'API:',

# digit(): the default is returned for characters without a digit value;
# u'\u215b' has a numeric value (0.125, see below) but no digit value.
verify(unicodedata.digit(u'A',None) is None)
verify(unicodedata.digit(u'9') == 9)
verify(unicodedata.digit(u'\u215b',None) is None)
verify(unicodedata.digit(u'\u2468') == 9)

# numeric(): also covers fractions and characters that only have a
# numeric value.
verify(unicodedata.numeric(u'A',None) is None)
verify(unicodedata.numeric(u'9') == 9)
verify(unicodedata.numeric(u'\u215b') == 0.125)
verify(unicodedata.numeric(u'\u2468') == 9.0)

# decimal(): u'\u2468' has a digit value (9, see above) but no decimal value.
verify(unicodedata.decimal(u'A',None) is None)
verify(unicodedata.decimal(u'9') == 9)
verify(unicodedata.decimal(u'\u215b',None) is None)
verify(unicodedata.decimal(u'\u2468',None) is None)

# category(): general category abbreviations.
verify(unicodedata.category(u'\uFFFE') == 'Cn')
verify(unicodedata.category(u'a') == 'Ll')
verify(unicodedata.category(u'A') == 'Lu')

# bidirectional(): empty string when no class is defined.
verify(unicodedata.bidirectional(u'\uFFFE') == '')
verify(unicodedata.bidirectional(u' ') == 'WS')
verify(unicodedata.bidirectional(u'A') == 'L')

# decomposition(): empty string when the character does not decompose.
verify(unicodedata.decomposition(u'\uFFFE') == '')
verify(unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034')
コード例 #23
0
ファイル: codepoint.py プロジェクト: Codepoints/unicodeinfo
 def decimal(self, default=None):
     """Return the decimal value of ``self.char``.

     ``default`` is forwarded to ``ud.decimal`` and is returned when the
     character has no decimal value.
     """
     char = self.char
     return ud.decimal(char, default)
コード例 #24
0
    unicode_digit = defaultdict(list)
    unicode_decimal = defaultdict(list)

    for c in map(chr, range(sys.maxunicode + 1)):
        unicode_category[unicodedata.category(c)].append(c)

        if unicodedata.bidirectional(c):
            unicode_bidirectional[unicodedata.bidirectional(c)].append(c)

        if unicodedata.numeric(c, None) is not None:
            unicode_numeric[unicodedata.numeric(c)].append(c)

        if unicodedata.digit(c, None) is not None:
            unicode_digit[unicodedata.digit(c)].append(c)

        if unicodedata.decimal(c, None) is not None:
            unicode_decimal[unicodedata.decimal(c)].append(c)

    # get all punctuation
    punctuation = set()
    for class_name in unicode_category.keys():
        if class_name.startswith('P') or class_name.startswith('S'):
            print(class_name)
            for char in unicode_category[class_name]:
                punctuation.add(char)

    with open('punctuation_lookup.py', 'w', encoding='ascii') as f:
        f.write('PUNCTUATION = {\n')
        for p in sorted(punctuation):

            if p == '"':
コード例 #25
0
def ascii_char_smash(char):
    r"""Smash a single Unicode character into an ASCII representation.

    The result is always a string; characters that cannot be represented
    yield the empty string.

    >>> ascii_char_smash(u"\N{KATAKANA LETTER SMALL A}")
    'a'
    >>> ascii_char_smash(u"\N{KATAKANA LETTER A}")
    'A'
    >>> ascii_char_smash(u"\N{KATAKANA LETTER KA}")
    'KA'
    >>> ascii_char_smash(u"\N{HIRAGANA LETTER SMALL A}")
    'a'
    >>> ascii_char_smash(u"\N{HIRAGANA LETTER A}")
    'A'
    >>> ascii_char_smash(u"\N{BOPOMOFO LETTER ANG}")
    'ANG'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER H WITH STROKE}")
    'H'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER LONG S}")
    's'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER THORN}")
    'TH'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER THORN}")
    'th'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER I WITH OGONEK}")
    'I'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER AE}")
    'AE'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER A WITH DIAERESIS}")
    'Ae'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER A WITH DIAERESIS}")
    'ae'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER O WITH DIAERESIS}")
    'Oe'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER O WITH DIAERESIS}")
    'oe'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER U WITH DIAERESIS}")
    'Ue'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER U WITH DIAERESIS}")
    'ue'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER SHARP S}")
    'ss'

    Latin-1 and other symbols are lost

    >>> ascii_char_smash(u"\N{POUND SIGN}")
    ''

    Unless they also happen to be letters of some kind, such as greek

    >>> ascii_char_smash(u"\N{MICRO SIGN}")
    'mu'

    Fractions

    >>> ascii_char_smash(u"\N{VULGAR FRACTION ONE HALF}")
    '1/2'

    """
    mapping = {
        u"\N{LATIN CAPITAL LETTER AE}": "AE",
        u"\N{LATIN SMALL LETTER AE}": "ae",
        u"\N{LATIN CAPITAL LETTER A WITH DIAERESIS}": "Ae",
        u"\N{LATIN SMALL LETTER A WITH DIAERESIS}": "ae",
        u"\N{LATIN CAPITAL LETTER O WITH DIAERESIS}": "Oe",
        u"\N{LATIN SMALL LETTER O WITH DIAERESIS}": "oe",
        u"\N{LATIN CAPITAL LETTER U WITH DIAERESIS}": "Ue",
        u"\N{LATIN SMALL LETTER U WITH DIAERESIS}": "ue",
        u"\N{LATIN SMALL LETTER SHARP S}": "ss",
        u"\N{LATIN CAPITAL LETTER THORN}": "TH",
        u"\N{LATIN SMALL LETTER THORN}": "th",
        u"\N{FRACTION SLASH}": "/",
        u"\N{MULTIPLICATION SIGN}": "x",
        u"\N{KATAKANA-HIRAGANA DOUBLE HYPHEN}": "=",
    }

    # Pass through ASCII
    if ord(char) < 127:
        return char

    # Handle manual mappings (``in`` works on Python 2 and 3 alike)
    if char in mapping:
        return mapping[char]

    # Regress to decomposed form and recurse if necessary.
    decomposed = unicodedata.normalize("NFKD", char)
    if decomposed != char:
        return "".join(ascii_char_smash(part) for part in decomposed)

    # Handle whitespace
    if char.isspace():
        return " "

    # Handle digits; converted to text so every branch returns a string
    if char.isdigit():
        return str(unicodedata.digit(char))

    # Handle decimal (probably pointless given isdigit above)
    if char.isdecimal():
        return str(unicodedata.decimal(char))

    # Handle numerics, such as 1/2
    if char.isnumeric():
        formatted = "%f" % unicodedata.numeric(char)
        # Strip leading and trailing 0
        return formatted.strip("0")

    # Ignore unprintables, such as the accents we denormalized
    if not char.isalnum():
        return ""

    # Return modified latin characters as just the latin part.
    name = unicodedata.name(char)

    match = re.search(r"LATIN CAPITAL LIGATURE (\w+)", name)
    if match is not None:
        return match.group(1)

    match = re.search(r"LATIN SMALL LIGATURE (\w+)", name)
    if match is not None:
        return match.group(1).lower()

    match = re.search(r"(?:LETTER SMALL|SMALL LETTER) (\w+)", name)
    if match is not None:
        return match.group(1).lower()

    match = re.search(r"LETTER (\w+)", name)
    if match is not None:
        return match.group(1)

    # Something we can't represent. Return empty string.
    return ""
コード例 #26
0
import unicodedata

import tangled_up_in_unicode as unicode_data

if __name__ == "__main__":
    basic = [
        {
            "property": "Name",
            "standard": unicodedata.name,
            "new": unicode_data.name
        },
        {
            "property": "Decimal",
            "standard": lambda x: unicodedata.decimal(x, -1),
            "new": lambda x: unicode_data.decimal(x, -1),
        },
        {
            "property": "Digit",
            "standard": lambda x: unicodedata.digit(x, -1),
            "new": lambda x: unicode_data.digit(x, -1),
        },
        {
            "property": "Numeric",
            "standard": lambda x: unicodedata.numeric(x, -1.0),
            "new": lambda x: unicode_data.numeric(x, -1.0),
        },
        {
            "property": "Category",
            "standard": unicodedata.category,
            "new": unicode_data.category,
        },
コード例 #27
0
'''
The unicodedata module

The unicodedata module holds the properties of Unicode characters, such as
the character category, decomposition data, and numeric values.
'''
import unicodedata

for char in [u'A', u'-', u'1', u'w']:
    # Each field is printed with the arrow that follows it; print() supplies
    # the single space between the two and nothing after the arrow.
    leading_fields = (
        (char, '-> '),
        (repr(char), '-> '),
        (unicodedata.category(char), '-> '),
        (repr(unicodedata.decomposition(char)), '-> '),
        (unicodedata.decimal(char, None), '=> '),
    )
    for value, arrow in leading_fields:
        print(value, arrow, end='')
    # The numeric value ends the line.
    print(unicodedata.numeric(char, None))
コード例 #28
0
def ascii_char_smash(char):
    r"""Smash a single Unicode character into an ASCII representation.

    The character is resolved by the first rule that applies: ASCII
    pass-through, a hand-written mapping, recursive smashing of its NFKD
    decomposition, whitespace, digit/decimal/numeric value, and finally
    the interesting word of its Unicode name.  Anything else becomes the
    empty string.

    :param char: a single Unicode character.
    :returns: a (possibly empty) string.

    >>> ascii_char_smash(u"\N{KATAKANA LETTER SMALL A}")
    'a'
    >>> ascii_char_smash(u"\N{KATAKANA LETTER A}")
    'A'
    >>> ascii_char_smash(u"\N{KATAKANA LETTER KA}")
    'KA'
    >>> ascii_char_smash(u"\N{HIRAGANA LETTER SMALL A}")
    'a'
    >>> ascii_char_smash(u"\N{HIRAGANA LETTER A}")
    'A'
    >>> ascii_char_smash(u"\N{BOPOMOFO LETTER ANG}")
    'ANG'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER H WITH STROKE}")
    'H'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER LONG S}")
    's'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER THORN}")
    'TH'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER THORN}")
    'th'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER I WITH OGONEK}")
    'I'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER AE}")
    'AE'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER A WITH DIAERESIS}")
    'Ae'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER A WITH DIAERESIS}")
    'ae'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER O WITH DIAERESIS}")
    'Oe'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER O WITH DIAERESIS}")
    'oe'
    >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER U WITH DIAERESIS}")
    'Ue'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER U WITH DIAERESIS}")
    'ue'
    >>> ascii_char_smash(u"\N{LATIN SMALL LETTER SHARP S}")
    'ss'

    Latin-1 and other symbols are lost

    >>> ascii_char_smash(u"\N{POUND SIGN}")
    ''

    Unless they also happen to be letters of some kind, such as greek

    >>> ascii_char_smash(u"\N{MICRO SIGN}")
    'mu'

    Fractions

    >>> ascii_char_smash(u"\N{VULGAR FRACTION ONE HALF}")
    '1/2'

    """
    mapping = {
        u"\N{LATIN CAPITAL LETTER AE}": "AE",
        u"\N{LATIN SMALL LETTER AE}": "ae",

        u"\N{LATIN CAPITAL LETTER A WITH DIAERESIS}": "Ae",
        u"\N{LATIN SMALL LETTER A WITH DIAERESIS}": "ae",

        u"\N{LATIN CAPITAL LETTER O WITH DIAERESIS}": "Oe",
        u"\N{LATIN SMALL LETTER O WITH DIAERESIS}": "oe",

        u"\N{LATIN CAPITAL LETTER U WITH DIAERESIS}": "Ue",
        u"\N{LATIN SMALL LETTER U WITH DIAERESIS}": "ue",

        u"\N{LATIN SMALL LETTER SHARP S}": "ss",

        u"\N{LATIN CAPITAL LETTER THORN}": "TH",
        u"\N{LATIN SMALL LETTER THORN}": "th",

        u"\N{FRACTION SLASH}": "/",
        u"\N{MULTIPLICATION SIGN}": "x",

        u"\N{KATAKANA-HIRAGANA DOUBLE HYPHEN}": "=",
        }

    # Pass through ASCII.  Code point 127 (DEL) is not covered by this
    # test; it falls through to the checks below instead.
    if ord(char) < 127:
        return char

    # Handle manual mappings.  The `in` operator works on both Python 2
    # and Python 3, unlike dict.has_key().
    if char in mapping:
        return mapping[char]

    # Regress to decomposed form and recurse if necessary.
    decomposed = unicodedata.normalize("NFKD", char)
    if decomposed != char:
        return "".join(ascii_char_smash(part) for part in decomposed)

    # Handle whitespace
    if char.isspace():
        return " "

    # Handle digits.  unicodedata returns an int; convert it so that every
    # branch yields a string (the recursive join above relies on that).
    if char.isdigit():
        return str(unicodedata.digit(char))

    # Handle decimal (probably pointless given isdigit above)
    if char.isdecimal():
        return str(unicodedata.decimal(char))

    # Handle numerics, such as 1/2
    if char.isnumeric():
        formatted = "%f" % unicodedata.numeric(char)
        # Strip leading and trailing 0
        return formatted.strip("0")

    # Ignore unprintables, such as the accents we denormalized
    if not char.isalnum():
        return ""

    # Return modified latin characters as just the latin part.  An unnamed
    # character yields "" here and therefore matches none of the patterns.
    name = unicodedata.name(char, "")

    match = re.search(r"LATIN CAPITAL LIGATURE (\w+)", name)
    if match is not None:
        return match.group(1)

    match = re.search(r"LATIN SMALL LIGATURE (\w+)", name)
    if match is not None:
        return match.group(1).lower()

    match = re.search(r"(?:LETTER SMALL|SMALL LETTER) (\w+)", name)
    if match is not None:
        return match.group(1).lower()

    match = re.search(r"LETTER (\w+)", name)
    if match is not None:
        return match.group(1)

    # Something we can't represent. Return empty string.
    return ""
コード例 #29
0
# Run the main test routine (defined outside this excerpt) and print
# whatever it returns.
print test_unicodedata()

# Some additional checks of the API:
print 'API:',

# digit(): u'A' and U+215B are expected to fall back to the None default,
# while u'9' and U+2468 are both expected to yield 9.
verify(unicodedata.digit(u'A',None) is None)
verify(unicodedata.digit(u'9') == 9)
verify(unicodedata.digit(u'\u215b',None) is None)
verify(unicodedata.digit(u'\u2468') == 9)

# numeric(): only u'A' is expected to fall back to the default; U+215B
# is expected to carry the fractional value 0.125.
verify(unicodedata.numeric(u'A',None) is None)
verify(unicodedata.numeric(u'9') == 9)
verify(unicodedata.numeric(u'\u215b') == 0.125)
verify(unicodedata.numeric(u'\u2468') == 9.0)

# decimal(): of the four probes only u'9' is expected to have a value.
verify(unicodedata.decimal(u'A',None) is None)
verify(unicodedata.decimal(u'9') == 9)
verify(unicodedata.decimal(u'\u215b',None) is None)
verify(unicodedata.decimal(u'\u2468',None) is None)

# General category codes.
verify(unicodedata.category(u'\uFFFE') == 'Cn')
verify(unicodedata.category(u'a') == 'Ll')
verify(unicodedata.category(u'A') == 'Lu')

# Bidirectional classes; U+FFFE is expected to report an empty string.
verify(unicodedata.bidirectional(u'\uFFFE') == '')
verify(unicodedata.bidirectional(u' ') == 'WS')
verify(unicodedata.bidirectional(u'A') == 'L')

# Decomposition mappings; U+FFFE is expected to report an empty string.
verify(unicodedata.decomposition(u'\uFFFE') == '')
verify(unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034')
コード例 #30
0
ファイル: charinfo.py プロジェクト: justworx/trix_rd3
 def decimal(self):
     """Return the decimal value of ``self.c``, or None if it has none."""
     # Supplying a default makes unicodedata hand back None instead of
     # raising ValueError for characters without a decimal value.
     return unicodedata.decimal(self.c, None)
コード例 #31
0
from natsort.compat.py23 import py23_unichr
from natsort.unicode_numeric_hex import numeric_hex

# Convert each hex into the literal Unicode character.
# Stop if a ValueError is raised in case of a narrow Unicode build.
# The extra check with unicodedata is in case this Python version
# does not support some characters.
numeric_chars = []
for a in numeric_hex:
    try:
        character = py23_unichr(a)
    except ValueError:  # pragma: no cover
        # The first unrepresentable code point ends the whole scan.
        break
    if unicodedata.numeric(character, None) is None:
        # This interpreter's Unicode database has no numeric value for
        # the character, so it is left out of every table below.
        continue  # pragma: no cover
    numeric_chars.append(character)

# The digit characters are a subset of the numerals.
digit_chars = [a for a in numeric_chars if unicodedata.digit(a, None) is not None]

# The decimal characters are a subset of the numerals
# (probably of the digits, but let's be safe).
decimal_chars = [a for a in numeric_chars if unicodedata.decimal(a, None) is not None]

# Create a single string with the above data.
decimals = "".join(decimal_chars)
digits = "".join(digit_chars)
numeric = "".join(numeric_chars)
# The "_no_decimals" variants drop every character that also appears in
# `decimals`.
digits_no_decimals = "".join([x for x in digits if x not in decimals])
numeric_no_decimals = "".join([x for x in numeric if x not in decimals])
コード例 #32
0
ファイル: core.py プロジェクト: libindic/chardetails
    def getdetails(self, text):
        """
        Give details of all characters in the given string.

        :param text: The unicode string to be examined.
        :type text: str.
        :returns: dictionary with one entry per distinct character of
            ``text`` (keyed by the character itself) plus a
            ``'Characters'`` entry holding ``list(text)``.

        Every per-character dictionary has the keys ``'Name'``,
        ``'HTML Entity'``, ``'Code point'``, ``'Alphabet'``, ``'Digit'``,
        ``'AlphaNumeric'`` and ``'Canonical Decomposition'``.
        ``'Numeric Value'`` and ``'Decimal Value'`` are added only for
        characters that have such a value.  ``'Name'`` is an empty string
        for characters without a Unicode name (for example ``"\\n"``).

        Example, for an instance ``a`` (Python 2 reprs)::

         >>> a.getdetails(u"n")
         {'Characters': [u'n'],
         u'n': {'AlphaNumeric': 'True',
         'Alphabet': 'True',
         'Canonical Decomposition': '',
         'Code point': "u'n'",
         'Digit': 'False',
         'HTML Entity': '110',
         'Name': 'LATIN SMALL LETTER N'}}

        """
        chardetails = {}
        for character in text:
            details = {
                # Unnamed characters would make unicodedata.name() raise
                # ValueError; fall back to an empty name instead.
                'Name': unicodedata.name(character, ''),
                'HTML Entity': str(ord(character)),
                'Code point': repr(character),
            }

            # Optional properties: passing a default replaces the former
            # bare ``except: pass`` blocks without hiding unrelated errors.
            numeric_value = unicodedata.numeric(character, None)
            if numeric_value is not None:
                details['Numeric Value'] = numeric_value
            decimal_value = unicodedata.decimal(character, None)
            if decimal_value is not None:
                details['Decimal Value'] = decimal_value

            details['Alphabet'] = str(character.isalpha())
            # 'Digit' has always ended up as the isdigit() flag: the value
            # from unicodedata.digit() that used to be stored first was
            # overwritten unconditionally, so that dead lookup is gone.
            details['Digit'] = str(character.isdigit())
            details['AlphaNumeric'] = str(character.isalnum())
            details['Canonical Decomposition'] = \
                unicodedata.decomposition(character)

            chardetails[character] = details

        chardetails['Characters'] = list(text)
        return chardetails
コード例 #33
0
# Run the main test routine (defined outside this excerpt) and print
# whatever it returns.
print test_unicodedata()

# Some additional checks of the API:
print "API:",

# digit(): u"A" and U+215B are expected to fall back to the None default,
# while u"9" and U+2468 are both expected to yield 9.
verify(unicodedata.digit(u"A", None) is None)
verify(unicodedata.digit(u"9") == 9)
verify(unicodedata.digit(u"\u215b", None) is None)
verify(unicodedata.digit(u"\u2468") == 9)

# numeric(): only u"A" is expected to fall back to the default; U+215B
# is expected to carry the fractional value 0.125.
verify(unicodedata.numeric(u"A", None) is None)
verify(unicodedata.numeric(u"9") == 9)
verify(unicodedata.numeric(u"\u215b") == 0.125)
verify(unicodedata.numeric(u"\u2468") == 9.0)

# decimal(): of the four probes only u"9" is expected to have a value.
verify(unicodedata.decimal(u"A", None) is None)
verify(unicodedata.decimal(u"9") == 9)
verify(unicodedata.decimal(u"\u215b", None) is None)
verify(unicodedata.decimal(u"\u2468", None) is None)

# General category codes.
verify(unicodedata.category(u"\uFFFE") == "Cn")
verify(unicodedata.category(u"a") == "Ll")
verify(unicodedata.category(u"A") == "Lu")

# Bidirectional classes; U+FFFE is expected to report an empty string.
verify(unicodedata.bidirectional(u"\uFFFE") == "")
verify(unicodedata.bidirectional(u" ") == "WS")
verify(unicodedata.bidirectional(u"A") == "L")

# Decomposition mappings; U+FFFE is expected to report an empty string.
verify(unicodedata.decomposition(u"\uFFFE") == "")
verify(unicodedata.decomposition(u"\u00bc") == "<fraction> 0031 2044 0034")
コード例 #34
0
def test_against_unicodedata():
    '''
    Check against `unicodedata` or `unicodedata2` if available with the
    correct version of Unicode.

    For every code point present in the parsed UnicodeData table the name,
    numeric properties, general category, bidi class, combining class,
    mirroring flag and decomposition mapping are compared with what the
    `unicodedata` module reports.  The derived bidi class and the (derived)
    East Asian width tables are then compared for assigned code points.

    Raises `Exception` when no suitable `unicodedata` module is available.
    '''
    if unicodedata is None:
        raise Exception(
            'Packages unicodedata and unicodedata2 are not available with the necessary version of Unicode ({0}); many consistency tests were omitted'
            .format(mdl.UNICODE_VERSION))
    ucdf = mdl.UCDFiles()

    ud = ucdf.unicodedata
    for cp in range(0, 0x10FFFF + 1):
        c = chr(cp)
        if cp in ud:
            entry = ud[cp]
            name = unicodedata.name(c, None)
            if name is None:
                # Handle missing names in unicodedata
                # Compare Table 4-13 in Unicode Standard
                # http://www.unicode.org/versions/Unicode9.0.0/ch04.pdf
                if 0x17000 <= cp <= 0x187EC:
                    assert entry['Name'] == 'TANGUT IDEOGRAPH-{0:04X}'.format(
                        cp)
                else:
                    assert entry['Name'] == ''
            else:
                assert name == entry['Name']
            decimal, digit, numeric = (unicodedata.decimal(c, None),
                                       unicodedata.digit(c, None),
                                       unicodedata.numeric(c, None))
            if any(x is not None for x in (decimal, digit, numeric)):
                if decimal is not None:
                    # A decimal character must also report a digit and a
                    # numeric value.  (This used to re-check
                    # `decimal is not None`, which is always true inside
                    # this branch, so `numeric` was never verified.)
                    assert (decimal == int(entry['Numeric_Value'])
                            and entry['Numeric_Type'] == 'Decimal'
                            and digit is not None
                            and numeric is not None)
                elif digit is not None:
                    assert (digit == int(entry['Numeric_Value'])
                            and entry['Numeric_Type'] == 'Digit'
                            and decimal is None
                            and numeric is not None)
                elif numeric is not None:
                    try:
                        num = float(entry['Numeric_Value'])
                    except ValueError:
                        # Values that float() rejects are accepted only in
                        # the "numerator/denominator" form.
                        if '/' in entry['Numeric_Value']:
                            numerator, denominator = entry[
                                'Numeric_Value'].split('/')
                            num = float(numerator) / float(denominator)
                        else:
                            raise
                    assert (numeric == num
                            and entry['Numeric_Type'] == 'Numeric'
                            and digit is None
                            and decimal is None)
                else:
                    # Cannot happen: any() above guarantees that one of
                    # the three values is not None.
                    raise Exception
            else:
                assert (entry['Numeric_Value'] == 'NaN'
                        and entry['Numeric_Type'] == 'None')
            assert unicodedata.category(c) == entry['General_Category']
            assert unicodedata.bidirectional(c) == entry['Bidi_Class']
            assert unicodedata.combining(c) == int(
                entry['Canonical_Combining_Class'])
            assert unicodedata.mirrored(c) == entry['Bidi_Mirrored']
            if unicodedata.decomposition(c) == '':
                if entry['Name'].startswith('HANGUL SYLLABLE'):
                    # The Hangul syllables lack decomposition mapping in
                    # unicodedata, so calculate with a full decomposition
                    # followed by a partial composition (Unicode Standard,
                    # chapter 3.12)
                    decomp = unicodedata.normalize('NFD', c)
                    if len(decomp) == 3:
                        decomp = unicodedata.normalize('NFC',
                                                       decomp[:2]) + decomp[-1]
                    decomp = tuple(ord(x) for x in decomp)
                    assert decomp == entry['Decomposition_Mapping']
                else:
                    # No decomposition: the table is expected to map the
                    # code point to itself.
                    assert entry['Decomposition_Mapping'] == (cp, )
            else:
                x = unicodedata.decomposition(c)
                if '<' in x:
                    # Drop the leading "<tag>" of a compatibility mapping.
                    x = x.split('>', 1)[1].strip()
                x = tuple(int(y, 16) for y in x.split('\x20'))
                assert x == entry['Decomposition_Mapping']

    dbc = ucdf.derivedbidiclass
    for cp in range(0, 0x10FFFF + 1):
        c = chr(cp)
        # Only compare assigned code points, because unicodedata and
        # unicodedata2 lack correct defaults for unassigned
        if cp in dbc and cp in ud:
            assert unicodedata.bidirectional(c) == dbc[cp]['Bidi_Class']

    eaw = ucdf.eastasianwidth
    deaw = ucdf.derivedeastasianwidth
    for cp in range(0, 0x10FFFF + 1):
        c = chr(cp)
        # Only compare assigned code points, because unicodedata and
        # unicodedata2 lack correct defaults for unassigned
        if cp in eaw and cp in ud:
            assert unicodedata.east_asian_width(
                c) == eaw[cp]['East_Asian_Width']
        if cp in deaw and cp in ud:
            assert unicodedata.east_asian_width(
                c) == deaw[cp]['East_Asian_Width']
コード例 #35
0
import unicodedata

# Print a handful of unicodedata lookups, one result per line.
for result in (
    unicodedata.lookup('LEFT CURLY BRACKET'),
    unicodedata.name('/'),
    # unicodedata.decimal('a') is left out: 'a' has no decimal value.
    unicodedata.decimal('9'),
    unicodedata.category('A'),  # 'L'etter, 'u'ppercase
    unicodedata.bidirectional('\u0660'),  # 'A'rabic, 'N'umber
):
    print(result)

from codecs import StreamWriter

from datetime import timedelta

# A one-hour delta is shown as its (days, seconds, microseconds) parts.
d = timedelta(hours=1)
parts = (d.days, d.seconds, d.microseconds)
print(parts)
コード例 #36
0
# Stop if a ValueError is raised in case of a narrow Unicode build.
# The extra check with unicodedata is in case this Python version
# does not support some characters.
numeric_chars = []
for a in numeric_hex:
    try:
        character = chr(a)
    except ValueError:  # pragma: no cover
        # The first unrepresentable code point ends the whole scan.
        break
    if unicodedata.numeric(character, None) is None:
        # This interpreter's Unicode database has no numeric value for
        # the character, so it is left out of every table below.
        continue  # pragma: no cover
    numeric_chars.append(character)

# The digit characters are a subset of the numerals.
digit_chars = [
    a for a in numeric_chars if unicodedata.digit(a, None) is not None
]

# The decimal characters are a subset of the numerals
# (probably of the digits, but let's be safe).
decimal_chars = [
    a for a in numeric_chars if unicodedata.decimal(a, None) is not None
]

# Create a single string with the above data.
decimals = "".join(decimal_chars)
digits = "".join(digit_chars)
numeric = "".join(numeric_chars)
# The "_no_decimals" variants drop every character that also appears in
# `decimals`.
digits_no_decimals = "".join([x for x in digits if x not in decimals])
numeric_no_decimals = "".join([x for x in numeric if x not in decimals])
コード例 #37
0
def test_decimal_chars_contains_only_valid_unicode_decimal_characters():
    """Every entry of ``decimal_chars`` must have a Unicode decimal value."""
    for candidate in decimal_chars:
        value = unicodedata.decimal(candidate, None)
        assert value is not None
コード例 #38
0
ファイル: test_unicodedata.py プロジェクト: mcyril/ravel-ftn
""" Test script for the unicodedata module.
    Written by Marc-Andre Lemburg ([email protected]).
    (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
from test_support import verify, verbose
import sha
encoding = 'utf-8'
def test_methods():
    h = sha.sha()
    for i in range(65536):
        char = unichr(i)
        data = [
            # Predicates (single char)
            char.isalnum() and u'1' or u'0',
            char.isalpha() and u'1' or u'0',
            char.isdecimal() and u'1' or u'0',
            char.isdigit() and u'1' or u'0',
            char.islower() and u'1' or u'0',
            char.isnumeric() and u'1' or u'0',
            char.isspace() and u'1' or u'0',
            char.istitle() and u'1' or u'0',
            char.isupper() and u'1' or u'0',
            # Predicates (multiple chars)
            (char + u'abc').isalnum() and u'1' or u'0',
            (char + u'abc').isalpha() and u'1' or u'0',
            (char + u'123').isdecimal() and u'1' or u'0',
            (char + u'123').isdigit() and u'1' or u'0',
            (char + u'abc').islower() and u'1' or u'0',
            (char + u'123').isnumeric() and u'1' or u'0',
            (char + u' \t').isspace() and u'1' or u'0',
コード例 #39
0
ファイル: unicode_numbers.py プロジェクト: DarkSir23/mylar
    try:
        l = py23_unichr(a)
    except ValueError:  # pragma: no cover
        break
    if unicodedata.numeric(l, None) is None:
        continue  # pragma: no cover
    numeric_chars.append(l)

# The digit characters are a subset of the numerals.
digit_chars = [a for a in numeric_chars
               if unicodedata.digit(a, None) is not None]

# The decimal characters are a subset of the numerals
# (probably of the digits, but let's be safe).
decimal_chars = [a for a in numeric_chars
                 if unicodedata.decimal(a, None) is not None]

# Create a single string with the above data.
decimals = ''.join(decimal_chars)
digits = ''.join(digit_chars)
numeric = ''.join(numeric_chars)
# The "_no_decimals" variants drop every character that also appears in
# `decimals`.
digits_no_decimals = ''.join([x for x in digits if x not in decimals])
numeric_no_decimals = ''.join([x for x in numeric if x not in decimals])

# Some code that can be used to create the above list of hex numbers.
if __name__ == '__main__':
    import textwrap
    from natsort.compat.py23 import py23_range

    hex_chars = []
    for i in py23_range(0X110000):
コード例 #40
0
import unicodedata


def main():
    """Print a sample of ``unicodedata`` lookups, one result per line."""
    print(unicodedata.lookup('left curly bracket'))

    print(unicodedata.name('\\'))
    print(unicodedata.decimal("1"))
    print(unicodedata.digit("4"))
    print(unicodedata.numeric("9"))
    print(unicodedata.category("/"))

    print(unicodedata.bidirectional("b"))

    print(unicodedata.east_asian_width("b"))
    # mirrored() accepts exactly one character; the two-character "{}"
    # that used to be passed here raises TypeError.
    print(unicodedata.mirrored("{"))


if __name__ == "__main__":
    main()
コード例 #41
0
print("bidirectional ok")

# Each table in `tests` maps an expected property value to the hexadecimal
# code point of the character that should report it.
for category, cp in tests["categories"].items():
    char = chr(int(cp, 16))
    assert category == unicodedata.category(char)

print("categories ok")

for comb, cp in tests["combinings"].items():
    char = chr(int(cp, 16))
    assert int(comb) == unicodedata.combining(char)

print("combining ok")

for decimal, cp in tests["decimals"].items():
    # Empty entries mean "no expectation" and are skipped.
    if not decimal:
        continue
    # NOTE(review): eval() runs arbitrary expressions; only safe while the
    # `tests` table comes from a trusted source.
    expected = eval(decimal)
    assert expected == unicodedata.decimal(chr(int(cp, 16)))

print("decimals ok")

for decomp, cp in tests["decompositions"].items():
    char = chr(int(cp, 16))
    assert decomp == unicodedata.decomposition(char)

print("decomposition ok")

for digit, cp in tests["digits"].items():
    # Empty entries mean "no expectation" and are skipped.
    if not digit:
        continue
    # NOTE(review): same eval() caveat as for the decimals table.
    expected = eval(digit)
    assert expected == unicodedata.digit(chr(int(cp, 16)))

print("digits ok")

for name, cp in tests["names"].items():
コード例 #42
0
    ## Function
        1-unicodedata.lookup(name)
        2-unicodedata.name(chr[, default])
        3-unicodedata.decimal(chr[, default])
        4-unicodedata.digit(chr[, default])
        5-unicodedata.numeric(chr[, default])
        6-unicodedata.category(chr)
        7-unicodedata.bidirectional(chr)
        8-unicodedata.normalize(form, unistr)
'''
import unicodedata

# Character lookup by name.
for char_name in ('LEFT CURLY BRACKET', 'RIGHT CURLY BRACKET', 'ASTERISK'):
    print(unicodedata.lookup(char_name))

# Name lookup by character.
for symbol in (u'/', u'|', u':'):
    print(unicodedata.name(symbol))

# Decimal values of ASCII digits.
for digit_char in (u'9', u'5', u'0', u'1'):
    print(unicodedata.decimal(digit_char))

# General categories of an upper-case and a lower-case letter.
for letter in (u'A', u'b'):
    print(unicodedata.category(letter))

# Bidirectional class of U+0660.
print(unicodedata.bidirectional(u'\u0660'))
コード例 #43
0
# Test Unicode database APIs
import unicodedata

# Announce the test; the trailing comma keeps the cursor on the same line.
print 'Testing unicodedata module...',

# digit(): u'A' and U+215B are expected to fall back to the None default,
# while u'9' and U+2468 are both expected to yield 9.
assert unicodedata.digit(u'A',None) is None
assert unicodedata.digit(u'9') == 9
assert unicodedata.digit(u'\u215b',None) is None
assert unicodedata.digit(u'\u2468') == 9

# numeric(): only u'A' is expected to fall back to the default; U+215B
# is expected to carry the fractional value 0.125.
assert unicodedata.numeric(u'A',None) is None
assert unicodedata.numeric(u'9') == 9
assert unicodedata.numeric(u'\u215b') == 0.125
assert unicodedata.numeric(u'\u2468') == 9.0

# decimal(): of the four probes only u'9' is expected to have a value.
assert unicodedata.decimal(u'A',None) is None
assert unicodedata.decimal(u'9') == 9
assert unicodedata.decimal(u'\u215b',None) is None
assert unicodedata.decimal(u'\u2468',None) is None

# General category codes.
assert unicodedata.category(u'\uFFFE') == 'Cn'
assert unicodedata.category(u'a') == 'Ll'
assert unicodedata.category(u'A') == 'Lu'

# Bidirectional classes; U+FFFE is expected to report an empty string.
assert unicodedata.bidirectional(u'\uFFFE') == ''
assert unicodedata.bidirectional(u' ') == 'WS'
assert unicodedata.bidirectional(u'A') == 'L'

# Decomposition mappings; U+FFFE is expected to report an empty string.
assert unicodedata.decomposition(u'\uFFFE') == ''
assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'
コード例 #44
0
import unicodedata

for char in [u"A", u"-", u"1", u"\N{LATIN CAPITAL LETTER O WITH DIAERESIS}"]:
    print repr(char),
    print unicodedata.category(char),
    print repr(unicodedata.decomposition(char)),
    print unicodedata.decimal(char, None),
    print unicodedata.numeric(char, None)

## u'A' Lu '' None None
## u'-' Pd '' None None
## u'1' Nd '' 1 1.0
## u'Ö' Lu '004F 0308' None None

コード例 #45
0
def setUpModule():
    """Generate a CSV of Unicode character properties and load it via EXAplus.

    One row is written per code point up to ``sys.maxunicode``, skipping
    code points 5024-5119 and every character whose general category
    starts with ``C``.  The CSV lives in a temporary file that is imported
    into a freshly (re)created ``utest.unicodedata`` table by piping SQL
    into an EXAplus subprocess.  The EXAplus output is logged; a non-zero
    exit status is logged as critical but does not raise.
    """
    log = logging.getLogger('unicodedata')

    log.info('generating unicodedata CSV')
    with tempfile.NamedTemporaryFile(prefix='unicode-',
                                     suffix='.csv') as csvfile:
        c = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
        for i in xrange(sys.maxunicode + 1):
            if 5024 <= i <= 5119:
                continue  # the Unicode Cherokee-Block is broken in Python 2.7 and Python 3.4 (maybe also 3.5)
            u = unichr(i)
            if unicodedata.category(u).startswith('C'):
                # [Cc]Other, Control
                # [Cf]Other, Format
                # [Cn]Other, Not Assigned
                # [Co]Other, Private Use
                # [Cs]Other, Surrogate
                continue
            row = (
                i,  # INT 0-1114111
                unicodedata.name(u,
                                 'UNICODE U+%08X' % i),  # VARCHAR(100) ASCII
                u,  # VARCHAR(1) UNICODE
                u.upper(),  # VARCHAR(1) UNICODE
                u.lower(),  # VARCHAR(1) UNICODE
                unicodedata.decimal(u, None),  # INT
                unicodedata.numeric(u, None),  # DOUBLE
                unicodedata.category(u),  # VARCHAR(3) ASCII
                unicodedata.bidirectional(u),  # VARCHAR(3) ASCII
                unicodedata.combining(u),  # VARCHAR(3) ASCII
                unicodedata.east_asian_width(u),  # VARCHAR(1) ASCII
                # Call mirrored() for this character.  The function object
                # itself was being tested before, which is always true.
                bool(unicodedata.mirrored(u)),  # BOOLEAN
                unicodedata.decomposition(u),  # VARCHAR(10) ASCII
                unicodedata.normalize('NFC', u),  # VARCHAR(3) UNICODE
                unicodedata.normalize('NFD', u),  # VARCHAR(3) UNICODE
                unicodedata.normalize('NFKC', u),  # VARCHAR(3) UNICODE
                unicodedata.normalize('NFKD', u),  # VARCHAR(3) UNICODE
            )
            c.writerow(utf8encoder(row))
        # Make sure everything is on disk before EXAplus reads the file.
        csvfile.flush()

        log.info('loading CSV')
        sql = '''
            DROP SCHEMA utest CASCADE;
            CREATE SCHEMA utest;
            CREATE TABLE unicodedata (
                codepoint INT NOT NULL,
                name VARCHAR(100) ASCII,
                uchar VARCHAR(1) UTF8,
                to_upper VARCHAR(1) UTF8,
                to_lower VARCHAR(1) UTF8,
                decimal_value INT,
                numeric_value INT,
                category VARCHAR(3) ASCII,
                bidirectional VARCHAR(3) ASCII,
                combining VARCHAR(10) ASCII,
                east_asian_width VARCHAR(2) ASCII,                mirrored BOOLEAN,
                decomposition VARCHAR(100) ASCII,
                NFC VARCHAR(10) UTF8,
                NFD VARCHAR(10) UTF8,
                NFKC VARCHAR(20) UTF8,
                NFKD VARCHAR(20) UTF8
                );
            IMPORT INTO unicodedata
            FROM LOCAL CSV FILE '%s'
            ROW SEPARATOR = 'CRLF';
            ''' % os.path.join(os.getcwd(), csvfile.name)
        # NOTE(review): numeric_value is declared INT above although the
        # CSV column carries unicodedata.numeric() floats - confirm intent.
        cmd = '''%(exaplus)s -c %(conn)s -u sys -P exasol
		        -no-config -autocommit ON -L -pipe''' % {
            'exaplus':
            os.environ.get(
                'EXAPLUS',
                '/usr/opt/EXASuite-4/EXASolution-4.2.9/bin/Console/exaplus'),
            'conn':
            udf.opts.server
        }
        env = os.environ.copy()
        env['PATH'] = '/usr/opt/jdk1.8.0_latest/bin:' + env['PATH']
        # The import has to finish inside the `with` block: the temporary
        # CSV is deleted as soon as the block is left.
        exaplus = subprocess.Popen(cmd.split(),
                                   env=env,
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT)
        out, _err = exaplus.communicate(sql)
    if exaplus.returncode != 0:
        log.critical('EXAplus error: %d', exaplus.returncode)
        log.error(out)
    else:
        log.debug(out)
コード例 #46
0
def _add_order_lines(request, model, field, label, food_data, running_total,
                     with_special_request=False):
    """Append the order lines of one menu category to ``food_data``.

    The selected ids are read from ``request.POST.getlist(field)`` and each
    item's quantity from the POST key ``<field>_qty_<id>``.

    :param model: model class whose ``objects.filter(id__in=...)`` yields
        the selected items (each with ``id``, ``name`` and ``price``).
    :param field: POST field name of the category, e.g. ``'pizzas'``.
    :param label: category label used in the error message.
    :param food_data: list that receives one dict per selected item.
    :param running_total: price accumulated so far.
    :param with_special_request: also copy ``<field>_special_request``
        from the POST data into every line.
    :returns: ``(running_total, error)``; ``error`` is ``None`` on success
        or the message for the first item whose quantity is missing/empty.
    """
    selected_ids = request.POST.getlist(field, None)
    if not selected_ids:
        # Nothing selected in this category: no query, nothing to add.
        return running_total, None

    for item in model.objects.filter(id__in=selected_ids):
        quantity = request.POST.get(field + '_qty_' + str(item.id), None)
        if quantity is None or quantity == '':
            return running_total, label + ' Quantity not given properly!'

        # NOTE(review): `decimal` is resolved from the enclosing module and
        # is applied to the raw quantity string - confirm that it accepts
        # multi-character input.
        line_total = item.price * decimal(quantity)
        line = {
            'name': item.name,
            'total_price': str(line_total),
            'qty': quantity,
        }
        if with_special_request:
            line['special_request'] = request.POST.get(
                field + '_special_request', None)
        food_data.append(line)
        running_total += line_total

    return running_total, None


def check_food_qty_len(request):
    """Validate the POSTed food selection and price it.

    At least one pizza must be selected, and every selected item of every
    category must come with a non-empty quantity.

    :param request: request object whose ``POST`` data holds the selected
        ids per category and the per-item quantities.
    :returns: ``(ok, message, data)``.  On success ``ok`` is ``True``,
        ``message`` is ``''`` and ``data`` holds ``'food_data'`` (one dict
        per ordered item) and ``'total_food_price'``.  On failure ``ok`` is
        ``False``, ``message`` explains the problem and ``data`` is ``{}``.
    """
    data = {}

    if len(request.POST.getlist('pizzas', None)) == 0:
        return False, 'Please select at least one pizza and quantity first.', data

    # (model, POST field, label used in the error message, special request?)
    categories = (
        (Pizza, 'pizzas', 'Pizza', False),
        (GlutenCauliflower, 'gluten_cauliflower',
         'Gluten Free and Cauliflower Crust', False),
        (WingSauce, 'wings_sauce', 'Wings Sauces', True),
        (Salad, 'salad', 'Salads', True),
        (SaladDressing, 'salad_dressing', 'Salad Dressings', False),
        (Dessert, 'dessert', 'Desserts', True),
        (Bread, 'bread', 'Breads', False),
        (Wing, 'wing', 'Wings', False),
    )

    food_data = []
    total_food_price = 0
    for model, field, label, with_special_request in categories:
        total_food_price, error = _add_order_lines(
            request, model, field, label, food_data, total_food_price,
            with_special_request)
        if error is not None:
            # Same contract as before: the first bad quantity aborts the
            # whole check and nothing collected so far is returned.
            return False, error, data

    data = {'food_data': food_data, 'total_food_price': total_food_price}

    return True, '', data
コード例 #47
0
'''
The unicodedata module.

The unicodedata module exposes the properties of Unicode characters, such
as the character category, decomposition data, and numeric values.
'''
import unicodedata

# Row layout: char -> repr -> category -> decomposition -> decimal => numeric
ROW_TEMPLATE = '%s -> %s -> %s -> %s -> %s => %s'

for char in [u'A', u'-', u'1', u'w']:
    category = unicodedata.category(char)
    decomposition = unicodedata.decomposition(char)
    decimal_value = unicodedata.decimal(char, None)
    numeric_value = unicodedata.numeric(char, None)
    print(ROW_TEMPLATE % (char, repr(char), category, repr(decomposition),
                          decimal_value, numeric_value))