Beispiel #1
0
def print_Unicode_info(char, short):
    """Print Unicode properties of a single character to stdout.

    Args:
        char: A one-character string.
        short: If truthy, print a one-line summary (character, name and
            upper-case hex code point). Otherwise print one labelled line
            per property.

    Raises:
        TypeError: If ``char`` is not a single character (raised by
            ``unicodedata.name``).
    """
    name = unicodedata.name(char, "UNKNOWN")
    dec_codepoint = ord(char)

    if short:
        # Upper-case hex digits without the "0x" prefix, e.g. "U+E9".
        print(f"{char}\t{name} (U+{dec_codepoint:X})")
        return

    # The remaining properties are only needed for the long report, so they
    # are looked up here instead of unconditionally.
    rows = (
        ("Name", name),
        ("Character", char),
        ("Dec Codepoint", dec_codepoint),
        ("Hex Codepoint", hex(dec_codepoint)),
        ("Lowercase", char.lower()),
        ("Uppercase", char.upper()),
        ("Category", unicodedata.category(char)),
        ("Bidirectional", unicodedata.bidirectional(char)),
        # unicodedata.mirrored() returns 0 or 1; report it as a bool.
        ("Mirrored", bool(unicodedata.mirrored(char))),
        ("NFC", unicodedata.normalize("NFC", char)),
        ("NFD", unicodedata.normalize("NFD", char)),
    )
    for label, value in rows:
        # Labels are left-aligned in a 14-character column.
        print(f"{label:<14}{value}")
Beispiel #2
0
 def test_ucd_510(self):
     import unicodedata
     # In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0
     self.assert_(unicodedata.mirrored(u"\u0f3a"))
     self.assert_(not unicodedata.ucd_3_2_0.mirrored(u"\u0f3a"))
     # Also, we now have two ways of representing
     # the upper-case mapping: as delta, or as absolute value
     self.assert_(u"a".upper()==u'A')
     self.assert_(u"\u1d79".upper()==u'\ua77d')
Beispiel #3
0
 def test_ucd_510(self):
     import unicodedata
     # In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0
     self.assertTrue(unicodedata.mirrored("\u0f3a"))
     self.assertTrue(not unicodedata.ucd_3_2_0.mirrored("\u0f3a"))
     # Also, we now have two ways of representing
     # the upper-case mapping: as delta, or as absolute value
     self.assertTrue("a".upper() == 'A')
     self.assertTrue("\u1d79".upper() == '\ua77d')
     self.assertTrue(".".upper() == '.')
Beispiel #4
0
def iter_open_close_info4s():
    """Yield ``(ordinal, name, char, category)`` for bracket-like code points.

    Every ordinal below ``CHAR_ORD_UPPER`` is examined. A character is
    reported when its general category is one of ``Ps``, ``Pe``, ``Pi`` or
    ``Pf``, or when ``unicodedata.mirrored`` is truthy for it. Characters
    without a Unicode name are reported with an empty name.
    """
    bracket_categories = ('Ps', 'Pe', 'Pi', 'Pf')
    for ordinal in range(CHAR_ORD_UPPER):
        char = chr(ordinal)
        category = unicodedata.category(char)
        if category not in bracket_categories and not unicodedata.mirrored(char):
            continue
        yield ordinal, unicodedata.name(char, ''), char, category
	def test_ucd_510(self):
		# stdlib
		import unicodedata

		# In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0
		self.assertTrue(unicodedata.mirrored('Я╝║'))
		self.assertTrue(not unicodedata.ucd_3_2_0.mirrored('Я╝║'))
		# Also, we now have two ways of representing
		# the upper-case mapping: as delta, or as absolute value
		self.assertTrue('a'.upper() == 'A')
		self.assertTrue('рх╣'.upper() == 'ЖЮй')
		self.assertTrue('.'.upper() == '.')
Beispiel #6
0
def apply_mirroring(storage):
    """
    Apply rule L4 (mirroring) to the characters in ``storage`` in place.

    An entry of ``storage["chars"]`` has its ``"ch"`` replaced by the value
    found in ``MIRRORED_CHARACTER_PAIRS`` only when ``mirrored`` is truthy for
    the character and ``embedding_direction`` of the entry's ``"level"`` is
    ``"R"``. Characters without a table entry are left unchanged.

    See: http://unicode.org/reports/tr9/#L4
    """
    for entry in storage["chars"]:
        original = entry["ch"]
        if not mirrored(original):
            continue
        if embedding_direction(entry["level"]) != "R":
            continue
        entry["ch"] = MIRRORED_CHARACTER_PAIRS.get(original, original)
Beispiel #7
0
    def _explain_char(self, ch, further):
        try:
            name = unicodedata.name(ch)
        except ValueError:
            name = f'[U+{hex(ord(ch))[2:]}]'
        if not further:
            return name + f'({ch})'
        infos = {
            'category': unicodedata.category(ch),
            'direction': unicodedata.bidirectional(ch),
            'east asian width': unicodedata.east_asian_width(ch)
        }

        decomposition = unicodedata.decomposition(ch)
        if decomposition:
            infos['decomposition'] = decomposition

        try:
            infos['digit value'] = unicodedata.digit(ch)
        except ValueError:
            pass
        try:
            infos['decimal value'] = unicodedata.decimal(ch)
        except ValueError:
            pass
        try:
            infos['numeric value'] = unicodedata.numeric(ch)
        except ValueError:
            pass
        comb = unicodedata.combining(ch)
        if comb != 0:
            infos['combining class'] = str(comb)

        mirrored = unicodedata.mirrored(ch)
        if mirrored:
            infos['mirrored'] = 'yes'
        if hasattr(unicodedata, 'is_normalized'):
            forms = []
            for form in ('NFC', 'NFD', 'NFKC', 'NFKD'):
                if unicodedata.is_normalized(form, ch):
                    forms.append(form)
            if forms:
                infos['normalized'] = f'yes: {", ".join(forms)}'
            else:
                infos['normalized'] = 'no'
        else:
            infos['normalized'] = 'unavailable'

        info = ', '.join([f'{k}: {v}' for k, v in infos.items()])
        return f'{name}: {ch!r} ({info})'
def overview(tree_item):
    """Format the Unicode properties of ``tree_item.obj`` with ``TEMPLATE``.

    The template receives, in order: name, the character itself, decimal,
    digit and numeric values (empty string when absent), category,
    bidirectional class, combining class, East Asian width, mirrored flag
    and decomposition.
    """
    character = tree_item.obj
    values = (
        unicodedata.name(character, '<NO NAME AVAILABLE>'),
        character,
        unicodedata.decimal(character, ''),
        unicodedata.digit(character, ''),
        unicodedata.numeric(character, ''),
        unicodedata.category(character),
        unicodedata.bidirectional(character),
        unicodedata.combining(character),
        unicodedata.east_asian_width(character),
        unicodedata.mirrored(character),
        unicodedata.decomposition(character),
    )
    return TEMPLATE.format(*values)
Beispiel #9
0
def overview(tree_item):
    """Return ``TEMPLATE`` filled with the Unicode data of ``tree_item.obj``.

    Positional template fields, in order: name, character, decimal, digit,
    numeric, category, bidirectional, combining, east_asian_width, mirrored,
    decomposition. Missing numeric values are rendered as empty strings.
    """
    char = tree_item.obj
    args = [unicodedata.name(char, '<NO NAME AVAILABLE>'), char]
    # Lookups that accept a default for characters without the property.
    for prop in ('decimal', 'digit', 'numeric'):
        args.append(getattr(unicodedata, prop)(char, ''))
    # Lookups that always return a value.
    for prop in ('category', 'bidirectional', 'combining',
                 'east_asian_width', 'mirrored', 'decomposition'):
        args.append(getattr(unicodedata, prop)(char))
    return TEMPLATE.format(*args)
Beispiel #10
0
def char2info(ch):
    """Collect the Unicode properties of the single character ``ch``.

    Returns:
        A dict with the keys ``ch``, ``name``, ``decimal``, ``digit``,
        ``numeric``, ``category``, ``bidirectional``, ``combining``,
        ``east_asian_width``, ``mirrored``, ``decomposition``, ``unicode``
        (the integer code point) and ``unicode_hex`` (``hex()`` of it).
        ``name``, ``decimal``, ``digit`` and ``numeric`` are ``None`` when the
        character has no such property.
    """
    # The result is spelled out explicitly rather than taken from locals(),
    # so adding a helper variable here can never leak into the returned dict.
    return {
        'ch': ch,
        'name': U.name(ch, None),
        'decimal': U.decimal(ch, None),
        'digit': U.digit(ch, None),
        'numeric': U.numeric(ch, None),
        'category': U.category(ch),
        'bidirectional': U.bidirectional(ch),
        'combining': U.combining(ch),
        'east_asian_width': U.east_asian_width(ch),
        'mirrored': U.mirrored(ch),
        'decomposition': U.decomposition(ch),
        'unicode': ord(ch),
        'unicode_hex': hex(ord(ch)),
    }
Beispiel #11
0
def apply_mirroring(storage, debug):
    """Apply rule L4 (mirroring) to ``storage['chars']`` in place.

    L4: a character is depicted by a mirrored glyph if and only if (a) its
    resolved directionality is R, and (b) its Bidi_Mirrored property is true.
    When ``debug`` is truthy, ``debug_storage`` is called on the result.

    See: http://unicode.org/reports/tr9/#L4

    """
    for entry in storage['chars']:
        original = entry['ch']
        if not mirrored(original):
            continue
        if _embedding_direction(entry['level']) != 'R':
            continue
        # Characters missing from the table keep their original glyph.
        entry['ch'] = MIRRORED.get(original, original)

    if debug:
        debug_storage(storage)
Beispiel #12
0
    def test_compare_functions(self):
        """Compare ``unicodedata`` with ``unicodedb_5_2_0`` for code points below 0x10000.

        Uses the Python 2 builtin ``unichr`` to build each character.
        """
        def reference(prop, code):
            # A KeyError from the reference database is mapped to -1, the
            # same default that is passed to unicodedata below.
            try:
                return getattr(unicodedb_5_2_0, prop)(code)
            except KeyError:
                return -1

        with_default = ('digit', 'numeric', 'decimal')
        direct = ('category', 'bidirectional', 'decomposition',
                  'mirrored', 'combining')

        for code in range(0x10000):
            char = unichr(code)
            for prop in with_default:
                assert getattr(unicodedata, prop)(char, -1) == reference(prop, code)
            for prop in direct:
                assert getattr(unicodedata, prop)(char) == getattr(unicodedb_5_2_0, prop)(code)
Beispiel #13
0
def apply_mirroring(storage, debug):
    """Replace mirrored right-to-left characters by their mirror glyph (rule L4).

    L4: a character is depicted by a mirrored glyph if and only if (a) the
    resolved directionality of that character is R, and (b) the Bidi_Mirrored
    property value of that character is true. ``storage`` is modified in
    place; ``debug_storage`` is called afterwards when ``debug`` is truthy.

    See: http://unicode.org/reports/tr9/#L4

    """
    for item in storage['chars']:
        glyph = item['ch']
        # The direction is only evaluated for characters that are mirrored.
        needs_swap = mirrored(glyph) and _embedding_direction(item['level']) == 'R'
        if needs_swap:
            item['ch'] = MIRRORED.get(glyph, glyph)

    if debug:
        debug_storage(storage)
Beispiel #14
0
    def test_compare_functions(self):
        """Check that ``unicodedata`` agrees with ``unicodedb_5_2_0`` below 0x10000.

        Characters are created with the Python 2 builtin ``unichr``.
        """
        def db_value(prop, code):
            lookup = getattr(unicodedb_5_2_0, prop)
            try:
                return lookup(code)
            except KeyError:
                # Same sentinel as the default given to unicodedata below.
                return -1

        for code in range(0x10000):
            char = unichr(code)
            # Properties that may be missing: compare using -1 as default.
            for prop in ('digit', 'numeric', 'decimal'):
                actual = getattr(unicodedata, prop)(char, -1)
                assert actual == db_value(prop, code)
            # Properties defined for every code point: compare directly.
            for prop in ('category', 'bidirectional', 'decomposition',
                         'mirrored', 'combining'):
                actual = getattr(unicodedata, prop)(char)
                assert actual == getattr(unicodedb_5_2_0, prop)(code)
Beispiel #15
0
def main():
  """Decode the hex byte arguments as UTF-8 and print the character's Unicode data.

  Each command-line argument is parsed as one hexadecimal byte value. Any
  exception is reported as a single ``ERROR: ...`` line.
  """
  try:
    encoded = bytes(int(arg, 16) for arg in sys.argv[1:])
    c = encoded.decode('utf8')
    # Values are fetched lazily, row by row, so rows printed before a failing
    # lookup are still emitted.
    rows = (
      ('gryph:            %s', lambda: c),
      ('codepoint:        U+%x', lambda: ord(c)),
      ('name:             %s', lambda: unicodedata.name(c, 'Unknown')),
      ('decimal:          %s', lambda: unicodedata.decimal(c, 'Unknown')),
      ('digit:            %s', lambda: unicodedata.digit(c, 'Unknown')),
      ('numeric:          %s', lambda: unicodedata.numeric(c, 'Unknown')),
      ('category:         %s', lambda: unicodedata.category(c)),
      ('bidirectional:    %s', lambda: unicodedata.bidirectional(c)),
      ('combining:        %s', lambda: unicodedata.combining(c)),
      ('east_asian_width: %s', lambda: unicodedata.east_asian_width(c)),
      ('mirrored:         %s', lambda: unicodedata.mirrored(c)),
      ('decomposition:    %s', lambda: unicodedata.decomposition(c)),
    )
    for template, fetch in rows:
      print(template % fetch())
  except Exception as ex:
    print('ERROR: %s' % ex)
Beispiel #16
0
def main():
    """Print Unicode data for the character given as hex-encoded UTF-8 bytes.

    The command-line arguments are hexadecimal byte values that together
    form the UTF-8 encoding of the character. Any exception raised along the
    way is printed as ``ERROR: <message>``.
    """
    with_default = (
        ('name:             %s', 'name'),
        ('decimal:          %s', 'decimal'),
        ('digit:            %s', 'digit'),
        ('numeric:          %s', 'numeric'),
    )
    plain = (
        ('category:         %s', 'category'),
        ('bidirectional:    %s', 'bidirectional'),
        ('combining:        %s', 'combining'),
        ('east_asian_width: %s', 'east_asian_width'),
        ('mirrored:         %s', 'mirrored'),
        ('decomposition:    %s', 'decomposition'),
    )
    try:
        c = bytes(int(x, 16) for x in sys.argv[1:]).decode('utf8')
        print('gryph:            %s' % c)
        print('codepoint:        U+%x' % ord(c))
        # Lookups that fall back to 'Unknown' when the property is absent.
        for template, prop in with_default:
            print(template % getattr(unicodedata, prop)(c, 'Unknown'))
        for template, prop in plain:
            print(template % getattr(unicodedata, prop)(c))
    except Exception as ex:
        print('ERROR: %s' % ex)
	def test_function_checksum(self):
		"""Hash the per-character properties of every code point and compare.

		The numeric properties come from ``self.db``, the remaining ones from
		``unicodedata``; the SHA-1 hex digest of all of them must equal
		``self.expectedchecksum``.
		"""
		digest = hashlib.sha1()  # nosec: B303

		for codepoint in range(sys.maxunicode + 1):
			char = chr(codepoint)
			# Properties
			fields = (
				format(self.db.digit(char, -1), ".12g"),
				format(self.db.numeric(char, -1), ".12g"),
				format(self.db.decimal(char, -1), ".12g"),
				unicodedata.category(char),
				unicodedata.bidirectional(char),
				unicodedata.decomposition(char),
				str(unicodedata.mirrored(char)),
				str(unicodedata.combining(char)),
			)
			digest.update(''.join(fields).encode("ascii"))

		self.assertEqual(digest.hexdigest(), self.expectedchecksum)
Beispiel #18
0
    def __init__(self, symbol):
        """Store ``symbol`` together with its Unicode properties as attributes.

        Numeric properties default to ``-1`` and the name to
        ``'NO_NAME_FOUND'`` when the character has none. If
        ``Config.debug['unicode']`` is truthy, ``print_debug`` is called.
        """
        self.symbol = symbol
        self.name = u.name(symbol, 'NO_NAME_FOUND')

        sym = self.symbol

        # Properties that may be absent: -1 marks "no value".
        for prop in ('decimal', 'digit', 'numeric'):
            setattr(self, prop, getattr(u, prop)(sym, -1))

        # Properties that are defined for every character.
        for prop in ('category', 'bidirectional', 'combining',
                     'east_asian_width', 'mirrored', 'decomposition'):
            setattr(self, prop, getattr(u, prop)(sym))

        # The attribute names keep the existing "nkfc"/"nkfd" spelling.
        self.normalize_nfc = u.normalize('NFC', sym)
        self.normalize_nkfc = u.normalize('NFKC', sym)
        self.normalize_nfd = u.normalize('NFD', sym)
        self.normalize_nkfd = u.normalize('NFKD', sym)

        if Config.debug['unicode']:
            self.print_debug()
Beispiel #19
0
    def test_compare_functions(self):
        """Compare CPython's ``unicodedata`` with ``unicodedb_4_1_0`` below 0x10000.

        Code points listed in ``self.diff_numeric`` are expected to have no
        numeric value. Characters are built with the Python 2 ``unichr``.
        """
        import unicodedata # CPython implementation

        def expected(prop, code):
            if prop == 'numeric' and code in self.diff_numeric:
                return -1
            try:
                return getattr(unicodedb_4_1_0, prop)(code)
            except KeyError:
                # Same sentinel as the default passed to unicodedata below.
                return -1

        with_default = ('digit', 'numeric', 'decimal')
        direct = ('category', 'bidirectional', 'decomposition',
                  'mirrored', 'combining')

        for code in range(0x10000):
            char = unichr(code)
            for prop in with_default:
                assert getattr(unicodedata, prop)(char, -1) == expected(prop, code)
            for prop in direct:
                assert getattr(unicodedata, prop)(char) == getattr(unicodedb_4_1_0, prop)(code)
    def test_compare_functions(self):
        """Check ``unicodedb_4_1_0`` against CPython's ``unicodedata`` below 0x10000.

        For code points in ``self.diff_numeric`` the expected numeric value
        is forced to ``-1``. Uses the Python 2 builtin ``unichr``.
        """
        import unicodedata # CPython implementation

        def db_value(prop, code):
            numeric_override = prop == 'numeric' and code in self.diff_numeric
            if numeric_override:
                return -1
            lookup = getattr(unicodedb_4_1_0, prop)
            try:
                return lookup(code)
            except KeyError:
                return -1

        for code in range(0x10000):
            char = unichr(code)
            # Optional properties: unicodedata is asked with -1 as default.
            for prop in ('digit', 'numeric', 'decimal'):
                actual = getattr(unicodedata, prop)(char, -1)
                assert actual == db_value(prop, code)
            # Properties available for every code point.
            for prop in ('category', 'bidirectional', 'decomposition',
                         'mirrored', 'combining'):
                actual = getattr(unicodedata, prop)(char)
                assert actual == getattr(unicodedb_4_1_0, prop)(code)
    def test_ipy2_gh357(self):
        """https://github.com/IronLanguages/ironpython2/issues/357"""

        import unicodedata

        han = u'\u4e2d'

        # The expected name depends on the ``is_cli`` flag.
        if is_cli:
            expected_name = '<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>'
        else:
            expected_name = 'CJK UNIFIED IDEOGRAPH-4E2D'
        self.assertEqual(unicodedata.name(han), expected_name)

        # No numeric value of any kind: raises without a default, returns
        # the default when one is supplied.
        for lookup in (unicodedata.decimal, unicodedata.digit, unicodedata.numeric):
            self.assertRaises(ValueError, lookup, han)
            self.assertEqual(lookup(han, 0), 0)

        self.assertEqual(unicodedata.category(han), 'Lo')
        self.assertEqual(unicodedata.bidirectional(han), 'L')
        self.assertEqual(unicodedata.combining(han), 0)
        self.assertEqual(unicodedata.east_asian_width(han), 'W')
        self.assertEqual(unicodedata.mirrored(han), 0)
        self.assertEqual(unicodedata.decomposition(han), '')
    def test_ipy2_gh357(self):
        """https://github.com/IronLanguages/ironpython2/issues/357"""

        import unicodedata

        ch = u'\u4e2d'

        if is_cli:
            self.assertEqual(unicodedata.name(ch), '<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>')
        else:
            self.assertEqual(unicodedata.name(ch), 'CJK UNIFIED IDEOGRAPH-4E2D')

        # Numeric lookups: ValueError without a default, the default otherwise.
        for prop in ('decimal', 'digit', 'numeric'):
            lookup = getattr(unicodedata, prop)
            self.assertRaises(ValueError, lookup, ch)
            self.assertEqual(lookup(ch, 0), 0)

        # Properties that always return a value.
        expected_properties = (
            ('category', 'Lo'),
            ('bidirectional', 'L'),
            ('combining', 0),
            ('east_asian_width', 'W'),
            ('mirrored', 0),
            ('decomposition', ''),
        )
        for prop, expected in expected_properties:
            self.assertEqual(getattr(unicodedata, prop)(ch), expected)
Beispiel #23
0
    def to_str(s):
        """Return ``s`` with special characters replaced by ``{{UNICODE NAME}}``.

        Strings longer than one character that start with ``$`` are returned
        unchanged, and an empty value becomes ``'{{}}'``. A character is
        replaced when its category is one of Mc, Cc, Cf, Zl, Zp, Zs or Mn,
        when it is mirrored, or when it is one of ``=``, ``,`` and ``;``.

        NOTE(review): ``name()`` is called without a default, so a character
        that needs replacing but has no Unicode name raises ValueError.
        """
        # ";" WARNING! ======================================================
        is_variable = len(s) > 1 and s[0] == '$'
        if is_variable:
            return s  # $VARIABLE HACK!

        # TODO: Make it so that combining/rtl characters
        # are referenced by name, rather than the original
        # characters codepoints etc

        if not s:
            return '{{}}'

        from unicodedata import name, category, mirrored

        escaped_categories = {'Mc', 'Cc', 'Cf', 'Zl', 'Zp', 'Zs', 'Mn'}

        def needs_escape(c):
            # Checks run in this order and stop at the first that matches.
            return (category(str(c)) in escaped_categories
                    or mirrored(str(c))
                    or c in '=,;')

        return ''.join(
            '{{%s}}' % name(str(c)) if needs_escape(c) else c
            for c in s
        )
Beispiel #24
0
def test_against_unicodedata():
    '''
    Check against `unicodedata` or `unicodedata2` if available with the
    correct version of Unicode.

    Every code point from 0 to 0x10FFFF that is present in the parsed UCD
    data is compared with the module's name, numeric values, category,
    bidirectional class, combining class, mirrored flag and decomposition.
    Bidirectional class and East Asian width are additionally compared with
    the derived data files.

    Raises `Exception` when no suitable `unicodedata` module was imported
    (i.e. the module-level name `unicodedata` is `None`).
    '''
    if unicodedata is None:
        raise Exception(
            'Packages unicodedata and unicodedata2 are not available with the necessary version of Unicode ({0}); many consistency tests were omitted'
            .format(mdl.UNICODE_VERSION))
    ucdf = mdl.UCDFiles()

    # presumably maps code point -> dict of UnicodeData.txt properties;
    # verify against mdl.UCDFiles
    ud = ucdf.unicodedata
    for cp in range(0, 0x10FFFF + 1):
        c = chr(cp)
        if cp in ud:
            # --- Name ---
            name = unicodedata.name(c, None)
            if name is None:
                # Handle missing names in unicodedata
                # Compare Table 4-13 in Unicode Standard
                # http://www.unicode.org/versions/Unicode9.0.0/ch04.pdf
                if 0x17000 <= cp <= 0x187EC:
                    assert ud[cp]['Name'] == 'TANGUT IDEOGRAPH-{0:04X}'.format(
                        cp)
                else:
                    assert ud[cp]['Name'] == ''
            else:
                assert name == ud[cp]['Name']
            # --- Numeric values ---
            # None marks "the module reports no such value".
            decimal, digit, numeric = (unicodedata.decimal(c, None),
                                       unicodedata.digit(c, None),
                                       unicodedata.numeric(c, None))
            if any(x is not None for x in (decimal, digit, numeric)):
                if decimal is not None:
                    # A decimal value must come with a digit value; the
                    # trailing `decimal is not None` repeats the branch test.
                    assert decimal == int(ud[cp]['Numeric_Value']) and ud[cp][
                        'Numeric_Type'] == 'Decimal' and digit is not None and decimal is not None
                elif digit is not None:
                    # Digit without decimal: a numeric value is still required.
                    assert digit == int(ud[cp]['Numeric_Value']) and ud[cp][
                        'Numeric_Type'] == 'Digit' and decimal is None and numeric is not None
                elif numeric is not None:
                    # Numeric_Value is either float-parsable text or a
                    # fraction written as "numerator/denominator".
                    try:
                        num = float(ud[cp]['Numeric_Value'])
                    except ValueError:
                        if '/' in ud[cp]['Numeric_Value']:
                            numerator, denominator = ud[cp][
                                'Numeric_Value'].split('/')
                            num = float(numerator) / float(denominator)
                        else:
                            raise
                    assert numeric == num and ud[cp][
                        'Numeric_Type'] == 'Numeric' and digit is None and decimal is None
                else:
                    # Not reachable: the enclosing any() guarantees that one
                    # of the three values is not None.
                    raise Exception
            else:
                assert ud[cp]['Numeric_Value'] == 'NaN' and ud[cp][
                    'Numeric_Type'] == 'None'
            # --- Simple one-to-one properties ---
            assert unicodedata.category(c) == ud[cp]['General_Category']
            assert unicodedata.bidirectional(c) == ud[cp]['Bidi_Class']
            assert unicodedata.combining(c) == int(
                ud[cp]['Canonical_Combining_Class'])
            assert unicodedata.mirrored(c) == ud[cp]['Bidi_Mirrored']
            # --- Decomposition mapping ---
            if unicodedata.decomposition(c) == '':
                if ud[cp]['Name'].startswith('HANGUL SYLLABLE'):
                    # The Hangul syllables lack decomposition mapping in
                    # unicodedata, so calculate with a full decomposition
                    # followed by a partial composition (Unicode Standard,
                    # chapter 3.12)
                    decomp = unicodedata.normalize('NFD', c)
                    if len(decomp) == 3:
                        decomp = unicodedata.normalize('NFC',
                                                       decomp[:2]) + decomp[-1]
                    decomp = tuple(ord(x) for x in decomp)
                    assert decomp == ud[cp]['Decomposition_Mapping']
                else:
                    # No decomposition: the reference maps the code point
                    # to itself.
                    assert ud[cp]['Decomposition_Mapping'] == (cp, )
            else:
                x = unicodedata.decomposition(c)
                # Drop a leading "<tag>" before parsing the hex code points.
                if '<' in x:
                    x = x.split('>', 1)[1].strip()
                x = tuple(int(y, 16) for y in x.split('\x20'))
                assert x == ud[cp]['Decomposition_Mapping']

    # Cross-check the bidirectional class against the derived data.
    dbc = ucdf.derivedbidiclass
    for cp in range(0, 0x10FFFF + 1):
        c = chr(cp)
        # Only compare assigned code points, because unicodedata and
        # unicodedata2 lack correct defaults for unassigned
        if cp in dbc and cp in ud:
            assert unicodedata.bidirectional(c) == dbc[cp]['Bidi_Class']

    # Cross-check East Asian width against both the plain and derived data.
    eaw = ucdf.eastasianwidth
    deaw = ucdf.derivedeastasianwidth
    for cp in range(0, 0x10FFFF + 1):
        c = chr(cp)
        # Only compare assigned code points, because unicodedata and
        # unicodedata2 lack correct defaults for unassigned
        if cp in eaw and cp in ud:
            assert unicodedata.east_asian_width(
                c) == eaw[cp]['East_Asian_Width']
        if cp in deaw and cp in ud:
            assert unicodedata.east_asian_width(
                c) == deaw[cp]['East_Asian_Width']
Beispiel #25
0
    async def charinfo(self, *, data: str):
        """Shows information about one or several characters.

        'data' can either be a character, a unicode escape sequence, a unicode character name or a string.
        If 'data' is a string only a summary of each character's info will be displayed.
        """
        # The lower-cased copy is used only to recognise the escape prefix and
        # for the name lookup. The original text is kept so that an upper-case
        # character such as "A" is not reported as "a".
        lowered = data.lower()

        if lowered.startswith('\\u'):
            # Let's interpret the unicode escape sequence
            hex_values = lowered.split('\\u')[1:]
            try:
                # chr() is inside the try as well: it raises ValueError or
                # OverflowError for values that are not valid code points.
                data = ''.join(chr(int(val, 16)) for val in hex_values)
            except (ValueError, OverflowError):
                await self.bot.say('Invalid unicode escape sequence.')
                return
        elif len(data) > 1:
            # Maybe we've been given the character's name ?
            try:
                data = unicodedata.lookup(lowered)
            except KeyError:
                # Not a character name: treat the input as a plain string.
                pass

        # Normalise the input
        data = unicodedata.normalize('NFC', data)
        url_fmt = '<http://unicode-table.com/en/{:X}>'

        if len(data) == 1:
            # Detailed info on the character
            entries = [
                ('Character', data),
                ('Name', unicodedata.name(data, 'None')),
                ('Code point', '{:04x}'.format(ord(data)))
            ]
            decomposition = unicodedata.decomposition(data)
            if decomposition != '':
                entries.append(('Decomposition', decomposition))

            combining = unicodedata.combining(data)
            if combining:
                entries.append(('Combining class', combining))

            entries.append(('Category', unicodedata.category(data)))
            bidirectional = unicodedata.bidirectional(data)
            entries.append(('Bidirectional', bidirectional if bidirectional != '' else 'None'))
            # unicodedata.mirrored() returns 0 or 1; show it as True/False.
            entries.append(('Mirrored', str(bool(unicodedata.mirrored(data)))))
            entries.append(('East asian width', unicodedata.east_asian_width(data)))
            entries.append(('Url', url_fmt.format(ord(data))))

            # Create the message's content and send it
            content = utils.indented_entry_to_str(entries)
            await self.bot.say_block(content)
        else:
            # Minimal info for each character
            entries = [
                '{} | `\\u{:04x}` | {} | {}'.format(char,
                                                   ord(char),
                                                   unicodedata.name(char, 'None'),
                                                   url_fmt.format(ord(char)))
                for char in data
            ]
            content = '\n'.join(entries)
            await self.bot.say(content)
Beispiel #26
0
 def mirrored(self):
     """Return ``unicodedata.mirrored`` for the character stored in ``self.c``."""
     return unicodedata.mirrored(self.c)
Beispiel #27
0
def is_open_close_char(char):
    """Return a truthy value if ``char`` is mirrored or its category is in ``categories``.

    The category is only looked up when ``mirrored(char)`` is falsy.

    NOTE(review): ``mirrored``, ``category`` and ``categories`` are defined
    outside this block; presumably the first two are the ``unicodedata``
    functions of the same name -- confirm.
    """
    return mirrored(char) or category(char) in categories
Beispiel #28
0
    async def unicode(self, ctx, *, arg):
        """Returns the information on a Unicode character or named character."""
        # Keep the raw input for the "not found" message; ``arg`` itself is
        # rewritten while the hexadecimal prefix is stripped.
        query = arg

        if len(arg) == 1:
            chars = [arg]
        else:
            # try to find what character is meant
            # if starts with "U+", "\x", "\u", it's hex
            if arg.upper().startswith(("U+", "\\U", "\\X")):
                arg = "0x" + arg[2:].strip()
            try:
                if arg.lower().startswith("0x"):
                    arg = arg[2:]
                chars = [chr(int(arg, 16))]
            except (ValueError, OverflowError):
                # Not hexadecimal, or not a valid code point (chr() raises
                # ValueError or OverflowError): otherwise, use name lookup
                try:
                    chars = [unicodedata.lookup(arg)]
                except KeyError:
                    # Unknown name: describe every character of the text.
                    chars = arg

        # Whitespace characters stripped when checking for blank values.
        blank_chars = " \t\r\n\v\f\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"

        embeds = []
        for n, char in enumerate(chars, start=1):
            value = ord(char)
            dt = {}
            dt["Character"] = char
            dt["Name"] = unicodedata.name(char, None)  # str or None
            dt["Decimal"] = unicodedata.decimal(char, None)  # int or None
            dt["Digit"] = unicodedata.digit(char, None)  # int or None
            dt["Numeric"] = unicodedata.numeric(char, None)  # float or None
            dt["Category"] = unicodedata.category(char)  # str
            dt["Bidirectional"] = unicodedata.bidirectional(char)  # str
            dt["Combining class"] = unicodedata.combining(char)  # int
            dt["East Asian width"] = unicodedata.east_asian_width(char)  # str
            dt["Mirrored"] = unicodedata.mirrored(char)  # int
            dt["Decomposition"] = unicodedata.decomposition(char)  # str

            embed = discord.Embed(
                title="Unicode codepoints of: {input}".format(input=arg),
                description="About Unicode U+{codepoint:04X}.".format(
                    codepoint=value))

            for k, v in dt.items():
                # Skip properties that are missing or empty.
                if v is not None and str(v):
                    # Quote values that consist of whitespace only so that
                    # they stay visible in the embed.
                    if not str(v).strip(blank_chars):
                        v = '"{}"'.format(v)
                    embed.add_field(name=k, value=str(v), inline=False)
            # Count the characters actually shown, not the length of the
            # typed input (e.g. "U+0041" is one character, not six).
            embed.set_footer(text="Character {index} of {count}".format(
                index=n, count=len(chars)))
            embeds.append(embed)

        if not embeds:
            # Nothing left to describe, e.g. the input was only a "U+" prefix.
            await ctx.send("Character not found: `{}`".format(query))
            return

        if len(embeds) > 1:
            await menu(ctx, embeds, DEFAULT_CONTROLS)
        else:
            await ctx.send(embed=embeds[0])
Beispiel #29
0
import unicodedata
# Per-character property lookups.
dollar, seven, u_umlaut = '$', '7', '\u00fc'
print(unicodedata.bidirectional(dollar))
print(unicodedata.category(dollar))
print(unicodedata.combining(seven))
print(unicodedata.decimal('1'))
print(unicodedata.decomposition(u_umlaut))
print(unicodedata.digit(seven))
print(unicodedata.lookup('COPYRIGHT SIGN'))
print(unicodedata.mirrored('('))
print(unicodedata.name(u_umlaut))

# Length of the same text ('resume' + combining acute accent) after
# composing (NFC) and decomposing (NFD) normalisation.
accented = 'resume\u0301'
for form in ('NFC', 'NFD'):
    print(len(unicodedata.normalize(form, accented)))

# Compatibility decomposition of U+2165 and numeric value of U+2157.
print(unicodedata.normalize('NFKD', '\u2165'))
print(unicodedata.numeric('\u2157'))

# Version of the Unicode database bundled with this interpreter.
print(unicodedata.unidata_version)
Beispiel #30
0
 def printUnicodeInfo(text, description):
     """Print a Unicode property summary for each character of *text*.

     A header line ``"<description>:"`` is printed first, followed by one
     line per character showing its index, hexadecimal code point, name,
     the character itself, general category, bidirectional class,
     combining class and mirrored flag.

     Characters that have no Unicode name (for example control characters)
     are shown with the placeholder ``<unnamed>`` instead of raising
     ``ValueError``.
     """
     print("{}:".format(description))
     for j, char in enumerate(text):
         # unicodedata.name() raises ValueError when no name is defined and
         # no default is given, so pass a placeholder default.
         print("{:2} {:04x} {} '{}'   (cat={} bid={} comb={} mirr={})".format(
             j, ord(char), unicodedata.name(char, "<unnamed>"), char,
             unicodedata.category(char), unicodedata.bidirectional(char),
             unicodedata.combining(char), unicodedata.mirrored(char)))
Beispiel #31
0
# Smoke checks for the unicodedata property lookups.

# numeric(): the supplied default comes back for non-numeric characters.
assert unicodedata.numeric(u'A', None) is None
assert unicodedata.numeric(u'9') == 9
assert unicodedata.numeric(u'\u215b') == 0.125
assert unicodedata.numeric(u'\u2468') == 9.0

# decimal(): only decimal digits have a value; others yield the default.
assert unicodedata.decimal(u'A', None) is None
assert unicodedata.decimal(u'9') == 9
assert unicodedata.decimal(u'\u215b', None) is None
assert unicodedata.decimal(u'\u2468', None) is None

# category(): general category abbreviations.
assert unicodedata.category(u'\uFFFE') == 'Cn'
assert unicodedata.category(u'a') == 'Ll'
assert unicodedata.category(u'A') == 'Lu'

# bidirectional(): empty string when no class is defined.
assert unicodedata.bidirectional(u'\uFFFE') == ''
assert unicodedata.bidirectional(u' ') == 'WS'
assert unicodedata.bidirectional(u'A') == 'L'

# decomposition(): empty string when there is no mapping.
assert unicodedata.decomposition(u'\uFFFE') == ''
assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'

# mirrored(): integer flag, 1 for mirrored characters.
assert unicodedata.mirrored(u'\uFFFE') == 0
assert unicodedata.mirrored(u'a') == 0
assert unicodedata.mirrored(u'\u2201') == 1

# combining(): canonical combining class, 0 when none.
assert unicodedata.combining(u'\uFFFE') == 0
assert unicodedata.combining(u'a') == 0
assert unicodedata.combining(u'\u20e1') == 230

# Parenthesised so the line is valid on Python 3 as well as Python 2
# (the bare print statement is a SyntaxError on Python 3).
print('done.')
Beispiel #32
0
 def mirrored(self):
     """Return ``ud.mirrored`` applied to this object's ``char`` attribute.

     ``ud`` is bound outside this snippet; presumably it is the
     ``unicodedata`` module imported under an alias (to be confirmed).
     """
     character = self.char
     return ud.mirrored(character)
Beispiel #33
0
    def printUnicodeData(self, text=None):
        """
        Print a Unicode property summary for each character of text.

        If text is None, self.currentText is used instead.

        Each output line shows the character index, hexadecimal code point,
        Unicode name, repr of the character, general category, bidirectional
        class, combining class and mirrored flag.

        Characters without a Unicode name (e.g., control characters) are
        shown as <unnamed> rather than raising ValueError.
        """
        if text is None: text = self.currentText

        for j, char in enumerate(text):
            # unicodedata.name() raises ValueError when a character has no
            # name and no default is supplied, so pass a placeholder.
            print("{:2} {:04x} {} {!r}   (cat={} bid={} comb={} mirr={})".format(
                j, ord(char), unicodedata.name(char, "<unnamed>"), char,
                unicodedata.category(char), unicodedata.bidirectional(char),
                unicodedata.combining(char), unicodedata.mirrored(char)))
# Smoke checks for the unicodedata property lookups.
# `verify` is not defined in this snippet; presumably it is the test_support
# helper imported elsewhere in the original script.

# numeric(): the supplied default comes back for non-numeric characters.
verify(unicodedata.numeric(u"A", None) is None)
verify(unicodedata.numeric(u"9") == 9)
verify(unicodedata.numeric(u"\u215b") == 0.125)
verify(unicodedata.numeric(u"\u2468") == 9.0)

# decimal(): only decimal digits have a value; others yield the default.
verify(unicodedata.decimal(u"A", None) is None)
verify(unicodedata.decimal(u"9") == 9)
verify(unicodedata.decimal(u"\u215b", None) is None)
verify(unicodedata.decimal(u"\u2468", None) is None)

# category(): general category abbreviations.
verify(unicodedata.category(u"\uFFFE") == "Cn")
verify(unicodedata.category(u"a") == "Ll")
verify(unicodedata.category(u"A") == "Lu")

# bidirectional(): empty string when no class is defined.
verify(unicodedata.bidirectional(u"\uFFFE") == "")
verify(unicodedata.bidirectional(u" ") == "WS")
verify(unicodedata.bidirectional(u"A") == "L")

# decomposition(): empty string when there is no mapping.
verify(unicodedata.decomposition(u"\uFFFE") == "")
verify(unicodedata.decomposition(u"\u00bc") == "<fraction> 0031 2044 0034")

# mirrored(): integer flag, 1 for mirrored characters.
verify(unicodedata.mirrored(u"\uFFFE") == 0)
verify(unicodedata.mirrored(u"a") == 0)
verify(unicodedata.mirrored(u"\u2201") == 1)

# combining(): canonical combining class, 0 when none.
verify(unicodedata.combining(u"\uFFFE") == 0)
verify(unicodedata.combining(u"a") == 0)
verify(unicodedata.combining(u"\u20e1") == 230)

# Parenthesised so the line is valid on Python 3 as well as Python 2
# (the bare print statement is a SyntaxError on Python 3).
print("ok")
# Smoke checks for the unicodedata property lookups.
# `verify` is not defined in this snippet; presumably it is the test_support
# helper imported elsewhere in the original script.

# numeric(): the supplied default comes back for non-numeric characters.
verify(unicodedata.numeric(u'A',None) is None)
verify(unicodedata.numeric(u'9') == 9)
verify(unicodedata.numeric(u'\u215b') == 0.125)
verify(unicodedata.numeric(u'\u2468') == 9.0)

# decimal(): only decimal digits have a value; others yield the default.
verify(unicodedata.decimal(u'A',None) is None)
verify(unicodedata.decimal(u'9') == 9)
verify(unicodedata.decimal(u'\u215b',None) is None)
verify(unicodedata.decimal(u'\u2468',None) is None)

# category(): general category abbreviations.
verify(unicodedata.category(u'\uFFFE') == 'Cn')
verify(unicodedata.category(u'a') == 'Ll')
verify(unicodedata.category(u'A') == 'Lu')

# bidirectional(): empty string when no class is defined.
verify(unicodedata.bidirectional(u'\uFFFE') == '')
verify(unicodedata.bidirectional(u' ') == 'WS')
verify(unicodedata.bidirectional(u'A') == 'L')

# decomposition(): empty string when there is no mapping.
verify(unicodedata.decomposition(u'\uFFFE') == '')
verify(unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034')

# mirrored(): integer flag, 1 for mirrored characters.
verify(unicodedata.mirrored(u'\uFFFE') == 0)
verify(unicodedata.mirrored(u'a') == 0)
verify(unicodedata.mirrored(u'\u2201') == 1)

# combining(): canonical combining class, 0 when none.
verify(unicodedata.combining(u'\uFFFE') == 0)
verify(unicodedata.combining(u'a') == 0)
verify(unicodedata.combining(u'\u20e1') == 230)

# Parenthesised so the line is valid on Python 3 as well as Python 2
# (the bare print statement is a SyntaxError on Python 3).
print('ok')
Beispiel #36
0
    def printUnicodeData( self, text:Optional[str]=None ) -> None:
        """
        Report a Unicode property summary for each character of text.

        If text is None, self.currentText is used instead.

        One line per character is passed to vPrint (with 'Quiet' and
            debuggingThisModule) showing the character index, hexadecimal
            code point, Unicode name, repr of the character, general category,
            bidirectional class, combining class and mirrored flag.

        Characters without a Unicode name (e.g., control characters) are
            shown as <unnamed> rather than raising ValueError.
        """
        if text is None: text = self.currentText

        for j,char in enumerate(text):
            # unicodedata.name() raises ValueError when a character has no
            #   name and no default is supplied, so pass a placeholder.
            vPrint( 'Quiet', debuggingThisModule, "{:2} {:04x} {} {!r}   (cat={} bid={} comb={} mirr={})" \
                .format(j, ord(char), unicodedata.name(char, "<unnamed>"), char, unicodedata.category(char), unicodedata.bidirectional(char), unicodedata.combining(char), unicodedata.mirrored(char) ) )
Beispiel #37
0
import unicodedata


if __name__ == "__main__":
    s = "hello world, Lcoderfit"
    # Look a character up by its Unicode name.
    print(unicodedata.lookup('left curly bracket'))

    # Name and numeric properties of single characters.
    print(unicodedata.name('\\'))
    print(unicodedata.decimal("1"))
    print(unicodedata.digit("4"))
    print(unicodedata.numeric("9"))
    print(unicodedata.category("/"))

    print(unicodedata.bidirectional("b"))

    print(unicodedata.east_asian_width("b"))
    # mirrored() accepts exactly one character; passing the two-character
    # string "{}" raises TypeError, so query each bracket separately.
    for bracket in "{}":
        print(unicodedata.mirrored(bracket))
# Smoke checks for the unicodedata property lookups.
# `verify` is not defined in this snippet; presumably it is the test_support
# helper imported elsewhere in the original script.

# numeric(): the supplied default comes back for non-numeric characters.
verify(unicodedata.numeric(u'A',None) is None)
verify(unicodedata.numeric(u'9') == 9)
verify(unicodedata.numeric(u'\u215b') == 0.125)
verify(unicodedata.numeric(u'\u2468') == 9.0)

# decimal(): only decimal digits have a value; others yield the default.
verify(unicodedata.decimal(u'A',None) is None)
verify(unicodedata.decimal(u'9') == 9)
verify(unicodedata.decimal(u'\u215b',None) is None)
verify(unicodedata.decimal(u'\u2468',None) is None)

# category(): general category abbreviations.
verify(unicodedata.category(u'\uFFFE') == 'Cn')
verify(unicodedata.category(u'a') == 'Ll')
verify(unicodedata.category(u'A') == 'Lu')

# bidirectional(): empty string when no class is defined.
verify(unicodedata.bidirectional(u'\uFFFE') == '')
verify(unicodedata.bidirectional(u' ') == 'WS')
verify(unicodedata.bidirectional(u'A') == 'L')

# decomposition(): empty string when there is no mapping.
verify(unicodedata.decomposition(u'\uFFFE') == '')
verify(unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034')

# mirrored(): integer flag, 1 for mirrored characters.
verify(unicodedata.mirrored(u'\uFFFE') == 0)
verify(unicodedata.mirrored(u'a') == 0)
verify(unicodedata.mirrored(u'\u2201') == 1)

# combining(): canonical combining class, 0 when none.
verify(unicodedata.combining(u'\uFFFE') == 0)
verify(unicodedata.combining(u'a') == 0)
verify(unicodedata.combining(u'\u20e1') == 230)

# Parenthesised so the line is valid on Python 3 as well as Python 2
# (the bare print statement is a SyntaxError on Python 3).
print('ok')
Beispiel #39
0
""" Test script for the unicodedata module.
    Written by Marc-Andre Lemburg ([email protected]).
    (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
from test_support import verify, verbose
import sha
encoding = 'utf-8'
# Builds, for every code point below 65536, a list of u'1'/u'0' flags
# recording the result of each unicode string predicate.
# NOTE(review): Python 2 code (sha module, unichr). The definition is cut
# off in this snippet -- the `data` list is never closed.
def test_methods():
    # Hash object; presumably fed with `data` in the part that is not shown.
    h = sha.sha()
    for i in range(65536):
        char = unichr(i)
        # `cond and u'1' or u'0'` is the pre-ternary idiom for
        # u'1' if cond else u'0'.
        data = [
            # Predicates (single char)
            char.isalnum() and u'1' or u'0',
            char.isalpha() and u'1' or u'0',
            char.isdecimal() and u'1' or u'0',
            char.isdigit() and u'1' or u'0',
            char.islower() and u'1' or u'0',
            char.isnumeric() and u'1' or u'0',
            char.isspace() and u'1' or u'0',
            char.istitle() and u'1' or u'0',
            char.isupper() and u'1' or u'0',
            # Predicates (multiple chars)
            (char + u'abc').isalnum() and u'1' or u'0',
            (char + u'abc').isalpha() and u'1' or u'0',
            (char + u'123').isdecimal() and u'1' or u'0',
            (char + u'123').isdigit() and u'1' or u'0',
            (char + u'abc').islower() and u'1' or u'0',
            (char + u'123').isnumeric() and u'1' or u'0',
            (char + u' \t').isspace() and u'1' or u'0',
 def test_mirrored(self):
     import unicodedata
     # For no reason, unicodedata.mirrored() returns an int, not a bool
     assert repr(unicodedata.mirrored(' ')) == '0'
Beispiel #41
0
    def test_mirrored(self):
        import unicodedata

        # For no reason, unicodedata.mirrored() returns an int, not a bool
        assert repr(unicodedata.mirrored(u" ")) == "0"
Beispiel #42
0
 def printUnicodeInfo( text, description ):
     """Print a Unicode property summary for each character of *text*.

     A header line ``"<description>:"`` is printed first, followed by one
     line per character showing its index, hexadecimal code point, name,
     the character itself, general category, bidirectional class,
     combining class and mirrored flag.

     Characters that have no Unicode name (for example control characters)
     are shown with the placeholder ``<unnamed>`` instead of raising
     ``ValueError``.
     """
     print( "{}:".format( description ) )
     for j,char in enumerate(text):
         # unicodedata.name() raises ValueError when no name is defined and
         # no default is given, so pass a placeholder default.
         print( "{:2} {:04x} {} '{}'   (cat={} bid={} comb={} mirr={})" \
             .format(j, ord(char), unicodedata.name(char, "<unnamed>"), char, unicodedata.category(char), unicodedata.bidirectional(char), unicodedata.combining(char), unicodedata.mirrored(char) ) )
Beispiel #43
0
""" Test script for the unicodedata module.
    Written by Marc-Andre Lemburg ([email protected]).
    (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
from test_support import verify, verbose
import sha
encoding = 'utf-8'

# Builds, for every code point below 65536, a list of u'1'/u'0' flags
# recording the result of each unicode string predicate.
# NOTE(review): Python 2 code (sha module, unichr). The definition is cut
# off in this snippet -- the `data` list is never closed.
def test_methods():
    # Hash object; presumably fed with `data` in the part that is not shown.
    h = sha.sha()
    for i in range(65536):
        char = unichr(i)
        # `cond and u'1' or u'0'` is the pre-ternary idiom for
        # u'1' if cond else u'0'.
        data = [
            # Predicates (single char)
            char.isalnum() and u'1' or u'0',
            char.isalpha() and u'1' or u'0',
            char.isdecimal() and u'1' or u'0',
            char.isdigit() and u'1' or u'0',
            char.islower() and u'1' or u'0',
            char.isnumeric() and u'1' or u'0',
            char.isspace() and u'1' or u'0',
            char.istitle() and u'1' or u'0',
            char.isupper() and u'1' or u'0',
            # Predicates (multiple chars)
            (char + u'abc').isalnum() and u'1' or u'0',
            (char + u'abc').isalpha() and u'1' or u'0',
            (char + u'123').isdecimal() and u'1' or u'0',
            (char + u'123').isdigit() and u'1' or u'0',
            (char + u'abc').islower() and u'1' or u'0',
            (char + u'123').isnumeric() and u'1' or u'0',
Beispiel #44
0
# NOTE: s1 and s2 are defined outside this snippet. The remarks about their
# contents below are translated from the original author's comments.

# NFC: 'e' and '\u0301' are merged into the single character 'é', so the
# result is 'café' (printed without quotes).
composed_s2 = unicodedata.normalize('NFC', s2)
print(composed_s2)

# NFD splits a composed character in two: 'é' becomes 'e' plus the accent
# mark, i.e. the output is 'cafeˋ'.
decomposed_s1 = unicodedata.normalize('NFD', s1)
print(decomposed_s1)

# The last two characters of s2 are 'e' and '\u0301'. (Original author: "I do
# not know the internal mechanism; this is my own understanding and may be
# wrong" -- '\u0301' is rendered directly as the accent mark, giving 'cafeˋ'.)
decomposed_s2 = unicodedata.normalize('NFD', s2)
print(decomposed_s2)

print("-------------------")
for sample in ('我', '1', 'a', 'ﷺ'):
    print(unicodedata.east_asian_width(sample))
# F: fullwidth, H: halfwidth, W: wide, Na: narrow, A: ambiguous, N: neutral

print(unicodedata.mirrored('薛'))  # original author's note: "not understood"

# Decomposition mappings: the first two characters are decomposable, while
# 'e' is not and therefore yields an empty string (a blank output line).
for sample in ('ﷺ', 'é', 'e'):
    print(unicodedata.decomposition(sample))

# is_normalized(form, unistr) checks whether the Unicode string unistr is in
# normal form `form`; valid forms are 'NFC', 'NFKC', 'NFD' and 'NFKD'.
# Requires Python 3.8.
# print(unicodedata.is_normalized('NFC','a')) # true
# print(unicodedata.is_normalized('NFC','ﷺ')) # true
# print(unicodedata.is_normalized('NFKD','ﷺ')) # false

print(unicodedata.unidata_version)
print(unicodedata.ucd_3_2_0)

#print('const CATEGORY_e CHAR_CATEGORIES[] = {%s};' % ', '.join(unicodedata.category(chr(codepoint)) for codepoint in range(0x110000)))
# Smoke checks for the unicodedata property lookups.

# numeric(): the supplied default comes back for non-numeric characters.
assert unicodedata.numeric(u'A',None) is None
assert unicodedata.numeric(u'9') == 9
assert unicodedata.numeric(u'\u215b') == 0.125
assert unicodedata.numeric(u'\u2468') == 9.0

# decimal(): only decimal digits have a value; others yield the default.
assert unicodedata.decimal(u'A',None) is None
assert unicodedata.decimal(u'9') == 9
assert unicodedata.decimal(u'\u215b',None) is None
assert unicodedata.decimal(u'\u2468',None) is None

# category(): general category abbreviations.
assert unicodedata.category(u'\uFFFE') == 'Cn'
assert unicodedata.category(u'a') == 'Ll'
assert unicodedata.category(u'A') == 'Lu'

# bidirectional(): empty string when no class is defined.
assert unicodedata.bidirectional(u'\uFFFE') == ''
assert unicodedata.bidirectional(u' ') == 'WS'
assert unicodedata.bidirectional(u'A') == 'L'

# decomposition(): empty string when there is no mapping.
assert unicodedata.decomposition(u'\uFFFE') == ''
assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'

# mirrored(): integer flag, 1 for mirrored characters.
assert unicodedata.mirrored(u'\uFFFE') == 0
assert unicodedata.mirrored(u'a') == 0
assert unicodedata.mirrored(u'\u2201') == 1

# combining(): canonical combining class, 0 when none.
assert unicodedata.combining(u'\uFFFE') == 0
assert unicodedata.combining(u'a') == 0
assert unicodedata.combining(u'\u20e1') == 230

# Parenthesised so the line is valid on Python 3 as well as Python 2
# (the bare print statement is a SyntaxError on Python 3).
print('done.')
Beispiel #46
0
    def printUnicodeData( self, text=None ):
        """
        Print a Unicode property summary for each character of text.

        If text is None, self.currentText is used instead.

        Each output line shows the character index, hexadecimal code point,
            Unicode name, repr of the character, general category,
            bidirectional class, combining class and mirrored flag.

        Characters without a Unicode name (e.g., control characters) are
            shown as <unnamed> rather than raising ValueError.
        """
        if text is None: text = self.currentText

        for j,char in enumerate(text):
            # unicodedata.name() raises ValueError when a character has no
            #   name and no default is supplied, so pass a placeholder.
            print( "{:2} {:04x} {} {!r}   (cat={} bid={} comb={} mirr={})" \
                .format(j, ord(char), unicodedata.name(char, "<unnamed>"), char, unicodedata.category(char), unicodedata.bidirectional(char), unicodedata.combining(char), unicodedata.mirrored(char) ) )