コード例 #1
0
def test_digit_chars_contains_all_valid_unicode_digit_characters():
    """Check that every non-ASCII Unicode digit appears in ``digit_chars``.

    Every code point is visited in turn. The ASCII digits ``0``-``9`` are
    skipped, and any other character for which ``unicodedata.digit`` yields
    a value must be a member of ``digit_chars``.
    """
    # Build the ASCII-digit lookup once instead of on every iteration.
    ascii_digits = set('0123456789')
    # The upper bound is exclusive, so 0X110000 is needed for the scan to
    # include U+10FFFF, the final code point.
    for i in py23_range(0X110000):
        try:
            a = py23_unichr(i)
        except ValueError:
            # The code point cannot be represented (presumably a narrow
            # Unicode build), so there is nothing further to check.
            break
        if a in ascii_digits:
            continue
        if unicodedata.digit(a, None) is not None:
            assert a in digit_chars
コード例 #2
0
def test_digit_chars_contains_all_valid_unicode_digit_characters():
    """Check that every non-ASCII Unicode digit is a known numeric character.

    Every code point is visited in turn. The ASCII digits ``0``-``9`` are
    skipped, and for any other character for which ``unicodedata.digit``
    yields a value, the code point must be in ``numeric_hex`` and the
    character must be in ``numeric_chars``.
    """
    # Sets give constant-time membership tests inside the long loop below.
    set_numeric_hex = set(numeric_hex)
    set_numeric_chars = set(numeric_chars)
    # Build the ASCII-digit lookup once instead of on every iteration.
    ascii_digits = set('0123456789')
    for i in py23_range(0X110000):
        try:
            a = py23_unichr(i)
        except ValueError:
            # The code point cannot be represented (presumably a narrow
            # Unicode build), so there is nothing further to check.
            break
        if a in ascii_digits:
            continue
        if unicodedata.digit(a, None) is not None:
            assert i in set_numeric_hex
            assert a in set_numeric_chars
コード例 #3
0
ファイル: locale.py プロジェクト: SethMMorton/natsort
having to worry about if it is using PyICU or the built-in locale.
"""
from __future__ import absolute_import, division, print_function, unicode_literals

# Std. lib imports.
import sys
from functools import cmp_to_key

# Local imports.
from natsort.compat.py23 import PY_VERSION, py23_unichr

# null_string is simply the empty string.
# null_string_max is the character with the largest code point
# (sys.maxunicode) repeated 20 times, so it should sort after any other
# string; only rather odd data would come after it.
null_string = ""
null_string_max = 20 * py23_unichr(sys.maxunicode)

# Make the strxfrm function from strcoll on Python2
# It can be buggy (especially on BSD-based systems),
# so prefer icu if available.
try:  # noqa: C901
    import icu
    from locale import getlocale

    null_string_locale = b""

    # This string should in theory be sorted after any other byte
    # string because it contains the max byte char repeated many times.
    # You would need some odd data to come after that.
    null_string_locale_max = b"x7f" * 50
コード例 #4
0
def load_locale(x):
    """Convenience to load a locale, trying ISO8859-1 first.

    *x* is the locale name without an encoding suffix (e.g. ``'de_DE'``).
    ``x.ISO8859-1`` is tried first; if the locale module rejects it,
    ``x.UTF-8`` is tried instead. A ``locale.Error`` from the second
    attempt propagates to the caller.
    """
    try:
        locale.setlocale(locale.LC_ALL, str('{0}.ISO8859-1'.format(x)))
    except locale.Error:
        # Only a rejected locale triggers the fallback; anything else
        # (KeyboardInterrupt, programming errors, ...) is left to propagate.
        locale.setlocale(locale.LC_ALL, str('{0}.UTF-8'.format(x)))

# Probe for the de_DE locale by attempting to load it; the flag records
# whether the attempt succeeded.
try:
    load_locale('de_DE')
except locale.Error:
    has_locale_de_DE = False
else:
    has_locale_de_DE = True

# Choose the function used to lowercase a string: casefold when the
# string type of this Python version provides it, otherwise lower.
low = py23_str.casefold if hasattr(py23_str, 'casefold') else py23_str.lower

# Some unicode values are known failures on BSD systems for reasons that
# have nothing to do with natsort (strxfrm raises a ValueError).
# Collect them here so they can be filtered out.
try:
    bad_uni_chars = {
        py23_unichr(code_point)
        for code_point in py23_range(0X10fefd, 0X10ffff + 1)
    }
except ValueError:
    # Narrow unicode build... no worries.
    bad_uni_chars = set()
コード例 #5
0
import pytest
from hypothesis import example, given
from hypothesis.strategies import floats, integers, text
from natsort.compat.fastnumbers import fast_float, fast_int
from natsort.compat.locale import get_strxfrm
from natsort.compat.py23 import py23_range, py23_str, py23_unichr
from natsort.ns_enum import NS_DUMB, ns
from natsort.utils import groupletters, string_component_transform_factory

# Some unicode values are known failures with the builtin locale library
# on BSD systems for reasons that have nothing to do with natsort (strxfrm
# raises a ValueError). Collect them here so they can be filtered out.
try:
    bad_uni_chars = frozenset(map(py23_unichr, py23_range(0X10fefd, 0X10ffff + 1)))
except ValueError:
    # Narrow unicode build... no worries.
    bad_uni_chars = frozenset()


def no_bad_uni_chars(x, _bad_chars=bad_uni_chars):
    """Return True when no element of *x* is one of the bad characters."""
    return all(char not in _bad_chars for char in x)


def no_null(x):
    """Return True when *x* contains no null character."""
    has_null = "\0" in x
    return not has_null
コード例 #6
0
    0x20AEA,
    0x20AFD,
    0x20B19,
    0x22390,
    0x22998,
    0x23B1B,
    0x2626D,
    0x2F890,
)

# Convert each code point in numeric_hex into its literal Unicode character.
# A ValueError from py23_unichr ends the loop early; this is expected only
# on a narrow Unicode build.
# The extra check with unicodedata is in case this Python version
# does not support some characters.
numeric_chars = []
for a in numeric_hex:
    try:
        character = py23_unichr(a)
    except ValueError:  # pragma: no cover
        break
    if unicodedata.numeric(character, None) is None:
        continue  # pragma: no cover
    numeric_chars.append(character)

# The digit characters are a subset of the numerals.
digit_chars = [a for a in numeric_chars if unicodedata.digit(a, None) is not None]

# Create a single string from each of the lists above.
digits = "".join(digit_chars)
numeric = "".join(numeric_chars)
コード例 #7
0
"""
from __future__ import absolute_import, division, print_function, unicode_literals

import unicodedata

from natsort.compat.py23 import py23_unichr
from natsort.unicode_numeric_hex import numeric_hex

# Turn every code point listed in numeric_hex into its Unicode character.
# A ValueError ends the loop early (expected on a narrow Unicode build).
# Characters that unicodedata does not report as numeric are left out,
# in case this Python version does not support them.
numeric_chars = []
for code_point in numeric_hex:
    try:
        candidate = py23_unichr(code_point)
    except ValueError:  # pragma: no cover
        break
    if unicodedata.numeric(candidate, None) is not None:
        numeric_chars.append(candidate)

# The digit characters are a subset of the numerals.
digit_chars = [
    char for char in numeric_chars if unicodedata.digit(char, None) is not None
]

# The decimal characters are a subset of the numerals
# (probably of the digits, but let's be safe).
decimal_chars = [
    char for char in numeric_chars if unicodedata.decimal(char, None) is not None
]

# Join the decimal characters into a single string.
decimals = "".join(decimal_chars)
コード例 #8
0
ファイル: unicode_numbers.py プロジェクト: DarkSir23/mylar
    0X1E957, 0X1E958, 0X1E959, 0X1F100, 0X1F101, 0X1F102,
    0X1F103, 0X1F104, 0X1F105, 0X1F106, 0X1F107, 0X1F108,
    0X1F109, 0X1F10A, 0X1F10B, 0X1F10C, 0X20001, 0X20064,
    0X200E2, 0X20121, 0X2092A, 0X20983, 0X2098C, 0X2099C,
    0X20AEA, 0X20AFD, 0X20B19, 0X22390, 0X22998, 0X23B1B,
    0X2626D, 0X2F890,
)

# Convert each code point in numeric_hex into its literal Unicode character.
# A ValueError from py23_unichr ends the loop early; this is expected only
# on a narrow Unicode build.
# The extra check with unicodedata is in case this Python version
# does not support some characters.
numeric_chars = []
for a in numeric_hex:
    try:
        character = py23_unichr(a)
    except ValueError:  # pragma: no cover
        break
    if unicodedata.numeric(character, None) is None:
        continue  # pragma: no cover
    numeric_chars.append(character)

# The digit characters are a subset of the numerals.
digit_chars = [a for a in numeric_chars
               if unicodedata.digit(a, None) is not None]

# The decimal characters are a subset of the numerals
# (probably of the digits, but let's be safe).
decimal_chars = [a for a in numeric_chars
                 if unicodedata.decimal(a, None) is not None]
コード例 #9
0
    0X20983,
    0X2098C,
    0X2099C,
    0X20AEA,
    0X20AFD,
    0X20B19,
    0X22390,
    0X22998,
    0X23B1B,
    0X2626D,
    0X2F890,
)

# Helper script that can regenerate the list of hex numbers above: it
# prints every code point whose character unicodedata reports as numeric,
# leaving out the ASCII digits.
if __name__ == "__main__":
    import unicodedata
    from natsort.compat.py23 import py23_range, py23_unichr

    ascii_digits = "0123456789"
    hex_chars = []
    for code_point in py23_range(0X110000):
        try:
            char = py23_unichr(code_point)
        except ValueError:
            # Code point not representable; stop scanning.
            break
        if char not in ascii_digits and unicodedata.numeric(char, None) is not None:
            hex_chars.append(code_point)

    formatted = ("0X{:X}".format(code_point) for code_point in hex_chars)
    print(", ".join(formatted))
コード例 #10
0
    0X1D7FA, 0X1D7FB, 0X1D7FC, 0X1D7FD, 0X1D7FE, 0X1D7FF, 0X1E8C7, 0X1E8C8,
    0X1E8C9, 0X1E8CA, 0X1E8CB, 0X1E8CC, 0X1E8CD, 0X1E8CE, 0X1E8CF, 0X1E950,
    0X1E951, 0X1E952, 0X1E953, 0X1E954, 0X1E955, 0X1E956, 0X1E957, 0X1E958,
    0X1E959, 0X1F100, 0X1F101, 0X1F102, 0X1F103, 0X1F104, 0X1F105, 0X1F106,
    0X1F107, 0X1F108, 0X1F109, 0X1F10A, 0X1F10B, 0X1F10C, 0X20001, 0X20064,
    0X200E2, 0X20121, 0X2092A, 0X20983, 0X2098C, 0X2099C, 0X20AEA, 0X20AFD,
    0X20B19, 0X22390, 0X22998, 0X23B1B, 0X2626D, 0X2F890)

# Convert each code point in numeric_hex into its literal Unicode character.
# A ValueError from py23_unichr ends the loop early; this is expected only
# on a narrow Unicode build.
# The extra check with unicodedata is in case this Python version
# does not support some characters.
numeric_chars = []
for a in numeric_hex:
    try:
        character = py23_unichr(a)
    except ValueError:  # pragma: no cover
        break
    if unicodedata.numeric(character, None) is None:
        continue  # pragma: no cover
    numeric_chars.append(character)

# The digit characters are a subset of the numerals.
digit_chars = [
    a for a in numeric_chars if unicodedata.digit(a, None) is not None
]

# Create a single string from each of the lists above.
digits = ''.join(digit_chars)
numeric = ''.join(numeric_chars)