def test_digit_chars_contains_all_valid_unicode_digit_characters():
    """Check that ``digit_chars`` holds every non-ASCII Unicode digit.

    Every code point is converted to a character; the ASCII digits
    ``0``-``9`` are skipped, and any other character for which
    ``unicodedata.digit`` returns a value must be present in
    ``digit_chars``.
    """
    # Built once; the membership test runs for every code point.
    ascii_digits = frozenset('0123456789')
    # 0X110000 is one past U+10FFFF, so the last code point is scanned too.
    for i in py23_range(0X110000):
        try:
            a = py23_unichr(i)
        except ValueError:
            # The character cannot be created (presumably a narrow
            # Unicode build), so stop scanning.
            break
        if a in ascii_digits:
            continue
        if unicodedata.digit(a, None) is not None:
            assert a in digit_chars
def test_digit_chars_contains_all_valid_unicode_digit_characters():
    """Verify every non-ASCII Unicode digit is listed in the numeric tables.

    Each code point that ``unicodedata.digit`` recognizes (other than the
    ASCII digits) must appear in ``numeric_hex`` and its character must
    appear in ``numeric_chars``.
    """
    known_hex = set(numeric_hex)
    known_chars = set(numeric_chars)
    ascii_digits = set('0123456789')
    for code_point in py23_range(0X110000):
        try:
            char = py23_unichr(code_point)
        except ValueError:
            # The character cannot be created; stop scanning.
            break
        # Nothing to check for ASCII digits or for non-digit characters.
        if char in ascii_digits or unicodedata.digit(char, None) is None:
            continue
        assert code_point in known_hex
        assert char in known_chars
Example #3
0
having to worry about if it is using PyICU or the built-in locale.
"""
from __future__ import absolute_import, division, print_function, unicode_literals

# Std. lib imports.
import sys
from functools import cmp_to_key

# Local imports.
from natsort.compat.py23 import PY_VERSION, py23_unichr

# The empty string.
# NOTE(review): presumably the counterpart that sorts before any other
# string -- confirm against the callers.
null_string = ""
# This string should be sorted after any other string because
# it contains the max unicode character repeated 20 times.
# You would need some odd data to come after that.
null_string_max = py23_unichr(sys.maxunicode) * 20

# Make the strxfrm function from strcoll on Python2
# It can be buggy (especially on BSD-based systems),
# so prefer icu if available.
try:  # noqa: C901
    import icu
    from locale import getlocale

    null_string_locale = b""

    # This string should in theory be sorted after any other byte
    # string because it contains the max byte char repeated many times.
    # You would need some odd data to come after that.
    null_string_locale_max = b"x7f" * 50
Example #4
0
def load_locale(x):
    """Convenience to load a locale, trying ISO8859-1 first.

    Sets ``LC_ALL`` to ``<x>.ISO8859-1``; if that locale is rejected,
    falls back to ``<x>.UTF-8``.

    Parameters
    ----------
    x : str
        Locale name without an encoding suffix, e.g. ``'de_DE'``.

    Raises
    ------
    locale.Error
        If the UTF-8 fallback is rejected as well.
    """
    try:
        locale.setlocale(locale.LC_ALL, str('{0}.ISO8859-1'.format(x)))
    except locale.Error:
        # Only a rejected locale triggers the fallback; unrelated
        # exceptions (e.g. KeyboardInterrupt) are no longer swallowed.
        locale.setlocale(locale.LC_ALL, str('{0}.UTF-8'.format(x)))

# Probe for the de_DE locale and record whether loading it succeeded.
try:
    load_locale('de_DE')
except locale.Error:
    has_locale_de_DE = False
else:
    has_locale_de_DE = True

# Lowercase with casefold when the string type on this Python version
# provides it; otherwise fall back to lower.
low = getattr(py23_str, 'casefold', py23_str.lower)

# Some unicode values are known failures on BSD systems (strxfrm raises
# a ValueError); this has nothing to do with natsort, so collect them
# here so that they can be filtered out.
try:
    bad_uni_chars = {
        py23_unichr(x) for x in py23_range(0X10fefd, 0X10ffff + 1)
    }
except ValueError:
    # Narrow unicode build... no worries.
    bad_uni_chars = set()
import pytest
from hypothesis import example, given
from hypothesis.strategies import floats, integers, text
from natsort.compat.fastnumbers import fast_float, fast_int
from natsort.compat.locale import get_strxfrm
from natsort.compat.py23 import py23_range, py23_str, py23_unichr
from natsort.ns_enum import NS_DUMB, ns
from natsort.utils import groupletters, string_component_transform_factory

# Some unicode values are known failures with the builtin locale library
# on BSD systems (strxfrm raises a ValueError). This has nothing to do
# with natsort, so collect them here so that they can be filtered out.
try:
    bad_uni_chars = frozenset(
        map(py23_unichr, py23_range(0X10fefd, 0X10ffff + 1))
    )
except ValueError:
    # Narrow unicode build... no worries.
    bad_uni_chars = frozenset()


def no_bad_uni_chars(x, _bad_chars=bad_uni_chars):
    """Return True when no character of ``x`` is in ``_bad_chars``."""
    return all(y not in _bad_chars for y in x)


def no_null(x):
    """Return True when the text ``x`` has no null character in it."""
    contains_null = "\0" in x
    return not contains_null
Example #6
0
    0x20AEA,
    0x20AFD,
    0x20B19,
    0x22390,
    0x22998,
    0x23B1B,
    0x2626D,
    0x2F890,
)

# Convert each hex into the literal Unicode character.
# Stop if a ValueError is raised in case of a narrow Unicode build.
# The extra check with unicodedata is in case this Python version
# does not support some characters.
numeric_chars = []
for a in numeric_hex:
    try:
        character = py23_unichr(a)
    except ValueError:  # pragma: no cover
        break
    # Skip code points that unicodedata does not report as numeric.
    if unicodedata.numeric(character, None) is None:
        continue  # pragma: no cover
    numeric_chars.append(character)

# The digit characters are a subset of the numerals.
digit_chars = [a for a in numeric_chars if unicodedata.digit(a, None) is not None]

# Create a single string with the above data.
digits = "".join(digit_chars)
numeric = "".join(numeric_chars)
Example #7
0
"""
from __future__ import absolute_import, division, print_function, unicode_literals

import unicodedata

from natsort.compat.py23 import py23_unichr
from natsort.unicode_numeric_hex import numeric_hex

# Turn every hex value into its literal Unicode character, keeping only
# those that unicodedata reports as numeric (this Python version may not
# support some of them). A ValueError ends the conversion early; it is
# expected on a narrow Unicode build.
numeric_chars = []
for a in numeric_hex:
    try:
        character = py23_unichr(a)
    except ValueError:  # pragma: no cover
        break
    if unicodedata.numeric(character, None) is not None:
        numeric_chars.append(character)

# The digit characters are a subset of the numerals.
digit_chars = [
    a for a in numeric_chars
    if unicodedata.digit(a, None) is not None
]

# The decimal characters are a subset of the numerals
# (probably of the digits, but let's be safe).
decimal_chars = [
    a for a in numeric_chars
    if unicodedata.decimal(a, None) is not None
]

# Create a single string with the above data.
decimals = "".join(decimal_chars)
Example #8
0
    0X1E957, 0X1E958, 0X1E959, 0X1F100, 0X1F101, 0X1F102,
    0X1F103, 0X1F104, 0X1F105, 0X1F106, 0X1F107, 0X1F108,
    0X1F109, 0X1F10A, 0X1F10B, 0X1F10C, 0X20001, 0X20064,
    0X200E2, 0X20121, 0X2092A, 0X20983, 0X2098C, 0X2099C,
    0X20AEA, 0X20AFD, 0X20B19, 0X22390, 0X22998, 0X23B1B,
    0X2626D, 0X2F890,
)

# Convert each hex into the literal Unicode character.
# Stop if a ValueError is raised in case of a narrow Unicode build.
# The extra check with unicodedata is in case this Python version
# does not support some characters.
numeric_chars = []
for a in numeric_hex:
    try:
        character = py23_unichr(a)
    except ValueError:  # pragma: no cover
        break
    # Skip code points that unicodedata does not report as numeric.
    if unicodedata.numeric(character, None) is None:
        continue  # pragma: no cover
    numeric_chars.append(character)

# The digit characters are a subset of the numerals.
digit_chars = [a for a in numeric_chars
               if unicodedata.digit(a, None) is not None]

# The decimal characters are a subset of the numerals
# (probably of the digits, but let's be safe).
decimal_chars = [a for a in numeric_chars
                 if unicodedata.decimal(a, None) is not None]
    0X20983,
    0X2098C,
    0X2099C,
    0X20AEA,
    0X20AFD,
    0X20B19,
    0X22390,
    0X22998,
    0X23B1B,
    0X2626D,
    0X2F890,
)

# Running this module as a script prints the hex values listed above.
if __name__ == "__main__":
    import unicodedata
    from natsort.compat.py23 import py23_range, py23_unichr

    ascii_digits = "0123456789"
    hex_chars = []
    for code_point in py23_range(0X110000):
        try:
            char = py23_unichr(code_point)
        except ValueError:
            # The character cannot be created; stop scanning.
            break
        # Keep every numeric code point except the ASCII digits.
        if char not in ascii_digits and unicodedata.numeric(char, None) is not None:
            hex_chars.append(code_point)

    formatted = ["0X{:X}".format(value) for value in hex_chars]
    print(", ".join(formatted))
Example #10
0
    0X1D7FA, 0X1D7FB, 0X1D7FC, 0X1D7FD, 0X1D7FE, 0X1D7FF, 0X1E8C7, 0X1E8C8,
    0X1E8C9, 0X1E8CA, 0X1E8CB, 0X1E8CC, 0X1E8CD, 0X1E8CE, 0X1E8CF, 0X1E950,
    0X1E951, 0X1E952, 0X1E953, 0X1E954, 0X1E955, 0X1E956, 0X1E957, 0X1E958,
    0X1E959, 0X1F100, 0X1F101, 0X1F102, 0X1F103, 0X1F104, 0X1F105, 0X1F106,
    0X1F107, 0X1F108, 0X1F109, 0X1F10A, 0X1F10B, 0X1F10C, 0X20001, 0X20064,
    0X200E2, 0X20121, 0X2092A, 0X20983, 0X2098C, 0X2099C, 0X20AEA, 0X20AFD,
    0X20B19, 0X22390, 0X22998, 0X23B1B, 0X2626D, 0X2F890)

# Convert each hex into the literal Unicode character.
# Stop if a ValueError is raised in case of a narrow Unicode build.
# The extra check with unicodedata is in case this Python version
# does not support some characters.
numeric_chars = []
for a in numeric_hex:
    try:
        character = py23_unichr(a)
    except ValueError:  # pragma: no cover
        break
    # Skip code points that unicodedata does not report as numeric.
    if unicodedata.numeric(character, None) is None:
        continue  # pragma: no cover
    numeric_chars.append(character)

# The digit characters are a subset of the numerals.
digit_chars = [
    a for a in numeric_chars if unicodedata.digit(a, None) is not None
]

# Create a single string with the above data.
digits = ''.join(digit_chars)
numeric = ''.join(numeric_chars)