def test_digit_chars_contains_all_valid_unicode_digit_characters():
    """Verify that ``digit_chars`` lists every non-ASCII Unicode digit.

    Every code point is converted to its character; any character for
    which ``unicodedata.digit`` yields a value must be present in
    ``digit_chars``. The ten ASCII digits are skipped by this check.
    """
    # Build the ASCII digit set once instead of once per code point.
    ascii_digits = frozenset('0123456789')
    # 0X110000 is one past the last code point, so that U+10FFFF itself
    # is examined as well.
    for i in py23_range(0X110000):
        try:
            a = py23_unichr(i)
        except ValueError:
            # The character cannot be created (e.g. a narrow Unicode
            # build); nothing beyond this point can be checked either.
            break
        if a in ascii_digits:
            continue
        if unicodedata.digit(a, None) is not None:
            assert a in digit_chars
def test_digit_chars_contains_all_valid_unicode_digit_characters():
    """Every non-ASCII Unicode digit must appear in the numeric tables.

    For each code point whose character has a ``unicodedata.digit``
    value, both the code point (in ``numeric_hex``) and the character
    (in ``numeric_chars``) are required to be present. The ten ASCII
    digits are excluded from the check.
    """
    known_code_points = set(numeric_hex)
    known_characters = set(numeric_chars)
    ascii_digits = set('0123456789')

    for code_point in py23_range(0X110000):
        try:
            char = py23_unichr(code_point)
        except ValueError:
            # Character cannot be created; stop scanning.
            break

        # Nothing to verify for ASCII digits or for non-digit characters.
        if char in ascii_digits or unicodedata.digit(char, None) is None:
            continue

        assert code_point in known_code_points
        assert char in known_characters
having to worry about if it is using PyICU or the built-in locale. """ from __future__ import absolute_import, division, print_function, unicode_literals # Std. lib imports. import sys from functools import cmp_to_key # Local imports. from natsort.compat.py23 import PY_VERSION, py23_unichr # This string should be sorted after any other byte string because # it contains the max unicode character repeated 20 times. # You would need some odd data to come after that. null_string = "" null_string_max = py23_unichr(sys.maxunicode) * 20 # Make the strxfrm function from strcoll on Python2 # It can be buggy (especially on BSD-based systems), # so prefer icu if available. try: # noqa: C901 import icu from locale import getlocale null_string_locale = b"" # This string should in theory be sorted after any other byte # string because it contains the max byte char repeated many times. # You would need some odd data to come after that. null_string_locale_max = b"x7f" * 50
def load_locale(x):
    """Convenience to load a locale, trying ISO8859-1 first.

    The ``<x>.ISO8859-1`` variant of the locale is tried first; if the
    system rejects it, ``<x>.UTF-8`` is tried instead.

    Parameters
    ----------
    x : str
        Locale name without an encoding suffix, e.g. ``'de_DE'``.

    Raises
    ------
    locale.Error
        If the UTF-8 variant cannot be set either.
    """
    try:
        locale.setlocale(locale.LC_ALL, str('{0}.ISO8859-1'.format(x)))
    except locale.Error:
        # Only an unsupported locale triggers the fallback; anything else
        # (KeyboardInterrupt, programming errors, ...) must propagate.
        locale.setlocale(locale.LC_ALL, str('{0}.UTF-8'.format(x)))


# Check if de_DE is installed.
try:
    load_locale('de_DE')
    has_locale_de_DE = True
except locale.Error:
    has_locale_de_DE = False

# Depending on the python version, use lower or casefold
# to make a string lowercase.
try:
    low = py23_str.casefold
except AttributeError:
    low = py23_str.lower

# There are some unicode values that are known failures on BSD systems
# that have nothing to do with natsort (a ValueError is raised by strxfrm).
# Let's filter them out.
try:
    bad_uni_chars = set(py23_unichr(x) for x in py23_range(0X10fefd, 0X10ffff+1))
except ValueError:
    # Narrow unicode build... no worries.
    bad_uni_chars = set()
import pytest
from hypothesis import example, given
from hypothesis.strategies import floats, integers, text
from natsort.compat.fastnumbers import fast_float, fast_int
from natsort.compat.locale import get_strxfrm
from natsort.compat.py23 import py23_range, py23_str, py23_unichr
from natsort.ns_enum import NS_DUMB, ns
from natsort.utils import groupletters, string_component_transform_factory

# A handful of unicode values are known to fail with the builtin locale
# library on BSD systems (strxfrm raises a ValueError). This is unrelated
# to natsort, so these characters are filtered out of the test input.
try:
    # Code points 0X10FEFD through 0X10FFFF inclusive.
    bad_uni_chars = frozenset(map(py23_unichr, py23_range(0X10FEFD, 0X110000)))
except ValueError:
    # Narrow unicode build: these characters cannot be created at all.
    bad_uni_chars = frozenset()


def no_bad_uni_chars(x, _bad_chars=bad_uni_chars):
    """Return True when no character of *x* is one of the bad characters."""
    return all(char not in _bad_chars for char in x)


def no_null(x):
    """Return True when *x* contains no null character."""
    contains_null = "\0" in x
    return not contains_null
0x20AEA, 0x20AFD, 0x20B19, 0x22390, 0x22998, 0x23B1B, 0x2626D, 0x2F890, ) # Convert each hex into the literal Unicode character. # Stop if a ValueError is raised in case of a narrow Unicode build. # The extra check with unicodedata is in case this Python version # does not support some characters. numeric_chars = [] for a in numeric_hex: try: l = py23_unichr(a) except ValueError: # pragma: no cover break if unicodedata.numeric(l, None) is None: continue # pragma: no cover numeric_chars.append(l) # The digit characters are a subset of the numerals. digit_chars = [a for a in numeric_chars if unicodedata.digit(a, None) is not None] # Create a single string with the above data. digits = "".join(digit_chars) numeric = "".join(numeric_chars)
""" from __future__ import absolute_import, division, print_function, unicode_literals import unicodedata from natsort.compat.py23 import py23_unichr from natsort.unicode_numeric_hex import numeric_hex # Convert each hex into the literal Unicode character. # Stop if a ValueError is raised in case of a narrow Unicode build. # The extra check with unicodedata is in case this Python version # does not support some characters. numeric_chars = [] for a in numeric_hex: try: character = py23_unichr(a) except ValueError: # pragma: no cover break if unicodedata.numeric(character, None) is None: continue # pragma: no cover numeric_chars.append(character) # The digit characters are a subset of the numerals. digit_chars = [a for a in numeric_chars if unicodedata.digit(a, None) is not None] # The decimal characters are a subset of the numberals # (probably of the digits, but let's be safe). decimal_chars = [a for a in numeric_chars if unicodedata.decimal(a, None) is not None] # Create a single string with the above data. decimals = "".join(decimal_chars)
0X1E957, 0X1E958, 0X1E959, 0X1F100, 0X1F101, 0X1F102, 0X1F103, 0X1F104, 0X1F105, 0X1F106, 0X1F107, 0X1F108, 0X1F109, 0X1F10A, 0X1F10B, 0X1F10C, 0X20001, 0X20064, 0X200E2, 0X20121, 0X2092A, 0X20983, 0X2098C, 0X2099C, 0X20AEA, 0X20AFD, 0X20B19, 0X22390, 0X22998, 0X23B1B, 0X2626D, 0X2F890, ) # Convert each hex into the literal Unicode character. # Stop if a ValueError is raised in case of a narrow Unicode build. # The extra check with unicodedata is in case this Python version # does not support some characters. numeric_chars = [] for a in numeric_hex: try: l = py23_unichr(a) except ValueError: # pragma: no cover break if unicodedata.numeric(l, None) is None: continue # pragma: no cover numeric_chars.append(l) # The digit characters are a subset of the numerals. digit_chars = [a for a in numeric_chars if unicodedata.digit(a, None) is not None] # The decimal characters are a subset of the numberals # (probably of the digits, but let's be safe). decimal_chars = [a for a in numeric_chars if unicodedata.decimal(a, None) is not None]
0X20983, 0X2098C, 0X2099C, 0X20AEA, 0X20AFD, 0X20B19, 0X22390, 0X22998, 0X23B1B, 0X2626D, 0X2F890, ) # Some code that can be used to create the above list of hex numbers. if __name__ == "__main__": import unicodedata from natsort.compat.py23 import py23_range, py23_unichr hex_chars = [] for i in py23_range(0X110000): try: a = py23_unichr(i) except ValueError: break if a in "0123456789": continue if unicodedata.numeric(a, None) is not None: hex_chars.append(i) print(", ".join(["0X{:X}".format(i) for i in hex_chars]))
0X1D7FA, 0X1D7FB, 0X1D7FC, 0X1D7FD, 0X1D7FE, 0X1D7FF, 0X1E8C7, 0X1E8C8, 0X1E8C9, 0X1E8CA, 0X1E8CB, 0X1E8CC, 0X1E8CD, 0X1E8CE, 0X1E8CF, 0X1E950, 0X1E951, 0X1E952, 0X1E953, 0X1E954, 0X1E955, 0X1E956, 0X1E957, 0X1E958, 0X1E959, 0X1F100, 0X1F101, 0X1F102, 0X1F103, 0X1F104, 0X1F105, 0X1F106, 0X1F107, 0X1F108, 0X1F109, 0X1F10A, 0X1F10B, 0X1F10C, 0X20001, 0X20064, 0X200E2, 0X20121, 0X2092A, 0X20983, 0X2098C, 0X2099C, 0X20AEA, 0X20AFD, 0X20B19, 0X22390, 0X22998, 0X23B1B, 0X2626D, 0X2F890) # Convert each hex into the literal Unicode character. # Stop if a ValueError is raised in case of a narrow Unicode build. # The extra check with unicodedata is in case this Python version # does not support some characters. numeric_chars = [] for a in numeric_hex: try: l = py23_unichr(a) except ValueError: # pragma: no cover break if unicodedata.numeric(l, None) is None: continue # pragma: no cover numeric_chars.append(l) # The digit characters are a subset of the numerals. digit_chars = [ a for a in numeric_chars if unicodedata.digit(a, None) is not None ] # Create a single string with the above data. digits = ''.join(digit_chars) numeric = ''.join(numeric_chars)