def __init__(
    self,
    whitelist_categories=None,
    blacklist_categories=None,
    blacklist_characters=None,
    min_codepoint=None,
    max_codepoint=None,
    whitelist_characters=None,
):
    """Resolve the character filters into an ``IntervalSet``.

    The filters are forwarded to ``charmap.query``; its result is wrapped
    in ``IntervalSet`` and stored on ``self.intervals``.  ``self.zero_point``
    is set to ``self.intervals.index_above(ord("0"))``.

    Raises:
        InvalidArgument: if ``charmap.query`` returns nothing for this
            combination of arguments.  The message lists every argument
            that is not ``None``.
    """
    # Both category collections may only name categories charmap knows.
    for requested in (whitelist_categories, blacklist_categories):
        assert set(requested or ()).issubset(charmap.categories())

    matched = charmap.query(
        include_categories=whitelist_categories,
        exclude_categories=blacklist_categories,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
        include_characters=whitelist_characters,
        exclude_characters=blacklist_characters,
    )

    if matched:
        self.intervals = IntervalSet(matched)
        self.zero_point = self.intervals.index_above(ord("0"))
        return

    # Nothing can be generated: report only the arguments actually supplied.
    supplied = (
        ("whitelist_categories", whitelist_categories),
        ("blacklist_categories", blacklist_categories),
        ("whitelist_characters", whitelist_characters),
        ("blacklist_characters", blacklist_characters),
        ("min_codepoint", min_codepoint),
        ("max_codepoint", max_codepoint),
    )
    described = ", ".join(
        "%s=%r" % (name, value) for name, value in supplied if value is not None
    )
    raise InvalidArgument(
        "No characters are allowed to be generated by this "
        "combination of arguments: " + described
    )
def __init__(
    self,
    whitelist_categories=None,
    blacklist_categories=None,
    blacklist_characters=None,
    min_codepoint=None,
    max_codepoint=None,
    whitelist_characters=None,
):
    """Store the intervals selected by the given character filters.

    ``charmap.query`` is called with the filters, and its result becomes
    ``self.intervals`` (an ``IntervalSet``).  ``self.zero_point`` holds
    ``self.intervals.index_above(ord("0"))``.

    Raises:
        InvalidArgument: when the query result is empty; the message names
            each argument whose value is not ``None``.
    """
    # Sanity check: every requested category must be one charmap reports.
    for category_filter in (whitelist_categories, blacklist_categories):
        assert set(category_filter or ()).issubset(charmap.categories())

    selected = charmap.query(
        include_categories=whitelist_categories,
        exclude_categories=blacklist_categories,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
        include_characters=whitelist_characters,
        exclude_characters=blacklist_characters,
    )

    if not selected:
        # Build "name=value" fragments for the arguments that were given.
        fragments = []
        for label, given in (
            ("whitelist_categories", whitelist_categories),
            ("blacklist_categories", blacklist_categories),
            ("whitelist_characters", whitelist_characters),
            ("blacklist_characters", blacklist_characters),
            ("min_codepoint", min_codepoint),
            ("max_codepoint", max_codepoint),
        ):
            if given is not None:
                fragments.append("%s=%r" % (label, given))
        raise InvalidArgument(
            "No characters are allowed to be generated by this "
            "combination of arguments: " + ", ".join(fragments)
        )

    self.intervals = IntervalSet(selected)
    self.zero_point = self.intervals.index_above(ord("0"))
def __init__(self, whitelist_categories=None, blacklist_categories=None,
             blacklist_characters=None, min_codepoint=None,
             max_codepoint=None, whitelist_characters=None):
    """Query charmap with the given filters and keep the result.

    On success ``self.intervals`` is an ``IntervalSet`` built from the
    query result and ``self.zero_point`` is
    ``self.intervals.index_above(ord('0'))``.

    Raises ``InvalidArgument`` if the query result is empty, listing every
    argument that is not ``None`` in the message.
    """
    # Each category filter must be a subset of the categories charmap knows.
    for wanted in (whitelist_categories, blacklist_categories):
        assert set(wanted or ()).issubset(charmap.categories())

    found = charmap.query(
        include_categories=whitelist_categories,
        exclude_categories=blacklist_categories,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
        include_characters=whitelist_characters,
        exclude_characters=blacklist_characters,
    )

    if found:
        self.intervals = IntervalSet(found)
        self.zero_point = self.intervals.index_above(ord('0'))
        return

    # Empty result: describe only the arguments the caller actually set.
    pairs = (
        ('whitelist_categories', whitelist_categories),
        ('blacklist_categories', blacklist_categories),
        ('whitelist_characters', whitelist_characters),
        ('blacklist_characters', blacklist_characters),
        ('min_codepoint', min_codepoint),
        ('max_codepoint', max_codepoint),
    )
    summary = ', '.join(
        '%s=%r' % (key, val) for key, val in pairs if val is not None)
    raise InvalidArgument(
        'No characters are allowed to be generated by this '
        'combination of arguments: ' + summary)
def __init__(
    self,
    whitelist_categories=None,
    blacklist_categories=None,
    blacklist_characters=None,
    min_codepoint=None,
    max_codepoint=None,
    whitelist_characters=None,
):
    """Resolve the character filters and record a printable argument summary.

    Sets, in order:

    * ``self._arg_repr`` -- ``name=value`` pairs joined by ``", "`` for the
      arguments that are neither ``None`` nor ``""``; a
      ``blacklist_categories`` equal to ``("Cs",)`` is also left out.
    * ``self.intervals`` -- ``IntervalSet`` of the ``charmap.query`` result.
    * ``self.zero_point`` -- ``self.intervals.index_above(ord("0"))``.
    * ``self.Z_point`` -- ``self.intervals.index_above(ord("Z"))`` capped at
      ``len(self.intervals) - 1``.

    Raises:
        InvalidArgument: if ``charmap.query`` returns nothing.  Note that
            ``self._arg_repr`` has already been assigned at that point.
    """
    # Category filters may only mention categories charmap knows about.
    for requested in (whitelist_categories, blacklist_categories):
        assert set(requested or ()).issubset(charmap.categories())

    matched = charmap.query(
        include_categories=whitelist_categories,
        exclude_categories=blacklist_categories,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
        include_characters=whitelist_characters,
        exclude_characters=blacklist_characters,
    )

    shown = []
    for name, value in (
        ("whitelist_categories", whitelist_categories),
        ("blacklist_categories", blacklist_categories),
        ("whitelist_characters", whitelist_characters),
        ("blacklist_characters", blacklist_characters),
        ("min_codepoint", min_codepoint),
        ("max_codepoint", max_codepoint),
    ):
        if value in (None, ""):
            continue
        # NOTE(review): ("Cs",) is presumably the caller's default blacklist,
        # hidden to keep the summary short -- confirm against the caller.
        if name == "blacklist_categories" and value == ("Cs",):
            continue
        shown.append(f"{name}={value!r}")
    self._arg_repr = ", ".join(shown)

    if not matched:
        raise InvalidArgument(
            "No characters are allowed to be generated by this "
            f"combination of arguments: {self._arg_repr}"
        )

    self.intervals = IntervalSet(matched)
    self.zero_point = self.intervals.index_above(ord("0"))

    # Cap the index so it never exceeds the last valid position.
    above_z = self.intervals.index_above(ord("Z"))
    last_index = len(self.intervals) - 1
    self.Z_point = min(above_z, last_index)
def __init__(self, whitelist_categories=None, blacklist_categories=None,
             blacklist_characters=None, min_codepoint=None,
             max_codepoint=None, whitelist_characters=None):
    """Query charmap with the given filters and keep the result.

    On success this sets ``self.intervals`` (an ``IntervalSet`` of the
    query result), ``self.whitelist_characters`` (a ``set`` of the
    whitelisted characters, empty when none were given) and
    ``self.zero_point`` (``self.intervals.index_above(ord('0'))``).

    Raises ``InvalidArgument`` if the query result is empty, listing every
    argument that is not ``None`` in the message.
    """
    # Each category filter must be a subset of the categories charmap knows.
    for wanted in (whitelist_categories, blacklist_categories):
        assert set(wanted or ()).issubset(charmap.categories())

    found = charmap.query(
        include_categories=whitelist_categories,
        exclude_categories=blacklist_categories,
        min_codepoint=min_codepoint,
        max_codepoint=max_codepoint,
        include_characters=whitelist_characters,
        exclude_characters=blacklist_characters,
    )

    if not found:
        # Empty result: describe only the arguments the caller actually set.
        pairs = (
            ('whitelist_categories', whitelist_categories),
            ('blacklist_categories', blacklist_categories),
            ('whitelist_characters', whitelist_characters),
            ('blacklist_characters', blacklist_characters),
            ('min_codepoint', min_codepoint),
            ('max_codepoint', max_codepoint),
        )
        summary = ', '.join(
            '%s=%r' % (key, val) for key, val in pairs if val is not None)
        raise InvalidArgument(
            'No characters are allowed to be generated by this '
            'combination of arguments: ' + summary
        )

    self.intervals = IntervalSet(found)
    # A falsy whitelist (None or empty) yields an empty set.
    self.whitelist_characters = set(whitelist_characters or ())
    self.zero_point = self.intervals.index_above(ord('0'))
for u, v in intervals: for i in range(u, v + 1): real = unicodedata.category(chr(i)) assert real == cat, f"{i} is {real} but reported in {cat}" def assert_valid_range_list(ls): for u, v in ls: assert u <= v for i in range(len(ls) - 1): assert ls[i] <= ls[i + 1] assert ls[i][-1] < ls[i + 1][0] @given( st.sets(st.sampled_from(cm.categories())), st.sets(st.sampled_from(cm.categories())) | st.none(), ) def test_query_matches_categories(exclude, include): values = cm.query(exclude, include) assert_valid_range_list(values) for u, v in values: for i in (u, v, (u + v) // 2): cat = unicodedata.category(chr(i)) if include is not None: assert cat in include assert cat not in exclude @given( st.sets(st.sampled_from(cm.categories())),
# END HEADER import operator import re import sre_constants as sre import sre_parse import sys import hypothesis.strategies as st from hypothesis import reject from hypothesis.internal.charmap import as_general_categories, categories from hypothesis.internal.compat import int_to_byte HAS_SUBPATTERN_FLAGS = sys.version_info[:2] >= (3, 6) UNICODE_CATEGORIES = set(categories()) SPACE_CHARS = set(" \t\n\r\f\v") UNICODE_SPACE_CHARS = SPACE_CHARS | set("\x1c\x1d\x1e\x1f\x85") UNICODE_DIGIT_CATEGORIES = {"Nd"} UNICODE_SPACE_CATEGORIES = set(as_general_categories("Z")) UNICODE_LETTER_CATEGORIES = set(as_general_categories("L")) UNICODE_WORD_CATEGORIES = set(as_general_categories(["L", "N"])) # This is verbose, but correct on all versions of Python BYTES_ALL = {int_to_byte(i) for i in range(256)} BYTES_DIGIT = {b for b in BYTES_ALL if re.match(b"\\d", b)} BYTES_SPACE = {b for b in BYTES_ALL if re.match(b"\\s", b)} BYTES_WORD = {b for b in BYTES_ALL if re.match(b"\\w", b)} BYTES_LOOKUP = { sre.CATEGORY_DIGIT: BYTES_DIGIT,
for i in range(u, v + 1): real = unicodedata.category(hunichr(i)) assert real == cat, \ '%d is %s but reported in %s' % (i, real, cat) def assert_valid_range_list(ls): for u, v in ls: assert u <= v for i in range(len(ls) - 1): assert ls[i] <= ls[i + 1] assert ls[i][-1] < ls[i + 1][0] @given( st.sets(st.sampled_from(cm.categories())), st.sets(st.sampled_from(cm.categories())) | st.none(), ) def test_query_matches_categories(exclude, include): values = cm.query(exclude, include) assert_valid_range_list(values) for u, v in values: for i in (u, v, (u + v) // 2): cat = unicodedata.category(hunichr(i)) if include is not None: assert cat in include assert cat not in exclude @given( st.sets(st.sampled_from(cm.categories())),
import operator import re import sre_constants as sre import sre_parse import sys import hypothesis.strategies as st from hypothesis import reject from hypothesis.internal.charmap import as_general_categories, categories from hypothesis.internal.compat import PY3, hrange, hunichr, int_to_byte, text_type HAS_SUBPATTERN_FLAGS = sys.version_info[:2] >= (3, 6) UNICODE_CATEGORIES = set(categories()) SPACE_CHARS = set(u" \t\n\r\f\v") UNICODE_SPACE_CHARS = SPACE_CHARS | set(u"\x1c\x1d\x1e\x1f\x85") UNICODE_DIGIT_CATEGORIES = {"Nd"} UNICODE_SPACE_CATEGORIES = set(as_general_categories("Z")) UNICODE_LETTER_CATEGORIES = set(as_general_categories("L")) UNICODE_WORD_CATEGORIES = set(as_general_categories(["L", "N"])) # This is verbose, but correct on all versions of Python BYTES_ALL = {int_to_byte(i) for i in range(256)} BYTES_DIGIT = {b for b in BYTES_ALL if re.match(b"\\d", b)} BYTES_SPACE = {b for b in BYTES_ALL if re.match(b"\\s", b)} BYTES_WORD = {b for b in BYTES_ALL if re.match(b"\\w", b)} BYTES_LOOKUP = {