def __init__(
     self,
     whitelist_categories=None,
     blacklist_categories=None,
     blacklist_characters=None,
     min_codepoint=None,
     max_codepoint=None,
     whitelist_characters=None,
 ):
     """Resolve the character filters and store the resulting intervals.

     The filters are forwarded to ``charmap.query``. Its result is wrapped
     in an ``IntervalSet`` stored as ``self.intervals``, and
     ``self.zero_point`` is set to ``self.intervals.index_above(ord("0"))``.

     Raises:
         InvalidArgument: if the query yields no intervals. The message
             lists every argument that was not ``None``.
     """
     # Both category filters may only name categories known to charmap.
     for requested in (whitelist_categories, blacklist_categories):
         assert set(requested or ()).issubset(charmap.categories())

     allowed = charmap.query(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         include_characters=whitelist_characters,
         exclude_characters=blacklist_characters,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
     )

     if not allowed:
         # Only arguments the caller actually supplied are reported.
         supplied = (
             ("whitelist_categories", whitelist_categories),
             ("blacklist_categories", blacklist_categories),
             ("whitelist_characters", whitelist_characters),
             ("blacklist_characters", blacklist_characters),
             ("min_codepoint", min_codepoint),
             ("max_codepoint", max_codepoint),
         )
         described = [
             "%s=%r" % (name, value)
             for name, value in supplied
             if value is not None
         ]
         raise InvalidArgument(
             "No characters are allowed to be generated by this "
             "combination of arguments: " + ", ".join(described)
         )

     self.intervals = IntervalSet(allowed)
     self.zero_point = self.intervals.index_above(ord("0"))
Beispiel #2
0
 def __init__(
     self,
     whitelist_categories=None,
     blacklist_categories=None,
     blacklist_characters=None,
     min_codepoint=None,
     max_codepoint=None,
     whitelist_characters=None,
 ):
     """Compute the allowed codepoint intervals from the given filters.

     Every filter is passed through to ``charmap.query``. On success the
     result is stored as an ``IntervalSet`` in ``self.intervals`` and
     ``self.zero_point`` records ``index_above(ord("0"))`` on that set.

     Raises:
         InvalidArgument: when the filters leave no intervals at all; the
             message names each argument whose value is not ``None``.
     """
     # Category filters must be drawn from the categories charmap knows.
     for chosen in (whitelist_categories, blacklist_categories):
         assert set(chosen or ()).issubset(charmap.categories())

     matching = charmap.query(
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         include_characters=whitelist_characters,
         exclude_characters=blacklist_characters,
     )

     if not matching:
         shown = []
         for label, value in (
             ("whitelist_categories", whitelist_categories),
             ("blacklist_categories", blacklist_categories),
             ("whitelist_characters", whitelist_characters),
             ("blacklist_characters", blacklist_characters),
             ("min_codepoint", min_codepoint),
             ("max_codepoint", max_codepoint),
         ):
             # Arguments left at None are omitted from the message.
             if value is not None:
                 shown.append("%s=%r" % (label, value))
         raise InvalidArgument(
             "No characters are allowed to be generated by this "
             "combination of arguments: " + ", ".join(shown)
         )

     self.intervals = IntervalSet(matching)
     self.zero_point = self.intervals.index_above(ord("0"))
Beispiel #3
0
 def __init__(self,
              whitelist_categories=None,
              blacklist_categories=None,
              blacklist_characters=None,
              min_codepoint=None,
              max_codepoint=None,
              whitelist_characters=None):
     """Turn the character filters into a stored set of intervals.

     The filters are handed to ``charmap.query``; the result becomes
     ``self.intervals`` (an ``IntervalSet``), and ``self.zero_point`` is
     the value of ``self.intervals.index_above(ord('0'))``.

     Raises ``InvalidArgument`` when the query returns nothing, listing
     each argument that is not ``None``.
     """
     # Check both category filters against the categories charmap reports.
     for wanted in (whitelist_categories, blacklist_categories):
         assert set(wanted or ()).issubset(charmap.categories())

     found = charmap.query(
         include_categories=whitelist_categories,
         include_characters=whitelist_characters,
         exclude_categories=blacklist_categories,
         exclude_characters=blacklist_characters,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
     )

     if not found:
         names = (
             'whitelist_categories',
             'blacklist_categories',
             'whitelist_characters',
             'blacklist_characters',
             'min_codepoint',
             'max_codepoint',
         )
         values = (
             whitelist_categories,
             blacklist_categories,
             whitelist_characters,
             blacklist_characters,
             min_codepoint,
             max_codepoint,
         )
         # Pair each name with its value and drop the ones left at None.
         summary = ', '.join(
             '%s=%r' % pair
             for pair in zip(names, values)
             if pair[1] is not None
         )
         raise InvalidArgument(
             'No characters are allowed to be generated by this '
             'combination of arguments: ' + summary
         )

     self.intervals = IntervalSet(found)
     self.zero_point = self.intervals.index_above(ord('0'))
Beispiel #4
0
 def __init__(
     self,
     whitelist_categories=None,
     blacklist_categories=None,
     blacklist_characters=None,
     min_codepoint=None,
     max_codepoint=None,
     whitelist_characters=None,
 ):
     """Resolve the character filters and record the derived lookup points.

     Sets, in order:

     * ``self._arg_repr`` - a ``name=value`` summary of the arguments,
       stored even when construction then fails;
     * ``self.intervals`` - an ``IntervalSet`` built from the result of
       ``charmap.query``;
     * ``self.zero_point`` - ``self.intervals.index_above(ord("0"))``;
     * ``self.Z_point`` - ``self.intervals.index_above(ord("Z"))``, capped
       at the last valid index of ``self.intervals``.

     Raises:
         InvalidArgument: if the query returns no intervals.
     """
     # Both category filters may only name categories known to charmap.
     for requested in (whitelist_categories, blacklist_categories):
         assert set(requested or ()).issubset(charmap.categories())

     intervals = charmap.query(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         include_characters=whitelist_characters,
         exclude_characters=blacklist_characters,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
     )

     parts = []
     for k, v in (
         ("whitelist_categories", whitelist_categories),
         ("blacklist_categories", blacklist_categories),
         ("whitelist_characters", whitelist_characters),
         ("blacklist_characters", blacklist_characters),
         ("min_codepoint", min_codepoint),
         ("max_codepoint", max_codepoint),
     ):
         # Unset (None) and empty-string arguments are left out.
         if v in (None, ""):
             continue
         # NOTE(review): ("Cs",) is presumably the default supplied by the
         # caller, which is why it is hidden - confirm at the call site.
         if k == "blacklist_categories" and v == ("Cs",):
             continue
         parts.append(f"{k}={v!r}")
     self._arg_repr = ", ".join(parts)

     if not intervals:
         raise InvalidArgument(
             "No characters are allowed to be generated by this "
             f"combination of arguments: {self._arg_repr}"
         )

     self.intervals = IntervalSet(intervals)
     self.zero_point = self.intervals.index_above(ord("0"))
     # Clamp so Z_point never exceeds the last index of the interval set.
     z_index = self.intervals.index_above(ord("Z"))
     self.Z_point = min(z_index, len(self.intervals) - 1)
Beispiel #5
0
 def __init__(self,
              whitelist_categories=None,
              blacklist_categories=None,
              blacklist_characters=None,
              min_codepoint=None,
              max_codepoint=None,
              whitelist_characters=None):
     """Resolve the character filters and keep the whitelist as a set.

     The filters go to ``charmap.query``; the result is stored as an
     ``IntervalSet`` in ``self.intervals``. ``self.whitelist_characters``
     is always a set (empty when no whitelist was given), and
     ``self.zero_point`` is ``self.intervals.index_above(ord('0'))``.

     Raises ``InvalidArgument`` when the query returns nothing, listing
     each argument that is not ``None``.
     """
     # Category filters must be drawn from the categories charmap knows.
     for selected in (whitelist_categories, blacklist_categories):
         assert set(selected or ()).issubset(charmap.categories())

     permitted = charmap.query(
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         include_characters=whitelist_characters,
         exclude_characters=blacklist_characters,
     )

     if not permitted:
         given_args = []
         for entry in (
             ('whitelist_categories', whitelist_categories),
             ('blacklist_categories', blacklist_categories),
             ('whitelist_characters', whitelist_characters),
             ('blacklist_characters', blacklist_characters),
             ('min_codepoint', min_codepoint),
             ('max_codepoint', max_codepoint),
         ):
             # Arguments left at None are not mentioned in the message.
             if entry[1] is not None:
                 given_args.append('%s=%r' % entry)
         raise InvalidArgument(
             'No characters are allowed to be generated by this '
             'combination of arguments: ' + ', '.join(given_args)
         )

     self.intervals = IntervalSet(permitted)
     # A missing or empty whitelist becomes an empty set.
     self.whitelist_characters = set(whitelist_characters or ())
     self.zero_point = self.intervals.index_above(ord('0'))
Beispiel #6
0
        for u, v in intervals:
            for i in range(u, v + 1):
                real = unicodedata.category(chr(i))
                assert real == cat, f"{i} is {real} but reported in {cat}"


def assert_valid_range_list(ls):
    """Assert that ``ls`` is a well-formed, ordered list of ranges.

    Each entry must be a ``(start, end)`` pair with ``start <= end``.
    Consecutive entries must be in non-decreasing order, and each entry
    must end strictly before the next one starts.

    Raises:
        AssertionError: if any of these conditions fails.
    """
    # First pass: every range is individually well-formed.
    for start, end in ls:
        assert start <= end
    # Second pass: neighbouring ranges are ordered and do not touch.
    for current, following in zip(ls, ls[1:]):
        assert current <= following
        assert current[-1] < following[0]


@given(
    st.sets(st.sampled_from(cm.categories())),
    st.sets(st.sampled_from(cm.categories())) | st.none(),
)
def test_query_matches_categories(exclude, include):
    """Check that ``cm.query`` respects the include and exclude categories.

    The returned ranges must form a valid range list. For each range, the
    category of its two endpoints and its midpoint must be in ``include``
    (when one is given) and must not be in ``exclude``.
    """
    ranges = cm.query(exclude, include)
    assert_valid_range_list(ranges)
    for low, high in ranges:
        # Sample both ends and the middle rather than every codepoint.
        middle = (low + high) // 2
        for codepoint in (low, high, middle):
            category = unicodedata.category(chr(codepoint))
            assert include is None or category in include
            assert category not in exclude


@given(
    st.sets(st.sampled_from(cm.categories())),
Beispiel #7
0
# END HEADER

import operator
import re
import sre_constants as sre
import sre_parse
import sys

import hypothesis.strategies as st
from hypothesis import reject
from hypothesis.internal.charmap import as_general_categories, categories
from hypothesis.internal.compat import int_to_byte

# True when running on Python 3.6 or later.
# NOTE(review): the name suggests this gates regex sub-pattern flag
# handling - confirm at the use sites.
HAS_SUBPATTERN_FLAGS = sys.version_info[:2] >= (3, 6)

# Set built from whatever charmap.categories() returns.
UNICODE_CATEGORIES = set(categories())

# Space, tab, newline, carriage return, form feed and vertical tab.
SPACE_CHARS = set(" \t\n\r\f\v")
# SPACE_CHARS plus the code points U+001C..U+001F and U+0085.
UNICODE_SPACE_CHARS = SPACE_CHARS | set("\x1c\x1d\x1e\x1f\x85")
UNICODE_DIGIT_CATEGORIES = {"Nd"}
# NOTE(review): as_general_categories presumably expands a major class such
# as "Z" into its two-letter categories - confirm against charmap.
UNICODE_SPACE_CATEGORIES = set(as_general_categories("Z"))
UNICODE_LETTER_CATEGORIES = set(as_general_categories("L"))
UNICODE_WORD_CATEGORIES = set(as_general_categories(["L", "N"]))

# This is verbose, but correct on all versions of Python
# BYTES_ALL holds int_to_byte(i) for every i in 0..255. The three sets
# below keep the members of BYTES_ALL that the bytes patterns \d, \s and \w
# match, so they reflect the behaviour of the running interpreter's `re`.
BYTES_ALL = {int_to_byte(i) for i in range(256)}
BYTES_DIGIT = {b for b in BYTES_ALL if re.match(b"\\d", b)}
BYTES_SPACE = {b for b in BYTES_ALL if re.match(b"\\s", b)}
BYTES_WORD = {b for b in BYTES_ALL if re.match(b"\\w", b)}
BYTES_LOOKUP = {
    sre.CATEGORY_DIGIT: BYTES_DIGIT,
            for i in range(u, v + 1):
                real = unicodedata.category(hunichr(i))
                assert real == cat, \
                    '%d is %s but reported in %s' % (i, real, cat)


def assert_valid_range_list(ls):
    """Assert that ``ls`` is a sorted list of non-touching ranges.

    Every item is unpacked as ``(lower, upper)`` and must satisfy
    ``lower <= upper``. Adjacent items must compare in non-decreasing
    order, and the last element of one item must be strictly less than
    the first element of the next.

    Raises ``AssertionError`` as soon as one of these checks fails.
    """
    # Validate each range on its own before comparing neighbours.
    for lower, upper in ls:
        assert lower <= upper
    # Walk the list pairwise: (ls[0], ls[1]), (ls[1], ls[2]), ...
    for left, right in zip(ls, ls[1:]):
        assert left <= right
        assert left[-1] < right[0]


@given(
    st.sets(st.sampled_from(cm.categories())),
    st.sets(st.sampled_from(cm.categories())) | st.none(),
)
def test_query_matches_categories(exclude, include):
    """Check that ``cm.query`` honours its category filters.

    The query result must be a valid range list. The category of each
    range's endpoints and midpoint must belong to ``include`` (when it is
    not ``None``) and must not belong to ``exclude``.
    """
    found = cm.query(exclude, include)
    assert_valid_range_list(found)
    for first, last in found:
        # Probe the two ends and the centre of every range.
        centre = (first + last) // 2
        for point in (first, last, centre):
            category = unicodedata.category(hunichr(point))
            assert include is None or category in include
            assert category not in exclude


@given(
    st.sets(st.sampled_from(cm.categories())),
import operator
import re
import sre_constants as sre
import sre_parse
import sys

import hypothesis.strategies as st
from hypothesis import reject
from hypothesis.internal.charmap import as_general_categories, categories
from hypothesis.internal.compat import PY3, hrange, hunichr, int_to_byte, text_type

# True when running on Python 3.6 or later.
# NOTE(review): the name suggests this gates regex sub-pattern flag
# handling - confirm at the use sites.
HAS_SUBPATTERN_FLAGS = sys.version_info[:2] >= (3, 6)


# Set built from whatever charmap.categories() returns.
UNICODE_CATEGORIES = set(categories())


# Space, tab, newline, carriage return, form feed and vertical tab.
SPACE_CHARS = set(u" \t\n\r\f\v")
# SPACE_CHARS plus the code points U+001C..U+001F and U+0085.
UNICODE_SPACE_CHARS = SPACE_CHARS | set(u"\x1c\x1d\x1e\x1f\x85")
UNICODE_DIGIT_CATEGORIES = {"Nd"}
# NOTE(review): as_general_categories presumably expands a major class such
# as "Z" into its two-letter categories - confirm against charmap.
UNICODE_SPACE_CATEGORIES = set(as_general_categories("Z"))
UNICODE_LETTER_CATEGORIES = set(as_general_categories("L"))
UNICODE_WORD_CATEGORIES = set(as_general_categories(["L", "N"]))

# This is verbose, but correct on all versions of Python
# BYTES_ALL holds int_to_byte(i) for every i in 0..255. The three sets
# below keep the members of BYTES_ALL that the bytes patterns \d, \s and \w
# match, so they reflect the behaviour of the running interpreter's `re`.
BYTES_ALL = {int_to_byte(i) for i in range(256)}
BYTES_DIGIT = {b for b in BYTES_ALL if re.match(b"\\d", b)}
BYTES_SPACE = {b for b in BYTES_ALL if re.match(b"\\s", b)}
BYTES_WORD = {b for b in BYTES_ALL if re.match(b"\\w", b)}
BYTES_LOOKUP = {