def test_groupref_exists():
    """Conditional-group patterns ``(?(1)...)`` track whether group 1 matched."""
    angle = st.from_regex(u"^(<)?a(?(1)>)$")
    assert_all_examples(angle, lambda s: s in (u"a", u"a\n", u"<a>", u"<a>\n"))
    branch = st.from_regex(u"^(a)?(?(1)b|c)$")
    assert_all_examples(branch, lambda s: s in (u"ab", u"ab\n", u"c", u"c\n"))
def test_groupref_exists():
    """Every example from a conditional-group regex satisfies its condition."""
    cases = (
        (u'^(<)?a(?(1)>)$', (u'a', u'a\n', u'<a>', u'<a>\n')),
        (u'^(a)?(?(1)b|c)$', (u'ab', u'ab\n', u'c', u'c\n')),
    )
    for pattern, allowed in cases:
        assert_all_examples(st.from_regex(pattern), lambda s, ok=allowed: s in ok)
def test_groups(pattern, is_unicode, invert):
    """Check character-class escapes (\\d, \\w, \\s and inverses) generate
    only characters satisfying the corresponding predicate."""
    lowered = pattern.lower()
    if u'd' in lowered:
        predicate = is_digit
    elif u'w' in lowered:
        predicate = is_word
    elif is_unicode:
        # Special behaviour due to \x1c, INFORMATION SEPARATOR FOUR
        predicate = is_unicode_space
    else:
        predicate = is_space

    if invert:
        # Swapping case turns e.g. \d into \D; the predicate is negated to match.
        pattern = pattern.swapcase()
        predicate = (lambda inner: lambda s: not inner(s))(predicate)

    anchored = u'^%s\\Z' % (pattern,)
    compile_ = unicode_regex if is_unicode else ascii_regex
    strategy = st.from_regex(compile_(anchored))

    find_any(strategy.filter(predicate), is_ascii)
    if is_unicode:
        find_any(strategy, lambda s: predicate(s) and not is_ascii(s))
    assert_all_examples(strategy, predicate)
def _for_text(field):
    """Build a strategy for a Django text-like field.

    We can infer a vastly more precise strategy by considering the
    validators as well as the field type.  This is a minimal proof of
    concept, but we intend to leverage the idea much more heavily soon.
    See https://github.com/HypothesisWorks/hypothesis-python/issues/1116
    """
    regexes = []
    for v in field.validators:
        if isinstance(v, django.core.validators.RegexValidator) and not v.inverse_match:
            compiled = re.compile(v.regex, v.flags) if isinstance(v.regex, str) else v.regex
            regexes.append(compiled)
    if regexes:
        # This strategy generates according to one of the regexes, and
        # filters using the others.  It can therefore learn to generate
        # from the most restrictive and filter with permissive patterns.
        # Not maximally efficient, but it makes pathological cases rarer.
        # If you want a challenge: extend https://qntm.org/greenery to
        # compute intersections of the full Python regex language.
        return st.one_of(*(st.from_regex(r) for r in regexes))

    # If there are no (usable) regexes, we use a standard text strategy.
    blank_ok = getattr(field, "blank", False) or not getattr(field, "required", True)
    strategy = st.text(
        alphabet=st.characters(
            blacklist_characters=u"\x00", blacklist_categories=("Cs",)
        ),
        min_size=0 if blank_ok else 1,
        max_size=field.max_length,
    )
    if getattr(field, "required", True):
        strategy = strategy.filter(lambda s: s.strip())
    return strategy
def _get_strategy_for_field(f):
    # type: (Type[dm.Field]) -> st.SearchStrategy[Any]
    """Return a Hypothesis strategy producing valid values for field ``f``.

    Dispatches on the field's declared choices, then on its exact type
    (``type(f) ==`` is deliberate here: subclasses fall through to the
    ``field_mappings()`` lookup below), and finally wraps the result with
    validator filtering and optional null-ness.
    """
    if f.choices:
        # Choice fields sample from the declared values; optgroups are
        # flattened to their member keys.
        choices = []  # type: list
        for value, name_or_optgroup in f.choices:
            if isinstance(name_or_optgroup, (list, tuple)):
                choices.extend(key for key, _ in name_or_optgroup)
            else:
                choices.append(value)
        # Blank char/text fields may also be the empty string.
        if isinstance(f, (dm.CharField, dm.TextField)) and f.blank:
            choices.insert(0, u'')
        strategy = st.sampled_from(choices)
    elif type(f) == dm.SlugField:
        strategy = st.text(alphabet=string.ascii_letters + string.digits,
                           min_size=(0 if f.blank else 1),
                           max_size=f.max_length)
    elif type(f) == dm.GenericIPAddressField:
        # f.protocol is one of 'both'/'ipv4'/'ipv6' (case-insensitive).
        lookup = {'both': ip4_addr_strings() | ip6_addr_strings(),
                  'ipv4': ip4_addr_strings(), 'ipv6': ip6_addr_strings()}
        strategy = lookup[f.protocol.lower()]
    elif type(f) in (dm.TextField, dm.CharField):
        # NUL bytes and surrogates are excluded: databases reject them.
        strategy = st.text(
            alphabet=st.characters(blacklist_characters=u'\x00',
                                   blacklist_categories=('Cs',)),
            min_size=(0 if f.blank else 1),
            max_size=f.max_length,
        )
        # We can infer a vastly more precise strategy by considering the
        # validators as well as the field type. This is a minimal proof of
        # concept, but we intend to leverage the idea much more heavily soon.
        # See https://github.com/HypothesisWorks/hypothesis-python/issues/1116
        re_validators = [
            v for v in f.validators
            if isinstance(v, validators.RegexValidator) and not v.inverse_match
        ]
        if re_validators:
            regexes = [re.compile(v.regex, v.flags) if isinstance(v.regex, str)
                       else v.regex for v in re_validators]
            # This strategy generates according to one of the regexes, and
            # filters using the others. It can therefore learn to generate
            # from the most restrictive and filter with permissive patterns.
            # Not maximally efficient, but it makes pathological cases rarer.
            # If you want a challenge: extend https://qntm.org/greenery to
            # compute intersections of the full Python regex language.
            strategy = st.one_of(*[st.from_regex(r) for r in regexes])
    elif type(f) == dm.DecimalField:
        # Largest value with max_digits total digits and decimal_places
        # fractional digits, e.g. max_digits=4, places=2 -> 99.99.
        bound = Decimal(10 ** f.max_digits - 1) / (10 ** f.decimal_places)
        strategy = st.decimals(min_value=-bound, max_value=bound,
                               places=f.decimal_places)
    else:
        strategy = field_mappings().get(type(f), st.nothing())
    # All declared validators become filters, and nullable fields admit None.
    if f.validators:
        strategy = strategy.filter(validator_to_filter(f))
    if f.null:
        strategy = st.one_of(st.none(), strategy)
    return strategy
def test_subpattern_flags():
    """Inline flag groups apply only inside their subpattern.

    Bug fix: the pattern was unanchored, so ``from_regex`` was free to pad
    the generated string on either side.  Padding can shift the matched text
    away from positions 0 and 1, making the ``s[0]``/``s[1]`` checks (and in
    particular the final ``assert_no_examples``) unreliable.  Anchoring with
    ``\\A...\\Z`` pins the example to exactly the matched text, consistent
    with the anchored version of this test elsewhere in the file.
    """
    strategy = st.from_regex(u'(?i)\\Aa(?-i:b)\\Z')
    # "a" is case insensitive
    find_any(strategy, lambda s: s[0] == u'a')
    find_any(strategy, lambda s: s[0] == u'A')
    # "b" is case sensitive
    find_any(strategy, lambda s: s[1] == u'b')
    assert_no_examples(strategy, lambda s: s[1] == u'B')
def test_issue_992_regression(data):
    # Regression test: from_regex() must accept re.VERBOSE patterns, which
    # may contain whitespace and '#' comments.
    # NOTE(review): in VERBOSE mode a '#' comment runs to end of line, so if
    # this literal originally spanned several lines, flattening it onto one
    # line changed the effective pattern — confirm against the upstream test.
    strat = st.from_regex(
        re.compile(
            r"""\d + # the integral part \. # the decimal point \d * # some fractional digits""",
            re.VERBOSE,
        )
    )
    # Drawing a single example is enough: the bug was a crash at draw time.
    data.draw(strat)
def test_subpattern_flags():
    """``(?i)`` covers the leading "a" while ``(?-i:...)`` restores case
    sensitivity for the "b"."""
    strategy = st.from_regex(u"(?i)\\Aa(?-i:b)\\Z")
    # The first character may be generated in either case...
    for wanted in (u"a", u"A"):
        find_any(strategy, lambda s, c=wanted: s[0] == c)
    # ...but the second is case sensitive inside the (?-i:...) group.
    find_any(strategy, lambda s: s[1] == u"b")
    assert_no_examples(strategy, lambda s: s[1] == u"B")
def test_fuzz_stuff(data):
    """Any fuzzed pattern that compiles yields strings that match it."""
    pattern = data.draw(
        st.text(min_size=1, max_size=5)
        | st.binary(min_size=1, max_size=5)
        | CONSERVATIVE_REGEX.filter(bool)
    )
    try:
        regex = re.compile(pattern)
    except re.error:
        # Invalid patterns are simply discarded, not failures.
        reject()
    example = data.draw(st.from_regex(regex))
    assert regex.search(example)
def test_end():
    """A trailing ``$`` admits either no newline or one trailing newline."""
    strategy = st.from_regex(u'abc$')
    for wanted in (u'abc', u'abc\n'):
        find_any(strategy, lambda s, w=wanted: s == w)
def test_end_with_terminator_does_not_pad():
    """``\\Z`` anchors the end of the string, so nothing is appended after it.

    Bug fix: the pattern was written as ``u'abc\\Z'`` with a single backslash
    in a non-raw literal — an invalid string escape that only works because
    Python preserves unknown escapes, and which raises a Deprecation/Syntax
    warning on modern interpreters.  The backslash is now escaped explicitly,
    matching the other ``\\Z`` patterns in this file.
    """
    assert_all_examples(st.from_regex(u"abc\\Z"), lambda x: x[-3:] == u"abc")
def test_bare_caret_can_produce():
    """A pattern consisting of only ``^`` still generates some string."""
    strategy = st.from_regex(u'^')
    find_any(strategy, bool)
def test_positive_lookbehind():
    """``(?<=ab)c`` only matches a "c" preceded by "ab"."""
    strategy = st.from_regex(u'.*(?<=ab)c')
    find_any(strategy, lambda s: s.endswith(u'abc'))
def test_can_handle_binary_regex_which_is_not_ascii():
    """Bytes patterns outside the ASCII range still generate correctly."""
    pattern = b'\xad'
    strategy = st.from_regex(pattern)
    assert_all_examples(strategy, lambda x: pattern in x)
def test_can_pad_strings_with_newlines():
    """``^$`` patterns can be padded with newlines, for text and bytes alike."""
    for pattern in (u'^$', b'^$'):
        find_any(st.from_regex(pattern), bool)
def test_can_pad_strings_arbitrarily():
    """Unanchored matches may gain padding on either side."""
    strategy = st.from_regex(u'a')
    find_any(strategy, lambda x: x[0] != u'a')   # padded on the left
    find_any(strategy, lambda x: x[-1] != u'a')  # padded on the right
def test_positive_lookahead():
    """``a(?=bc)`` forces "bc" to follow the "a"."""
    strategy = st.from_regex(u'a(?=bc).*')
    strategy.filter(lambda s: s.startswith(u'abc')).example()
def test_regex_have_same_type_as_pattern(pattern):
    """from_regex(str) yields str and from_regex(bytes) yields bytes."""
    @given(st.from_regex(pattern))
    def check_type(s):
        # Exact type comparison is intentional: no str/bytes mixing allowed.
        assert type(s) == type(pattern)

    check_type()
def test_negative_lookahead():
    """``(?!cd)`` forbids "cd" immediately after the "ab" prefix."""
    # no efficient support
    strategy = st.from_regex(u'^ab(?!cd)[abcd]*')
    starts_abcd = lambda s: s.startswith(u'abcd')
    assert_all_examples(strategy, lambda s: not starts_abcd(s))
    assert_no_examples(strategy, starts_abcd)
def test_negative_lookbehind():
    """``(?<!abc)`` forbids "abc" immediately before the final "d"."""
    # no efficient support
    strategy = st.from_regex(u'[abc]*(?<!abc)d')
    ends_abcd = lambda s: s.endswith(u'abcd')
    assert_all_examples(strategy, lambda s: not ends_abcd(s))
    assert_no_examples(strategy, ends_abcd)
def test_any_with_dotall_generate_newline(pattern):
    """With re.DOTALL in effect, ``.`` can produce a newline."""
    newline = u'\n'
    find_any(st.from_regex(pattern), lambda s: s == newline)
def test_can_pad_empty_strings():
    """Empty patterns can still be padded into non-empty examples."""
    for pattern in (u'', b''):
        find_any(st.from_regex(pattern), bool)
def test_groupref_not_shared_between_regex():
    # If group references are (incorrectly!) shared between regex, this would
    # fail as there would only be one reference.
    st.tuples(st.from_regex('(a)\\1'), st.from_regex('(b)\\1')).example()
def test_given_multiline_regex_can_insert_after_dollar():
    """In MULTILINE mode, content may appear after a ``$``-terminated line.

    Bug fix: the pattern was written as ``u"\\Ahi$"`` with a single backslash
    in a non-raw literal — an invalid string escape that relies on Python
    preserving unknown escapes and warns on modern interpreters.  The
    backslash is now escaped explicitly; the pattern text is unchanged.
    """
    find_any(
        st.from_regex(re.compile(u"\\Ahi$", re.MULTILINE)),
        lambda x: '\n' in x and x.split(u"\n")[1],
    )
def test_given_multiline_regex_can_insert_before_caret():
    """In MULTILINE mode, content may appear before a ``^``-anchored line.

    Bug fix: the pattern used the invalid string escape ``\\Z`` in a non-raw
    literal (single backslash).  That works only because Python preserves
    unknown escapes, and warns on modern interpreters; the backslash is now
    escaped explicitly, leaving the regex text identical.
    """
    find_any(
        st.from_regex(re.compile(u"^hi\\Z", re.MULTILINE)),
        lambda x: '\n' in x and x.split(u"\n")[0],
    )
def test_given_multiline_regex_can_insert_before_caret():
    """MULTILINE ``^hi\\Z`` examples may carry an extra line before "hi".

    Bug fix: ``u"^hi\\Z"`` was written with a single backslash in a non-raw
    literal, i.e. the invalid escape sequence ``\\Z``, which triggers a
    Deprecation/Syntax warning on modern Pythons.  Escaping the backslash
    produces the identical pattern without the warning.
    """
    find_any(
        st.from_regex(re.compile(u"^hi\\Z", re.MULTILINE)),
        lambda x: '\n' in x and x.split(u"\n")[0],
    )
def test_does_not_left_pad_beginning_of_string_marker():
    """``\\A`` pins the match to the start, so no left padding is added."""
    strategy = st.from_regex(u'\\Afoo')
    assert_all_examples(strategy, lambda x: x.startswith(u'foo'))
def test_shared_union():
    """A branch with empty sub-expressions parses and generates cleanly."""
    # This gets parsed as [(ANY, None), (BRANCH, (None, [[], []]))], the
    # interesting feature of which is that it contains empty sub-expressions
    # in the branch.
    strategy = st.from_regex('.|.')
    find_any(strategy)
def test_any_with_dotall_generate_newline_binary(pattern):
    """With re.DOTALL, a bytes ``.`` can produce a newline byte."""
    newline = b'\n'
    find_any(st.from_regex(pattern), lambda s: s == newline)
def test_bare_dollar_can_produce():
    """A pattern consisting of only ``$`` still generates some string."""
    strategy = st.from_regex(u'$')
    find_any(strategy, bool)
def test_caret_in_the_middle_does_not_generate_anything():
    """``a^b`` can never match, so its strategy produces nothing."""
    compiled = re.compile(u'a^b')
    assert_no_examples(st.from_regex(compiled))
# Regex fragments assembled from RFC 3986's URI grammar.
SUB_DELIMS = "[!$&'()*+,;=]"
HOST = r"\A(?:{unreserved}|{pct_encoded}|{sub_delims})*\Z".format(
    unreserved=UNRESERVED, pct_encoded=PCT_ENCODED, sub_delims=SUB_DELIMS)
PCHAR = "(?:{unreserved}|{pct_encoded}|{sub_delims}|[:@])".format(
    unreserved=UNRESERVED, pct_encoded=PCT_ENCODED, sub_delims=SUB_DELIMS)
SEGMENT = "{pchar}*".format(pchar=PCHAR)
SEGMENT_NZ = "{pchar}+".format(pchar=PCHAR)
PATH_ABEMPTY = "((?:/{segment})*)".format(segment=SEGMENT)
PATH_ABSOLUTE = "(/(?:{segment_nz}(?:/{segment})*)?)".format(
    segment=SEGMENT, segment_nz=SEGMENT_NZ)
PATH_ROOTLESS = "({segment_nz}(?:/{segment})*)".format(
    segment=SEGMENT, segment_nz=SEGMENT_NZ)
PATH_EMPTY = "()"
# Bug fix: HIER_PART, FRAGMENT and QUERY used \A and \Z inside non-raw
# string literals — invalid string escapes that warn on modern Pythons.
# Raw literals yield the identical pattern text without the warning.
HIER_PART = r"\A(?:{path_abempty}|{path_absolute}|{path_rootless}|{path_empty})\Z".format(
    path_abempty=PATH_ABEMPTY, path_absolute=PATH_ABSOLUTE,
    path_rootless=PATH_ROOTLESS, path_empty=PATH_EMPTY)
FRAGMENT = r"\A((?:{pchar}|[/?])*)\Z".format(pchar=PCHAR)
QUERY = r"\A((?:{pchar}|[/?])*)\Z".format(pchar=PCHAR)


@given(from_regex(SCHEMA), from_regex(HOST), from_regex(HIER_PART),
       from_regex(QUERY), from_regex(FRAGMENT))
def test_some_stuff(schema, host, path, query, fragment):
    """Assemble a URI from independently generated RFC 3986 components."""
    # NOTE(review): `query` is drawn but never passed to the builder —
    # presumably UriBuilder has a .query() method; confirm and wire it in.
    uri = UriBuilder().scheme(schema).host(host).path(path).fragment(fragment).build()
    print(uri)
def test_impossible_negative_lookahead():
    """``(?!foo)foo`` contradicts itself, so nothing can be generated."""
    strategy = st.from_regex(u'(?!foo)foo')
    assert_no_examples(strategy)
def test_groupref_exists():
    # Conditional groups (?(1)...) must only emit the dependent text when
    # group 1 actually matched.
    assert_all_examples(
        st.from_regex(u"^(<)?a(?(1)>)$"),
        lambda s: s in (u"a", u"a\n", u"<a>", u"<a>\n"),
    )
    assert_all_examples(st.from_regex(u"^(a)?(?(1)b|c)$"),
                        lambda s: s in (u"ab", u"ab\n", u"c", u"c\n"))


def test_impossible_negative_lookahead():
    # (?!foo)foo can never match, so the strategy must be empty.
    assert_no_examples(st.from_regex(u"(?!foo)foo"))


@given(st.from_regex(u"(\\Afoo\\Z)"))
def test_can_handle_boundaries_nested(s):
    # \A and \Z inside a group still anchor the whole string.
    assert s == u"foo"


def test_groupref_not_shared_between_regex():
    # If group references are (incorrectly!) shared between regex, this would
    # fail as there would only be one reference.
    st.tuples(st.from_regex("(a)\\1"), st.from_regex("(b)\\1")).example()


@given(st.data())
def test_group_ref_is_not_shared_between_identical_regex(data):
    # Two strategies built from the same compiled pattern must not share
    # group-reference state.
    pattern = re.compile(u"^(.+)\\1\\Z", re.UNICODE)
    x = data.draw(base_regex_strategy(pattern))
    y = data.draw(base_regex_strategy(pattern))
    # NOTE(review): the visible code draws x and y but makes no assertion —
    # this block looks truncated; confirm the original's trailing asserts.
def test_can_generate(pattern, encode):
    """Every generated example actually matches the pattern it came from."""
    if encode:
        pattern = pattern.encode("ascii")
    compiled = re.compile(pattern)
    assert_all_examples(st.from_regex(pattern), compiled.search)
def test_literals_with_ignorecase(pattern):
    """IGNORECASE literals can be generated in either case."""
    strategy = st.from_regex(pattern)
    for wanted in (u"a", u"A"):
        find_any(strategy, lambda s, w=wanted: s == w)
def test_any_doesnt_generate_newline():
    """Without DOTALL, ``.`` never yields the newline character alone."""
    strategy = st.from_regex(u'.')
    assert_all_examples(strategy, lambda s: s != u'\n')
def test_not_literal_with_ignorecase(pattern):
    """Negated literals under IGNORECASE exclude both cases of each letter."""
    def excluded(s):
        return s[0] not in (u"a", u"A") and s[1] not in (u"b", u"B")

    assert_all_examples(st.from_regex(pattern), excluded)
def test_any_with_dotall_generate_newline_binary(pattern):
    """re.DOTALL lets a bytes ``.`` produce the newline byte."""
    target = b"\n"
    find_any(st.from_regex(pattern), lambda s: s == target)
def test_given_multiline_regex_can_insert_after_dollar():
    """MULTILINE ``\\Ahi$`` examples may carry a second line after "hi".

    Bug fix: ``u"\\Ahi$"`` was written with a single backslash in a non-raw
    literal, i.e. the invalid escape sequence ``\\A``, which triggers a
    Deprecation/Syntax warning on modern Pythons.  Escaping the backslash
    yields the identical pattern without the warning.
    """
    find_any(
        st.from_regex(re.compile(u"\\Ahi$", re.MULTILINE)),
        lambda x: '\n' in x and x.split(u"\n")[1],
    )
def test_caret_in_the_middle_does_not_generate_anything():
    """No string matches ``a^b``, so the strategy must be empty."""
    unmatchable = re.compile(u"a^b")
    assert_no_examples(st.from_regex(unmatchable))
def test_does_not_left_pad_beginning_of_string_marker():
    """Examples for ``\\Afoo`` always begin with "foo" — no left padding."""
    strategy = st.from_regex(u'\\Afoo')
    assert_all_examples(strategy, lambda x: x.startswith(u'foo'))
def test_end_with_terminator_does_not_pad():
    """``\\Z`` forbids trailing padding, so every example ends in "abc"."""
    strategy = st.from_regex(u"abc\\Z")
    assert_all_examples(strategy, lambda x: x[-3:] == u"abc")
def test_groupref_exists():
    """Conditional references only emit dependent text when group 1 matched."""
    optional_close = st.from_regex(u'^(<)?a(?(1)>)$')
    assert_all_examples(optional_close,
                        lambda s: s in (u'a', u'a\n', u'<a>', u'<a>\n'))
    branching = st.from_regex(u'^(a)?(?(1)b|c)$')
    assert_all_examples(branching,
                        lambda s: s in (u'ab', u'ab\n', u'c', u'c\n'))
def test_any_doesnt_generate_newline():
    """An anchored ``.`` (no DOTALL) never yields the newline character."""
    strategy = st.from_regex(u"\\A.\\Z")
    assert_all_examples(strategy, lambda s: s != u"\n")