def regex(draw, state_bound=None):
    """Draw a randomly assembled regular expression.

    A non-empty list of single-character expressions is drawn first and is
    then folded down to a single expression by repeatedly applying a sampled
    operator to operands popped from the end of the list.

    Results equal to ``rd.Empty`` or ``rd.Epsilon``, and results that are
    ``rd.Characters`` instances, are discarded through ``assume``.  When
    ``state_bound`` is given, a result is also discarded if more than
    ``state_bound`` distinct expressions (the result itself included) are
    reachable from it by repeatedly taking derivatives.

    NOTE(review): the ``draw`` parameter suggests this is wrapped by
    ``st.composite``; the decorator is not visible here -- confirm.
    """
    # Each entry pairs an operator with the number of operands it consumes.
    operators = (
        (1, rd.star),
        (1, rd.nonempty),
        (2, rd.union),
        (2, rd.intersection),
        (2, rd.concatenate),
        (2, rd.subtract),
        (1, lambda r: rd.bounded(r, 1)),
        (1, lambda r: rd.bounded(r, 2)),
        (1, lambda r: rd.bounded(r, 3)),
        (1, lambda r: rd.bounded(r, 4)),
    )

    stack = draw(st.lists(
        st.builds(rd.char, st.integers(0, 255),),
        min_size=1, average_size=20))

    # Unary operators leave the stack size unchanged, so only the binary
    # ones move this loop towards termination.
    while len(stack) > 1:
        arity, combine = draw(st.sampled_from(operators),)
        # Defensive guard: with arities of at most 2 and a stack of at least
        # two entries this never fires, but it is kept as in the original.
        if arity > len(stack):
            continue
        operands = [stack.pop() for _ in range(arity)]
        stack.append(combine(*operands))

    result = stack[0]
    assume(result not in (rd.Empty, rd.Epsilon))
    assume(not isinstance(result, rd.Characters))

    if state_bound is not None:
        # Breadth-first walk over derivatives, one frontier at a time.
        visited = {result}
        frontier = {result}
        while frontier:
            frontier = {
                rd.derivative(state, start)
                for state in frontier
                for start in rd.valid_starts(state)
            } - visited
            visited |= frontier
            # ``visited`` only grows, so checking on every round gives the
            # same verdict as one final check while giving up earlier.
            assume(len(visited) <= state_bound)
    return result
def test_bounds_are_not_nested():
    """Bounding an already bounded expression does not stack the wrappers."""
    first = rd.bounded(rd.star(rd.char(0)), 7)
    rebounded = rd.bounded(first, 5)

    assert first.bound == 7
    assert rebounded.bound == 5
    # The child of the re-bounded expression is the star itself, not the
    # first bounded wrapper.
    assert isinstance(rebounded.child, rd.Star)
def test_bounds_propagate_through_subtraction():
    """Bounding a subtraction yields a subtraction with a bounded left side."""
    minuend = rd.star(rd.char(b'\0\1'))
    subtrahend = rd.literal(b'\0\0\0\1')
    difference = rd.subtract(minuend, subtrahend)

    limited = rd.bounded(difference, 10)

    assert isinstance(limited, rd.Subtraction)
    assert isinstance(limited.left, rd.Bounded)
def test_bounds_propagate_through_intersections():
    """Bounding an intersection still produces an ``rd.Intersection``."""
    left = rd.star(rd.char(b'\0\1'))
    right = rd.star(rd.char(b'\1\2'))
    limited = rd.bounded(rd.intersection(left, right), 3)
    assert isinstance(limited, rd.Intersection)
def test_count_below_bound_is_the_same(re, m, n):
    """Bounding must not change ``count`` for an argument at or below the bound.

    The smaller of ``m`` and ``n`` is passed to ``count``; the larger one is
    used as the bound.
    """
    assume(rd.has_matches(re))
    small, large = sorted((m, n))

    plain_count = rd.LanguageCounter(*rd.build_dfa(re)).count(small)
    bounded_dfa = rd.build_dfa(rd.bounded(re, large))
    bounded_count = rd.LanguageCounter(*bounded_dfa).count(small)

    assert plain_count == bounded_count
def test_can_simulate_accurately(regex, seed, max_size, param):
    """Values drawn from a ``Simulator`` match the regex and respect max_size.

    Cases with a non-positive ``param``, a regex without matches, or (when
    ``max_size`` is given) a regex without matches under that bound are
    filtered out with ``assume``.
    """
    assume(param > 0)
    assume(rd.has_matches(regex))
    size_limited = max_size is not None
    if size_limited:
        assume(rd.has_matches(rd.bounded(regex, max_size)))

    draws = Simulator(regex, seed).draw(param, max_size=max_size)

    try:
        # The first value is consumed without being checked; this call is
        # where ParamTooLarge is caught if it is raised.
        next(draws)
    except ParamTooLarge:
        assert param > 0
        reject()

    for _ in range(10):
        value = next(draws)
        assert rd.matches(regex, value)
        if size_limited:
            assert len(value) <= max_size
def test_complex_graphs_may_be_finite():
    """A bounded union of two starred characters is not reported infinite."""
    either_star = rd.union(rd.star(rd.char(0)), rd.star(rd.char(1)))
    basic = to_basic(rd.bounded(either_star, 20))
    assert not rd.is_infinite(basic)
def test_bounded_is_not_infinite():
    """A starred character with a very large bound is still not infinite."""
    huge_bound = 10 ** 6
    limited = rd.bounded(rd.star(rd.char(0)), huge_bound)
    assert not rd.is_infinite(limited)
def test_bounded_min_matches_bounds(re, n):
    """The ``lexmin`` of a bounded expression is no longer than the bound."""
    limited = rd.bounded(re, n)
    # Only bounded expressions that still have matches are checked.
    assume(rd.has_matches(limited))
    smallest = rd.lexmin(limited)
    assert len(smallest) <= n
def test_bounds_propagate_through_unions():
    """Bounding a union still produces an ``rd.Union``."""
    either_star = rd.union(rd.star(rd.char(0)), rd.star(rd.char(1)))
    limited = rd.bounded(either_star, 1)
    assert isinstance(limited, rd.Union)
def test_basic_impossible_bounds_are_empty():
    """Bounding a single character by -1 or 0 gives ``rd.Empty``."""
    for impossible_bound in (-1, 0):
        assert rd.bounded(rd.char(0), impossible_bound) is rd.Empty
def test_bounded_does_not_wrap_obviously_bounded(c):
    """Bounding ``c`` by 1 returns the very same object, not a wrapper."""
    limited = rd.bounded(c, 1)
    assert limited is c

    limited_empty = rd.bounded(rd.Empty, 1)
    assert limited_empty is rd.Empty
count1 = rd.LanguageCounter(*rd.build_dfa(re)).count(m) count2 = rd.LanguageCounter(*rd.build_dfa(rd.bounded(re, n))).count(m) assert count1 == count2 def test_clearing_caches_resets_identity(): c1 = rd.char(0) c2 = rd.char(0) rd.clear_caches() c3 = rd.char(0) assert c1 is c2 is not c3 @pytest.mark.parametrize( 'c', [rd.Empty, rd.Epsilon, rd.char(0), rd.bounded(rd.star(rd.char(0)), 1)] ) def test_bounded_does_not_wrap_obviously_bounded(c): assert rd.bounded(c, 1) is c assert rd.bounded(rd.Empty, 1) is rd.Empty def test_basic_impossible_bounds_are_empty(): assert rd.bounded(rd.char(0), -1) is rd.Empty assert rd.bounded(rd.char(0), 0) is rd.Empty def test_bounds_are_not_nested(): x = rd.bounded(rd.star(rd.char(0)), 7) y = rd.bounded(x, 5)