def test_relearning_does_not_change_generation():
    """Learning the same string twice only bumps ``generation`` once.

    The first time the learner sees a string it misclassifies, it must
    refine its model; re-presenting an already-correctly-classified
    string should be a no-op.
    """
    learner = LStar(lambda s: len(s) >= 3)

    generation_before = learner.generation
    learner.learn(bytes(3))
    # First exposure forces a refinement, so the generation advances.
    assert generation_before != learner.generation

    generation_before = learner.generation
    learner.learn(bytes(3))
    # Relearning a known string must leave the model untouched.
    assert generation_before == learner.generation
def test_can_learn_dead_nodes():
    """The learner can identify states from which no string can ever match.

    The language is "exactly four bytes, all <= 1", so any transition on a
    byte > 1 must lead to a dead state.

    Fix: the original repeated ``assert learner.dfa.matches(bytes([1] * 4))``
    twice verbatim; the duplicate added no coverage and has been removed.
    """
    learner = LStar(lambda s: len(s) == 4 and max(s) <= 1)
    learner.learn(bytes(4))
    assert learner.dfa.matches(bytes(4))
    assert learner.dfa.matches(bytes([1] * 4))

    learner.learn([2, 0, 0, 0])
    # Need a length 5 string to distinguish this from
    # something that just loops back to zero.
    learner.learn([2, 0, 0, 0, 0])

    dfa = learner.dfa
    # Any byte outside {0, 1} leads somewhere no extension can match.
    assert dfa.is_dead(dfa.transition(dfa.start, 2))
    assert dfa.is_dead(dfa.transition(dfa.start, 3))
def test_iteration_with_dead_nodes():
    """Enumeration of matching strings skips dead regions of the DFA."""
    learner = LStar(lambda s: len(s) == 3 and max(s) <= 1 and s[1] == 0)
    for example in ([1, 0, 1], [1, 1, 1], [0, 1, 1], [1, 1, 0], [1, 1, 1, 0, 1]):
        learner.learn(example)

    dfa = learner.dfa
    after_one = dfa.transition(dfa.start, 1)
    assert not dfa.is_dead(after_one)
    # A 2 in the second position can never be completed into a match.
    assert dfa.is_dead(dfa.transition(after_one, 2))

    # The language is exactly {a, 0, b : a, b in {0, 1}}.
    expected = [bytes([a, 0, b]) for a in (0, 1) for b in (0, 1)]
    assert list(learner.dfa.all_matching_strings()) == expected
def test_learning_is_just_checking_when_fully_explored():
    """Once the DFA is complete, learning costs one membership query.

    We count calls to the membership predicate: after the learner has
    fully explored the (finite) language, a further ``learn`` call for a
    correctly-predicted string should perform exactly one check.
    """
    calls = [0]

    def accept(s):
        calls[0] += 1
        return len(s) <= 5 and all(c == 0 for c in s)

    learner = LStar(accept)

    for byte in (0, 1):
        for length in range(10):
            learner.learn(bytes([byte]) * length)

    assert list(learner.dfa.all_matching_strings()) == [bytes(n) for n in range(6)]

    before = calls[0]
    learner.learn([2] * 11)
    # Exactly one predicate call: the learner already predicts the answer
    # and only needs to confirm it.
    assert calls[0] - before == 1
def learner_for(strategy):
    """Returns an LStar learner that predicts whether a buffer
    corresponds to a discard free choice sequence leading to
    a valid value for this strategy.

    Results are memoised per-strategy in the module-level ``LEARNERS``
    cache, so repeated calls with the same strategy are cheap.
    """
    # EAFP cache lookup: fall through and build the learner on a miss.
    try:
        return LEARNERS[strategy]
    except KeyError:
        pass

    def test_function(data):
        data.draw(strategy)
        data.mark_interesting()

    # Deterministic runner (seeded Random, no database) so the learned
    # DFA is reproducible across runs.
    runner = ConjectureRunner(
        test_function,
        settings=settings(
            database=None,
            verbosity=Verbosity.quiet,
            suppress_health_check=HealthCheck.all(),
        ),
        random=Random(0),
        ignore_limits=True,
    )

    def predicate(s):
        # A buffer is in the language iff it runs to a valid result,
        # discards nothing, and is consumed exactly (no trailing bytes).
        result = runner.cached_test_function(s)
        if result.status < Status.VALID:
            return False
        if result.has_discards:
            return False
        return result.buffer == s

    learner = LStar(predicate)

    runner.run()

    # The runner marks every draw interesting, so there is exactly one
    # interesting example: the shrunk minimal buffer.
    (v,) = runner.interesting_examples.values()

    # We make sure the learner has properly learned small examples.
    # This is all fairly ad hoc but is mostly designed to get it
    # to understand what the smallest example is and avoid any
    # loops at the beginning of the DFA that don't really exist.
    learner.learn(v.buffer)
    for n in [1, 2, 3]:
        for _ in range(5):
            # ``uniform`` presumably draws n random bytes — prepending
            # them perturbs the start of the buffer.
            learner.learn(uniform(runner.random, n) + v.buffer)

    # Iterate to a fixed point: keep feeding random examples until a
    # whole round of learning leaves the generation unchanged.
    prev = -1
    while learner.generation != prev:
        prev = learner.generation

        for _ in range(10):
            s = uniform(runner.random, len(v.buffer)) + bytes(BUFFER_SIZE)
            learner.learn(s)
            data = runner.cached_test_function(s)
            if data.status >= Status.VALID:
                # Also learn the exact buffer the runner consumed, which
                # is the canonical member corresponding to ``s``.
                learner.learn(data.buffer)

    LEARNERS[strategy] = learner
    return learner
def learn_a_new_dfa(runner, u, v, predicate):
    """Given two buffers ``u`` and ``v``, learn a DFA that will allow
    the shrinker to normalise them better. ``u`` and ``v`` should not
    currently shrink to the same test case when calling this function."""
    # Imported here, not at module level — presumably to avoid an import
    # cycle with the shrinker module; confirm before hoisting.
    from hypothesis.internal.conjecture.shrinker import dfa_replacement, sort_key

    assert predicate(runner.cached_test_function(u))
    assert predicate(runner.cached_test_function(v))

    u_shrunk = fully_shrink(runner, u, predicate)
    v_shrunk = fully_shrink(runner, v, predicate)

    # Order so that u is the "better" (smaller by shrink order) buffer.
    u, v = sorted((u_shrunk.buffer, v_shrunk.buffer), key=sort_key)

    assert u != v

    assert not v.startswith(u)

    # We would like to avoid using LStar on large strings as its
    # behaviour can be quadratic or worse. In order to help achieve
    # this we peel off a common prefix and suffix of the two final
    # results and just learn the internal bit where they differ.
    #
    # This potentially reduces the length quite far if there's
    # just one tricky bit of control flow we're struggling to
    # reduce inside a strategy somewhere and the rest of the
    # test function reduces fine.
    if v.endswith(u):
        # Degenerate case: all of u is a suffix of v, so the differing
        # "core" of u is empty.
        prefix = b""
        suffix = u
        u_core = b""
        assert len(u) > 0
        v_core = v[:-len(u)]
    else:
        # Longest common prefix (guaranteed to terminate before either
        # end because u is not a prefix of v).
        i = 0
        while u[i] == v[i]:
            i += 1
        prefix = u[:i]
        assert u.startswith(prefix)
        assert v.startswith(prefix)

        # Longest common suffix, clamped so it cannot overlap the prefix.
        i = 1
        while u[-i] == v[-i]:
            i += 1

        suffix = u[max(len(prefix), len(u) + 1 - i):]
        assert u.endswith(suffix)
        assert v.endswith(suffix)

        u_core = u[len(prefix):len(u) - len(suffix)]
        v_core = v[len(prefix):len(v) - len(suffix)]

    # Sanity-check the decomposition round-trips exactly.
    assert u == prefix + u_core + suffix, (list(u), list(v))
    assert v == prefix + v_core + suffix, (list(u), list(v))

    better = runner.cached_test_function(u)
    worse = runner.cached_test_function(v)

    allow_discards = worse.has_discards or better.has_discards

    def is_valid_core(s):
        # Membership predicate for the language we learn: cores that,
        # re-wrapped in prefix/suffix, still satisfy the caller's
        # predicate. Length-bounded by [len(u_core), len(v_core)].
        if not (len(u_core) <= len(s) <= len(v_core)):
            return False
        buf = prefix + s + suffix
        result = runner.cached_test_function(buf)
        return (
            predicate(result)
            # Because we're often using this to learn strategies
            # rather than entire complex test functions, it's
            # important that our replacements are precise and
            # don't leave the rest of the test case in a weird
            # state.
            and result.buffer == buf
            # Because the shrinker is good at removing discarded
            # data, unless we need discards to allow one or both
            # of u and v to result in valid shrinks, we don't
            # count attempts that have them as valid. This will
            # cause us to match fewer strings, which will make
            # the resulting shrink pass more efficient when run
            # on test functions it wasn't really intended for.
            and (allow_discards or not result.has_discards)
        )

    assert sort_key(u_core) < sort_key(v_core)

    assert is_valid_core(u_core)
    assert is_valid_core(v_core)

    learner = LStar(is_valid_core)

    # Learn to a fixed point: repeat until a full round leaves the
    # learner's generation unchanged.
    prev = -1
    while learner.generation != prev:
        prev = learner.generation

        learner.learn(u_core)
        learner.learn(v_core)

        # L* has a tendency to learn DFAs which wrap around to
        # the beginning. We don't want it to do that unless
        # it's accurate, so we use these as examples to
        # check going around the DFA twice.
        learner.learn(u_core * 2)
        learner.learn(v_core * 2)

        if learner.dfa.max_length(learner.dfa.start) > len(v_core):
            # The language we learn is finite and bounded above
            # by the length of v_core. This is important in order
            # to keep our shrink passes reasonably efficient -
            # otherwise they can match far too much. So whenever
            # we learn a DFA that could match a string longer
            # than len(v_core) we fix it by finding the first
            # string longer than v_core and learning that as
            # a correction.
            x = next(
                learner.dfa.all_matching_strings(min_length=len(v_core) + 1))
            assert not is_valid_core(x)
            learner.learn(x)
            assert not learner.dfa.matches(x)
            assert learner.generation != prev
        else:
            # We mostly care about getting the right answer on the
            # minimal test case, but because we're doing this offline
            # anyway we might as well spend a little more time trying
            # small examples to make sure the learner gets them right.
            for x in islice(learner.dfa.all_matching_strings(), 100):
                if not is_valid_core(x):
                    learner.learn(x)
                    assert learner.generation != prev
                    break

    # We've now successfully learned a DFA that works for shrinking
    # our failed normalisation further. Canonicalise it into a concrete
    # DFA so we can save it for later.
    new_dfa = learner.dfa.canonicalise()

    assert math.isfinite(new_dfa.max_length(new_dfa.start))

    # Smoke-test the learned DFA: a shrink pass built from it must
    # actually improve v before we hand the DFA back to the caller.
    shrinker = runner.new_shrinker(runner.cached_test_function(v), predicate)

    assert (len(prefix), len(v) - len(suffix)) in shrinker.matching_regions(new_dfa)

    name = "tmp-dfa-" + repr(new_dfa)
    shrinker.extra_dfas[name] = new_dfa

    shrinker.fixate_shrink_passes([dfa_replacement(name)])

    assert sort_key(shrinker.buffer) < sort_key(v)

    return new_dfa
def test_iterates_over_learned_strings():
    """After learning to a fixed point, enumeration yields exactly the
    language's members (here, 9 two-byte strings <= [1, 2])."""
    upper_bound = bytes([1, 2])
    learner = LStar(lambda s: len(s) == 2 and max(s) <= 5 and s <= upper_bound)
    learner.learn(upper_bound)

    # Keep feeding the same batch of examples until a whole round no
    # longer changes the learner's generation.
    prev = -1
    while learner.generation != prev:
        prev = learner.generation

        batch = [[1, 2, 0], [6, 1, 2], [1, 3]]
        for second in range(7):
            batch.append([0, second])
            batch.append([1, second])
        batch += [
            [2, 0],
            [2, 0, 0, 0],
            [2, 0, 0],
            [0, 6, 0, 0],
            [1, 3, 0, 0],
            [1, 6, 0, 0],
            [0, 0, 0, 0, 0],
        ]
        for example in batch:
            learner.learn(example)

    dfa = learner.dfa

    expected_count = 9
    # Take one extra element to prove there is no tenth match.
    matches = list(itertools.islice(dfa.all_matching_strings(), expected_count + 1))
    for matched in matches:
        assert learner.member(matched), list(matched)
    assert len(matches) == expected_count
def test_cannot_reuse_dfa():
    """A DFA handle becomes invalid once the learner refines its model."""
    learner = LStar(lambda s: len(s) == 3)
    stale_dfa = learner.dfa
    # Any learning step may restructure the model, invalidating the
    # previously obtained DFA view.
    learner.learn(bytes(3))
    with pytest.raises(InvalidState):
        stale_dfa.start
def test_iterates_over_learned_strings_basic():
    """Basic check that ``all_matching_strings`` enumerates exactly the
    learned language (9 matches, no tenth).

    Fix: this function previously duplicated the name of the earlier
    ``test_iterates_over_learned_strings``; Python's later-definition-wins
    rule meant the earlier, more thorough test was silently never
    collected by pytest. Renamed so both tests run.
    """
    upper_bound = bytes([1, 2])
    learner = LStar(lambda s: len(s) == 2 and max(s) <= 5 and s <= upper_bound)
    learner.learn(upper_bound)
    learner.learn([1, 2, 0])
    learner.learn([6, 1, 2])
    learner.learn([1, 3])
    learner.learn([0, 5])
    learner.learn([0, 6])
    learner.learn([2, 0])
    learner.learn([2, 0, 0, 0])
    learner.learn([2, 0, 0])

    dfa = learner.dfa

    n = 9
    # Request n + 1 elements so the assertion also proves there is no
    # extra, unexpected match.
    matches = list(itertools.islice(dfa.all_matching_strings(), n + 1))
    assert len(matches) == n