def dominance(left, right):
    """Return the dominance relation between ``left`` and ``right``.

    One ConjectureResult dominates another if and only if it is better in
    every way. The things we currently consider to be "better" are:

    * Something that is smaller in shrinking order is better.
    * Something that has higher status is better.
    * Each ``interesting_origin`` is treated as its own score, so if two
      interesting examples have different origins then neither dominates
      the other.
    * For each target observation, a higher score is better.

    In "normal" operation where there are no bugs or target observations,
    the pareto front only has one element (the smallest valid test case),
    but for more structured or failing tests it can be useful to track,
    and future work will depend on it more.
    """
    if left.buffer == right.buffer:
        return DominanceRelation.EQUAL

    # Normalise so that ``left`` is the smaller side in shrink order by
    # answering the swapped question and translating the result back.
    if sort_key(right.buffer) < sort_key(left.buffer):
        swapped = dominance(right, left)
        if swapped == DominanceRelation.LEFT_DOMINATES:
            return DominanceRelation.RIGHT_DOMINATES
        # Because we have sort_key(left) < sort_key(right) after swapping,
        # the only other option is that the two are incomparable.
        assert swapped == DominanceRelation.NO_DOMINANCE
        return swapped

    # Either left is better or there is no dominance relationship.
    assert sort_key(left.buffer) < sort_key(right.buffer)

    if left.status < right.status:
        # The right is more interesting.
        return DominanceRelation.NO_DOMINANCE

    if not right.tags.issubset(left.tags):
        return DominanceRelation.NO_DOMINANCE

    # Things that are interesting for different reasons are incomparable in
    # the dominance relationship.
    if (
        left.status == Status.INTERESTING
        and left.interesting_origin != right.interesting_origin
    ):
        return DominanceRelation.NO_DOMINANCE

    # Left must score at least as high as right on every observed target.
    observed = set(left.target_observations) | set(right.target_observations)
    if any(
        left.target_observations.get(name, NO_SCORE)
        < right.target_observations.get(name, NO_SCORE)
        for name in observed
    ):
        return DominanceRelation.NO_DOMINANCE

    return DominanceRelation.LEFT_DOMINATES
def shrink_interesting_examples(self):
    """If we've found interesting examples, try to replace each of them
    with a minimal interesting example with the same interesting_origin.

    We may find one or more examples with a new interesting_origin during
    the shrink process. If so we shrink these too.
    """
    if Phase.shrink not in self.settings.phases or not self.interesting_examples:
        return

    self.debug("Shrinking interesting examples")

    # If the shrinking phase takes more than five minutes, abort it early and
    # print a warning. Many CI systems will kill a build after around ten
    # minutes with no output, and appearing to hang isn't great for
    # interactive use either - showing partially-shrunk examples is better
    # than quitting with no examples!
    self.finish_shrinking_deadline = perf_counter() + 300

    # Replay every known failure, smallest buffer first, so that stale
    # (no-longer-failing) examples surface as flakiness up front.
    for previous in sorted(
        self.interesting_examples.values(), key=lambda d: sort_key(d.buffer)
    ):
        assert previous.status == Status.INTERESTING
        replay = self.new_conjecture_data_for_buffer(previous.buffer)
        self.test_function(replay)
        if replay.status != Status.INTERESTING:
            self.exit_with(ExitReason.flaky)

    self.clear_secondary_key()

    while len(self.shrunk_examples) < len(self.interesting_examples):
        # Pick the not-yet-shrunk bug with the smallest current example,
        # breaking ties deterministically on the repr of its origin.
        def _priority(kv):
            return (sort_key(kv[1].buffer), sort_key(repr(kv[0])))

        pending = (
            (k, v)
            for k, v in self.interesting_examples.items()
            if k not in self.shrunk_examples
        )
        target, example = min(pending, key=_priority)
        self.debug("Shrinking %r" % (target,))

        if not self.settings.report_multiple_bugs:
            # If multi-bug reporting is disabled, we shrink our currently-minimal
            # failure, allowing 'slips' to any bug with a smaller minimal example.
            self.shrink(example, lambda d: d.status == Status.INTERESTING)
            return

        def predicate(d):
            if d.status < Status.INTERESTING:
                return False
            return d.interesting_origin == target

        self.shrink(example, predicate)
        self.shrunk_examples.add(target)
def shrink_interesting_examples(self):
    """If we've found interesting examples, try to replace each of them
    with a minimal interesting example with the same interesting_origin.

    We may find one or more examples with a new interesting_origin during
    the shrink process. If so we shrink these too.
    """
    if Phase.shrink not in self.settings.phases or not self.interesting_examples:
        return

    self.debug("Shrinking interesting examples")

    # Re-run each known failure (smallest first) to confirm it still fails
    # before we invest any effort in shrinking it.
    for result in sorted(
        self.interesting_examples.values(), key=lambda d: sort_key(d.buffer)
    ):
        assert result.status == Status.INTERESTING
        fresh = self.new_conjecture_data_for_buffer(result.buffer)
        self.test_function(fresh)
        if fresh.status != Status.INTERESTING:
            self.exit_with(ExitReason.flaky)

    self.clear_secondary_key()

    while len(self.shrunk_examples) < len(self.interesting_examples):
        remaining = [
            item
            for item in self.interesting_examples.items()
            if item[0] not in self.shrunk_examples
        ]
        # Smallest current example first; ties broken on the origin's repr
        # so the order is deterministic.
        target, example = min(
            remaining,
            key=lambda kv: (sort_key(kv[1].buffer), sort_key(repr(kv[0]))),
        )
        self.debug("Shrinking %r" % (target,))

        if not self.settings.report_multiple_bugs:
            # If multi-bug reporting is disabled, we shrink our currently-minimal
            # failure, allowing 'slips' to any bug with a smaller minimal example.
            self.shrink(example, lambda d: d.status == Status.INTERESTING)
            return

        def predicate(d, origin=target):
            # Only count results that fail for the same reason we are
            # currently shrinking.
            return d.status >= Status.INTERESTING and d.interesting_origin == origin

        self.shrink(example, predicate)
        self.shrunk_examples.add(target)
def clear_secondary_key(self):
    if not self.has_existing_examples():
        return
    # If we have any smaller examples in the secondary corpus, now is
    # a good time to try them to see if they work as shrinks. They
    # probably won't, but it's worth a shot and gives us a good
    # opportunity to clear out the database.

    # It's not worth trying the primary corpus because we already
    # tried all of those in the initial phase.
    corpus = sorted(self.settings.database.fetch(self.secondary_key), key=sort_key)
    for candidate in corpus:
        # The cap is recomputed each iteration because running a candidate
        # below may replace entries in self.interesting_examples.
        cap = max(
            sort_key(v.buffer) for v in self.interesting_examples.values()
        )
        if sort_key(candidate) > cap:
            break
        self.cached_test_function(candidate)
        # We unconditionally remove the candidate from the secondary key
        # as it is either now primary or worse than our primary example
        # of this reason for interestingness.
        self.settings.database.delete(self.secondary_key, candidate)
def clear_secondary_key(self):
    if self.has_existing_examples():
        # If we have any smaller examples in the secondary corpus, now is
        # a good time to try them to see if they work as shrinks. They
        # probably won't, but it's worth a shot and gives us a good
        # opportunity to clear out the database.

        # It's not worth trying the primary corpus because we already
        # tried all of those in the initial phase.
        for buf in sorted(
            self.settings.database.fetch(self.secondary_key), key=sort_key
        ):
            # Recomputed every pass: running a candidate below may change
            # our current set of best interesting examples.
            best_buffers = [v.buffer for v in self.interesting_examples.values()]
            threshold = max(map(sort_key, best_buffers))
            if sort_key(buf) > threshold:
                break
            self.cached_test_function(buf)
            # We unconditionally remove buf from the secondary key as it
            # is either now primary or worse than our primary example
            # of this reason for interestingness.
            self.settings.database.delete(self.secondary_key, buf)
def shrink_interesting_examples(self):
    """If we've found interesting examples, try to replace each of them
    with a minimal interesting example with the same interesting_origin.

    We may find one or more examples with a new interesting_origin during
    the shrink process. If so we shrink these too.
    """
    if Phase.shrink not in self.settings.phases or not self.interesting_examples:
        return

    # Confirm, smallest buffer first, that each recorded failure still
    # fails; one that no longer does means the test is flaky.
    for prev_data in sorted(
        self.interesting_examples.values(),
        key=lambda d: sort_key(d.buffer),
    ):
        assert prev_data.status == Status.INTERESTING
        data = self.new_conjecture_data_for_buffer(prev_data.buffer)
        self.test_function(data)
        if data.status != Status.INTERESTING:
            self.exit_with(ExitReason.flaky)

    self.clear_secondary_key()

    while len(self.shrunk_examples) < len(self.interesting_examples):
        # Deterministic choice: smallest example, then repr of the origin.
        def _candidate_key(kv):
            return (sort_key(kv[1].buffer), sort_key(repr(kv[0])))

        target, example = min(
            [
                (origin, value)
                for origin, value in self.interesting_examples.items()
                if origin not in self.shrunk_examples
            ],
            key=_candidate_key,
        )
        self.debug("Shrinking %r" % (target,))

        if not self.settings.report_multiple_bugs:
            # If multi-bug reporting is disabled, we shrink our currently-minimal
            # failure, allowing 'slips' to any bug with a smaller minimal example.
            self.shrink(example, lambda d: d.status == Status.INTERESTING)
            return

        def predicate(d):
            if d.status < Status.INTERESTING:
                return False
            return d.interesting_origin == target

        self.shrink(example, predicate)
        self.shrunk_examples.add(target)
def test_fuzz_one_input(buffer_type):
    # End-to-end check of the fuzz_one_input hook: feeding random buffers
    # through it should find a failing example, save it to the database, and
    # allow a later normal run to reproduce and shrink it.
    db = InMemoryExampleDatabase()
    seen = []   # every string the inner test body was actually called with
    seeds = []  # every raw buffer we fed to fuzz_one_input

    # This is a standard `@given` test, which we can also use as a fuzz target.
    # Note that we specify the DB so we can make more precise assertions,
    # and tighten the phases so we can be sure the failing examples come from fuzzing.
    @given(st.text())
    @settings(database=db, phases=[Phase.reuse, Phase.shrink])
    def test(s):
        seen.append(s)
        assert "\0" not in s, repr(s)

    # Before running fuzz_one_input, there's nothing in `db`, and so the test passes
    # (because example generation is disabled by the custom settings)
    test()
    assert len(seen) == 0

    # If we run a lot of random bytestrings through fuzz_one_input, we'll eventually
    # find a failing example.
    with pytest.raises(AssertionError):
        for _ in range(1000):
            buf = bytes(random.getrandbits(8) for _ in range(1000))
            seeds.append(buf)
            test.hypothesis.fuzz_one_input(buffer_type(buf))

    # fuzz_one_input returns False for invalid bytestrings, due to e.g. assume(False),
    # so the test body may have run fewer times than we supplied seeds.
    assert len(seen) <= len(seeds)

    # `db` contains exactly one failing example, which is either the most
    # recent seed that we tried or the pruned-and-canonicalised form of it.
    (saved_examples,) = db.data.values()
    assert len(saved_examples) == 1
    assert sort_key(seeds[-1]) >= sort_key(list(saved_examples)[0])

    # Now that we have a failure in `db`, re-running our test is sufficient to
    # reproduce it, *and shrink to a minimal example*.
    with pytest.raises(AssertionError):
        test()
    assert seen[-1] == "\0"
def shrink_interesting_examples(self):
    """If we've found interesting examples, try to replace each of them
    with a minimal interesting example with the same interesting_origin.

    We may find one or more examples with a new interesting_origin during
    the shrink process. If so we shrink these too.
    """
    if Phase.shrink not in self.settings.phases or not self.interesting_examples:
        return

    # Replay each recorded failure, smallest buffer first, so that a
    # failure which has gone stale is reported as flaky rather than
    # silently shrunk.
    by_size = sorted(
        self.interesting_examples.values(), key=lambda d: sort_key(d.buffer)
    )
    for prev_data in by_size:
        assert prev_data.status == Status.INTERESTING
        data = ConjectureData.for_buffer(prev_data.buffer)
        self.test_function(data)
        if data.status != Status.INTERESTING:
            self.exit_with(ExitReason.flaky)

    self.clear_secondary_key()

    while len(self.shrunk_examples) < len(self.interesting_examples):
        candidates = (
            pair
            for pair in self.interesting_examples.items()
            if pair[0] not in self.shrunk_examples
        )
        target, example = min(
            candidates,
            key=lambda kv: (sort_key(kv[1].buffer), sort_key(repr(kv[0]))),
        )
        self.debug("Shrinking %r" % (target,))

        def predicate(d):
            # Shrinks must remain interesting *for the same reason*.
            if d.status < Status.INTERESTING:
                return False
            return d.interesting_origin == target

        self.shrink(example, predicate)
        self.shrunk_examples.add(target)
def learn_a_new_dfa(runner, u, v, predicate):
    """Given two buffers ``u`` and ``v``, learn a DFA that will allow
    the shrinker to normalise them better. ``u`` and ``v`` should not
    currently shrink to the same test case when calling this function.
    """
    from hypothesis.internal.conjecture.shrinker import dfa_replacement, sort_key

    # Both inputs must satisfy the predicate before we start.
    assert predicate(runner.cached_test_function(u))
    assert predicate(runner.cached_test_function(v))

    u_shrunk = fully_shrink(runner, u, predicate)
    v_shrunk = fully_shrink(runner, v, predicate)

    u, v = sorted((u_shrunk.buffer, v_shrunk.buffer), key=sort_key)

    assert u != v

    assert not v.startswith(u)

    # We would like to avoid using LStar on large strings as its
    # behaviour can be quadratic or worse. In order to help achieve
    # this we peel off a common prefix and suffix of the two final
    # results and just learn the internal bit where they differ.
    #
    # This potentially reduces the length quite far if there's
    # just one tricky bit of control flow we're struggling to
    # reduce inside a strategy somewhere and the rest of the
    # test function reduces fine.
    if v.endswith(u):
        prefix = b""
        suffix = u
        u_core = b""
        assert len(u) > 0
        v_core = v[:-len(u)]
    else:
        # Find the longest common prefix...
        i = 0
        while u[i] == v[i]:
            i += 1
        prefix = u[:i]
        assert u.startswith(prefix)
        assert v.startswith(prefix)

        # ...and the longest common suffix that does not overlap the prefix.
        i = 1
        while u[-i] == v[-i]:
            i += 1
        suffix = u[max(len(prefix), len(u) + 1 - i):]
        assert u.endswith(suffix)
        assert v.endswith(suffix)

        u_core = u[len(prefix):len(u) - len(suffix)]
        v_core = v[len(prefix):len(v) - len(suffix)]

    assert u == prefix + u_core + suffix, (list(u), list(v))
    assert v == prefix + v_core + suffix, (list(u), list(v))

    better = runner.cached_test_function(u)
    worse = runner.cached_test_function(v)

    allow_discards = worse.has_discards or better.has_discards

    def is_valid_core(s):
        # Membership oracle for L*: is ``prefix + s + suffix`` an acceptable
        # replacement core, no longer than v_core and no shorter than u_core?
        if not (len(u_core) <= len(s) <= len(v_core)):
            return False
        buf = prefix + s + suffix
        result = runner.cached_test_function(buf)
        return (
            predicate(result)
            # Because we're often using this to learn strategies
            # rather than entire complex test functions, it's
            # important that our replacements are precise and
            # don't leave the rest of the test case in a weird
            # state.
            and result.buffer == buf
            # Because the shrinker is good at removing discarded
            # data, unless we need discards to allow one or both
            # of u and v to result in valid shrinks, we don't
            # count attempts that have them as valid. This will
            # cause us to match fewer strings, which will make
            # the resulting shrink pass more efficient when run
            # on test functions it wasn't really intended for.
            and (allow_discards or not result.has_discards))

    assert sort_key(u_core) < sort_key(v_core)

    assert is_valid_core(u_core)
    assert is_valid_core(v_core)

    learner = LStar(is_valid_core)

    # Iterate until the learner's DFA stops changing (its generation counter
    # no longer advances after a round of counter-examples).
    prev = -1
    while learner.generation != prev:
        prev = learner.generation

        learner.learn(u_core)
        learner.learn(v_core)

        # L* has a tendency to learn DFAs which wrap around to
        # the beginning. We don't want it to do that unless
        # it's accurate, so we use these as examples to
        # check going around the DFA twice.
        learner.learn(u_core * 2)
        learner.learn(v_core * 2)

        if learner.dfa.max_length(learner.dfa.start) > len(v_core):
            # The language we learn is finite and bounded above
            # by the length of v_core. This is important in order
            # to keep our shrink passes reasonably efficient -
            # otherwise they can match far too much. So whenever
            # we learn a DFA that could match a string longer
            # than len(v_core) we fix it by finding the first
            # string longer than v_core and learning that as
            # a correction.
            x = next(
                learner.dfa.all_matching_strings(min_length=len(v_core) + 1))
            assert not is_valid_core(x)
            learner.learn(x)
            assert not learner.dfa.matches(x)
            assert learner.generation != prev
        else:
            # We mostly care about getting the right answer on the
            # minimal test case, but because we're doing this offline
            # anyway we might as well spend a little more time trying
            # small examples to make sure the learner gets them right.
            for x in islice(learner.dfa.all_matching_strings(), 100):
                if not is_valid_core(x):
                    learner.learn(x)
                    assert learner.generation != prev
                    break

    # We've now successfully learned a DFA that works for shrinking
    # our failed normalisation further. Canonicalise it into a concrete
    # DFA so we can save it for later.
    new_dfa = learner.dfa.canonicalise()

    assert math.isfinite(new_dfa.max_length(new_dfa.start))

    shrinker = runner.new_shrinker(runner.cached_test_function(v), predicate)

    # The region of v we learned on must be recognised by the new DFA.
    assert (len(prefix), len(v) - len(suffix)) in shrinker.matching_regions(new_dfa)

    name = "tmp-dfa-" + repr(new_dfa)
    shrinker.extra_dfas[name] = new_dfa

    shrinker.fixate_shrink_passes([dfa_replacement(name)])

    # Sanity check: the new pass must make actual progress on v.
    assert sort_key(shrinker.buffer) < sort_key(v)

    return new_dfa
def test_function(self, data):
    # Run the (stoppable) test function on ``data`` and update engine state:
    # target observations, valid/interesting example bookkeeping, and the
    # various exit conditions.
    assert isinstance(data.observer, TreeRecordingObserver)
    self.call_count += 1

    interrupted = False
    try:
        self.__stoppable_test_function(data)
    except KeyboardInterrupt:
        interrupted = True
        raise
    except BaseException:
        # Save the buffer so the failing example can be reproduced later,
        # then let the error propagate.
        self.save_buffer(data.buffer)
        raise
    finally:
        # No branch, because if we're interrupted we always raise
        # the KeyboardInterrupt, never continue to the code below.
        if not interrupted:  # pragma: no branch
            data.freeze()
            self.note_details(data)

    self.debug_data(data)

    assert len(data.buffer) <= BUFFER_SIZE

    if data.status >= Status.VALID:
        # Keep the running best score for every target observation seen.
        for k, v in data.target_observations.items():
            self.best_observed_targets[k] = max(
                self.best_observed_targets[k], v)

    if data.status == Status.VALID:
        self.valid_examples += 1

    if data.status == Status.INTERESTING:
        key = data.interesting_origin
        changed = False
        try:
            existing = self.interesting_examples[key]
        except KeyError:
            # First example we've seen for this interesting_origin.
            changed = True
            self.last_bug_found_at = self.call_count
            if self.first_bug_found_at is None:
                self.first_bug_found_at = self.call_count
        else:
            if sort_key(data.buffer) < sort_key(existing.buffer):
                # Smaller in shrink order than the previous best for this
                # origin: count a shrink and displace the old example.
                self.shrinks += 1
                self.downgrade_buffer(existing.buffer)
                self.__data_cache.unpin(existing.buffer)
                changed = True

        if changed:
            self.save_buffer(data.buffer)
            self.interesting_examples[key] = data.as_result()
            # Keep the new best example pinned in the data cache.
            self.__data_cache.pin(data.buffer)
            self.shrunk_examples.discard(key)

        if self.shrinks >= MAX_SHRINKS:
            self.exit_with(ExitReason.max_shrinks)

    if not self.interesting_examples:
        # Note that this logic is reproduced to end the generation phase when
        # we have interesting examples. Update that too if you change this!
        # (The doubled implementation is because here we exit the engine entirely,
        # while in the other case below we just want to move on to shrinking.)
        if self.valid_examples >= self.settings.max_examples:
            self.exit_with(ExitReason.max_examples)
        if self.call_count >= max(
            self.settings.max_examples * 10,
            # We have a high-ish default max iterations, so that tests
            # don't become flaky when max_examples is too low.
            1000,
        ):
            self.exit_with(ExitReason.max_iterations)

    if self.__tree_is_exhausted():
        self.exit_with(ExitReason.finished)

    self.record_for_health_check(data)
def test_function(self, data):
    # Run the user's test function against ``data`` and record the outcome.
    # First, fail fast if the whole run has been going for too long overall.
    if benchmark_time() - self.start_time >= HUNG_TEST_TIME_LIMIT:
        fail_health_check(
            self.settings,
            ("Your test has been running for at least five minutes. This "
             "is probably not what you intended, so by default Hypothesis "
             "turns it into an error."),
            HealthCheck.hung_test,
        )
    self.call_count += 1
    try:
        self._test_function(data)
        data.freeze()
    except StopTest as e:
        if e.testcounter != data.testcounter:
            # A StopTest with a mismatched counter did not come from this
            # data object, so treat it like a real failure: save and re-raise.
            self.save_buffer(data.buffer)
            raise
    except BaseException:
        # Save the buffer so the failing example can be reproduced later.
        self.save_buffer(data.buffer)
        raise
    finally:
        data.freeze()
        self.note_details(data)

    self.target_selector.add(data)
    self.debug_data(data)
    if data.status == Status.VALID:
        self.valid_examples += 1

    # Record the test result in the tree, to avoid unnecessary work in
    # the future.

    # The tree has two main uses:

    # 1. It is mildly useful in some cases during generation where there is
    #    a high probability of duplication but it is possible to generate
    #    many examples. e.g. if we had input of the form none() | text()
    #    then we would generate duplicates 50% of the time, and would
    #    like to avoid that and spend more time exploring the text() half
    #    of the search space. The tree allows us to predict in advance if
    #    the test would lead to a duplicate and avoid that.
    # 2. When shrinking it is *extremely* useful to be able to anticipate
    #    duplication, because we try many similar and smaller test cases,
    #    and these will tend to have a very high duplication rate. This is
    #    where the tree usage really shines.
    #
    # Unfortunately, as well as being the less useful type of tree usage,
    # the first type is also the most expensive! Once we've entered shrink
    # mode our time remaining is essentially bounded - we're just here
    # until we've found the minimal example. In exploration mode, we might
    # be early on in a very long-running processs, and keeping everything
    # we've ever seen lying around ends up bloating our memory usage
    # substantially by causing us to use O(max_examples) memory.
    #
    # As a compromise, what we do is reset the cache every so often. This
    # keeps our memory usage bounded. It has a few unfortunate failure
    # modes in that it means that we can't always detect when we should
    # have stopped - if we are exploring a language which has only slightly
    # more than cache reset frequency number of members, we will end up
    # exploring indefinitely when we could have stopped. However, this is
    # a fairly unusual case - thanks to exponential blow-ups in language
    # size, most languages are either very large (possibly infinite) or
    # very small. Nevertheless we want CACHE_RESET_FREQUENCY to be quite
    # high to avoid this case coming up in practice.
    if (self.call_count % CACHE_RESET_FREQUENCY == 0
            and not self.interesting_examples):
        self.reset_tree_to_empty()

    self.tree.add(data)

    if data.status == Status.INTERESTING:
        key = data.interesting_origin
        changed = False
        try:
            existing = self.interesting_examples[key]
        except KeyError:
            # First example we've seen for this interesting_origin.
            changed = True
        else:
            if sort_key(data.buffer) < sort_key(existing.buffer):
                # Smaller in shrink order: count a shrink and displace the
                # previous best example for this origin.
                self.shrinks += 1
                self.downgrade_buffer(existing.buffer)
                changed = True

        if changed:
            self.save_buffer(data.buffer)
            self.interesting_examples[key] = data
            self.shrunk_examples.discard(key)

        if self.shrinks >= MAX_SHRINKS:
            self.exit_with(ExitReason.max_shrinks)

    if not self.interesting_examples:
        if self.valid_examples >= self.settings.max_examples:
            self.exit_with(ExitReason.max_examples)
        if self.call_count >= max(
            self.settings.max_examples * 10,
            # We have a high-ish default max iterations, so that tests
            # don't become flaky when max_examples is too low.
            1000,
        ):
            self.exit_with(ExitReason.max_iterations)

    if self.__tree_is_exhausted():
        self.exit_with(ExitReason.finished)

    self.record_for_health_check(data)
def test_function(self, data):
    # Run the (stoppable) test function on ``data`` and update engine state
    # with the outcome.
    assert isinstance(data.observer, TreeRecordingObserver)
    self.call_count += 1
    try:
        self.__stoppable_test_function(data)
    except BaseException:
        # Save the buffer so the failing example can be reproduced later,
        # then let the error propagate.
        self.save_buffer(data.buffer)
        raise
    finally:
        data.freeze()
        self.note_details(data)

    self.target_selector.add(data)
    self.debug_data(data)
    if data.status == Status.VALID:
        self.valid_examples += 1

    # Record the test result in the tree, to avoid unnecessary work in
    # the future.

    # The tree has two main uses:

    # 1. It is mildly useful in some cases during generation where there is
    #    a high probability of duplication but it is possible to generate
    #    many examples. e.g. if we had input of the form none() | text()
    #    then we would generate duplicates 50% of the time, and would
    #    like to avoid that and spend more time exploring the text() half
    #    of the search space. The tree allows us to predict in advance if
    #    the test would lead to a duplicate and avoid that.
    # 2. When shrinking it is *extremely* useful to be able to anticipate
    #    duplication, because we try many similar and smaller test cases,
    #    and these will tend to have a very high duplication rate. This is
    #    where the tree usage really shines.
    #
    # In aid of this, we keep around just enough of the structure of the
    # tree of examples we've seen so far to let us predict whether
    # something will lead to a known result, and to canonicalize it into
    # the buffer that would belong to the ConjectureData that you get
    # from running it.
    if data.status == Status.INTERESTING:
        key = data.interesting_origin
        changed = False
        try:
            existing = self.interesting_examples[key]
        except KeyError:
            # First example we've seen for this interesting_origin.
            changed = True
            self.last_bug_found_at = self.call_count
            if self.first_bug_found_at is None:
                self.first_bug_found_at = self.call_count
        else:
            if sort_key(data.buffer) < sort_key(existing.buffer):
                # Smaller in shrink order than our previous best for this
                # origin: count it as a shrink and displace the old example.
                self.shrinks += 1
                self.downgrade_buffer(existing.buffer)
                self.__data_cache.unpin(existing.buffer)
                changed = True

        if changed:
            self.save_buffer(data.buffer)
            self.interesting_examples[key] = data.as_result()
            # Keep the new best example pinned in the data cache.
            self.__data_cache.pin(data.buffer)
            self.shrunk_examples.discard(key)

        if self.shrinks >= MAX_SHRINKS:
            self.exit_with(ExitReason.max_shrinks)

    if not self.interesting_examples:
        # Note that this logic is reproduced to end the generation phase when
        # we have interesting examples. Update that too if you change this!
        # (The doubled implementation is because here we exit the engine entirely,
        # while in the other case below we just want to move on to shrinking.)
        if self.valid_examples >= self.settings.max_examples:
            self.exit_with(ExitReason.max_examples)
        if self.call_count >= max(
            self.settings.max_examples * 10,
            # We have a high-ish default max iterations, so that tests
            # don't become flaky when max_examples is too low.
            1000,
        ):
            self.exit_with(ExitReason.max_iterations)

    if self.__tree_is_exhausted():
        self.exit_with(ExitReason.finished)

    self.record_for_health_check(data)
def __init__(self, random):
    """Initialise an empty front with no pending data."""
    self.__pending = None
    # Listeners to be notified about evictions; registered elsewhere.
    self.__eviction_listeners = []
    # Randomness source kept for later use by this object's methods.
    self.__random = random

    # Keep the front ordered by the shrink order of each result's buffer.
    def buffer_order(result):
        return sort_key(result.buffer)

    self.front = SortedList(key=buffer_order)
def test_function(self, data):
    # Run the (stoppable) test function on ``data`` and update engine state:
    # per-call statistics, the pareto front, target observations, and the
    # interesting-example bookkeeping and exit conditions.
    assert isinstance(data.observer, TreeRecordingObserver)
    self.call_count += 1

    interrupted = False
    try:
        self.__stoppable_test_function(data)
    except KeyboardInterrupt:
        interrupted = True
        raise
    except BaseException:
        # Save the buffer so the failing example can be reproduced later.
        self.save_buffer(data.buffer)
        raise
    finally:
        # No branch, because if we're interrupted we always raise
        # the KeyboardInterrupt, never continue to the code below.
        if not interrupted:  # pragma: no branch
            data.freeze()
            # Record per-call statistics for reporting.
            call_stats = {
                "status": data.status.name.lower(),
                "runtime": data.finish_time - data.start_time,
                "drawtime": math.fsum(data.draw_times),
                "events": sorted({self.event_to_string(e) for e in data.events}),
            }
            self.stats_per_test_case.append(call_stats)
            self.__data_cache[data.buffer] = data.as_result()

    self.debug_data(data)

    # Results added to the pareto front are also persisted under a separate
    # sub-key so they can be reused across runs.
    if self.pareto_front is not None and self.pareto_front.add(data.as_result()):
        self.save_buffer(data.buffer, sub_key=b"pareto")

    assert len(data.buffer) <= BUFFER_SIZE

    if data.status >= Status.VALID:
        for k, v in data.target_observations.items():
            self.best_observed_targets[k] = max(self.best_observed_targets[k], v)

            if k not in self.best_examples_of_observed_targets:
                # First observation of this target: it's the best by default.
                self.best_examples_of_observed_targets[k] = data.as_result()
                continue

            existing_example = self.best_examples_of_observed_targets[k]
            existing_score = existing_example.target_observations[k]

            if v < existing_score:
                continue

            # Replace on a strictly better score, or on an equal score with
            # a smaller (in shrink order) buffer.
            if v > existing_score or sort_key(data.buffer) < sort_key(
                existing_example.buffer
            ):
                self.best_examples_of_observed_targets[k] = data.as_result()

    if data.status == Status.VALID:
        self.valid_examples += 1

    if data.status == Status.INTERESTING:
        key = data.interesting_origin
        changed = False
        try:
            existing = self.interesting_examples[key]
        except KeyError:
            # First example we've seen for this interesting_origin.
            changed = True
            self.last_bug_found_at = self.call_count
            if self.first_bug_found_at is None:
                self.first_bug_found_at = self.call_count
        else:
            if sort_key(data.buffer) < sort_key(existing.buffer):
                # Smaller in shrink order: count a shrink and displace the
                # previous best example for this origin.
                self.shrinks += 1
                self.downgrade_buffer(existing.buffer)
                self.__data_cache.unpin(existing.buffer)
                changed = True

        if changed:
            self.save_buffer(data.buffer)
            self.interesting_examples[key] = data.as_result()
            # Keep the new best example pinned in the data cache.
            self.__data_cache.pin(data.buffer)
            self.shrunk_examples.discard(key)

        if self.shrinks >= MAX_SHRINKS:
            self.exit_with(ExitReason.max_shrinks)

    if (
        self.finish_shrinking_deadline is not None
        and self.finish_shrinking_deadline < perf_counter()
    ):
        # See https://github.com/HypothesisWorks/hypothesis/issues/2340
        report(
            "WARNING: Hypothesis has spent more than five minutes working to shrink "
            "a failing example, and stopped because it is making very slow "
            "progress. When you re-run your tests, shrinking will resume and "
            "may take this long before aborting again.\n"
            "PLEASE REPORT THIS if you can provide a reproducing example, so that "
            "we can improve shrinking performance for everyone."
        )
        self.exit_with(ExitReason.very_slow_shrinking)

    if not self.interesting_examples:
        # Note that this logic is reproduced to end the generation phase when
        # we have interesting examples. Update that too if you change this!
        # (The doubled implementation is because here we exit the engine entirely,
        # while in the other case below we just want to move on to shrinking.)
        if self.valid_examples >= self.settings.max_examples:
            self.exit_with(ExitReason.max_examples)
        if self.call_count >= max(
            self.settings.max_examples * 10,
            # We have a high-ish default max iterations, so that tests
            # don't become flaky when max_examples is too low.
            1000,
        ):
            self.exit_with(ExitReason.max_iterations)

    if self.__tree_is_exhausted():
        self.exit_with(ExitReason.finished)

    self.record_for_health_check(data)
def test_function(self, data):
    # Execute the (stoppable) test function against ``data`` and fold the
    # result into the engine's bookkeeping and exit conditions.
    assert isinstance(data.observer, TreeRecordingObserver)
    self.call_count += 1
    try:
        self.__stoppable_test_function(data)
    except BaseException:
        # Persist the buffer so the failure can be replayed, then re-raise.
        self.save_buffer(data.buffer)
        raise
    finally:
        data.freeze()
        self.note_details(data)

    self.target_selector.add(data)
    self.debug_data(data)
    if data.status == Status.VALID:
        self.valid_examples += 1

    # Record the test result in the tree, to avoid unnecessary work in
    # the future.

    # The tree has two main uses:

    # 1. It is mildly useful in some cases during generation where there is
    #    a high probability of duplication but it is possible to generate
    #    many examples. e.g. if we had input of the form none() | text()
    #    then we would generate duplicates 50% of the time, and would
    #    like to avoid that and spend more time exploring the text() half
    #    of the search space. The tree allows us to predict in advance if
    #    the test would lead to a duplicate and avoid that.
    # 2. When shrinking it is *extremely* useful to be able to anticipate
    #    duplication, because we try many similar and smaller test cases,
    #    and these will tend to have a very high duplication rate. This is
    #    where the tree usage really shines.
    #
    # In aid of this, we keep around just enough of the structure of the
    # tree of examples we've seen so far to let us predict whether
    # something will lead to a known result, and to canonicalize it into
    # the buffer that would belong to the ConjectureData that you get
    # from running it.
    if data.status == Status.INTERESTING:
        key = data.interesting_origin
        changed = False
        try:
            existing = self.interesting_examples[key]
        except KeyError:
            # No previous example for this origin: record when this (and
            # possibly the first ever) bug was found.
            changed = True
            self.last_bug_found_at = self.call_count
            if self.first_bug_found_at is None:
                self.first_bug_found_at = self.call_count
        else:
            if sort_key(data.buffer) < sort_key(existing.buffer):
                # A strictly smaller example for the same origin counts as
                # a shrink; demote and unpin the old one.
                self.shrinks += 1
                self.downgrade_buffer(existing.buffer)
                self.__data_cache.unpin(existing.buffer)
                changed = True

        if changed:
            self.save_buffer(data.buffer)
            self.interesting_examples[key] = data.as_result()
            # Keep the new best example pinned in the data cache.
            self.__data_cache.pin(data.buffer)
            self.shrunk_examples.discard(key)

        if self.shrinks >= MAX_SHRINKS:
            self.exit_with(ExitReason.max_shrinks)

    if not self.interesting_examples:
        # Note that this logic is reproduced to end the generation phase when
        # we have interesting examples. Update that too if you change this!
        # (The doubled implementation is because here we exit the engine entirely,
        # while in the other case below we just want to move on to shrinking.)
        if self.valid_examples >= self.settings.max_examples:
            self.exit_with(ExitReason.max_examples)
        if self.call_count >= max(
            self.settings.max_examples * 10,
            # We have a high-ish default max iterations, so that tests
            # don't become flaky when max_examples is too low.
            1000,
        ):
            self.exit_with(ExitReason.max_iterations)

    if self.__tree_is_exhausted():
        self.exit_with(ExitReason.finished)

    self.record_for_health_check(data)