Example #1
0
def dominance(left, right):
    """Return the dominance relation between ``left`` and ``right``.

    One ConjectureResult dominates another if and only if it is better in
    every way.

    The things we currently consider to be "better" are:

        * Something that is smaller in shrinking order is better.
        * Something that has higher status is better.
        * Something that carries every tag the other one has is better.
        * Each ``interesting_origin`` is treated as its own score, so if two
          interesting examples have different origins then neither dominates
          the other.
        * For each target observation, a higher score is better.

    In "normal" operation where there are no bugs or target observations, the
    pareto front only has one element (the smallest valid test case), but for
    more structured or failing tests it can be useful to track, and future work
    will depend on it more.

    Returns a ``DominanceRelation`` member: ``EQUAL`` when both buffers are
    identical, ``LEFT_DOMINATES`` or ``RIGHT_DOMINATES`` when one side is at
    least as good as the other on every criterion above, and ``NO_DOMINANCE``
    when each side is better than the other in some respect.
    """

    if left.buffer == right.buffer:
        return DominanceRelation.EQUAL

    if sort_key(right.buffer) < sort_key(left.buffer):
        # Normalise so that the smaller buffer is always the first argument,
        # then translate the answer back into the caller's orientation.
        result = dominance(right, left)
        if result == DominanceRelation.LEFT_DOMINATES:
            return DominanceRelation.RIGHT_DOMINATES
        else:
            # In this branch sort_key(right) < sort_key(left), so ``left`` can
            # never dominate: the only options are that right is better than
            # left or that the two are incomparable.
            assert result == DominanceRelation.NO_DOMINANCE
            return result

    # Either left is better or there is no dominance relationship.
    assert sort_key(left.buffer) < sort_key(right.buffer)

    # The right is more interesting
    if left.status < right.status:
        return DominanceRelation.NO_DOMINANCE

    # The right has a tag that the left lacks.
    if not right.tags.issubset(left.tags):
        return DominanceRelation.NO_DOMINANCE

    # Things that are interesting for different reasons are incomparable in
    # the dominance relationship. This only applies when *both* sides are
    # interesting: comparing origins against a non-interesting ``right`` would
    # stop an interesting example from ever dominating a lower-status one,
    # contradicting the "higher status is better" rule.
    if (
        left.status == Status.INTERESTING
        and right.status == Status.INTERESTING
        and left.interesting_origin != right.interesting_origin
    ):
        return DominanceRelation.NO_DOMINANCE

    # A target missing from one side is scored as NO_SCORE for that side.
    for target in set(left.target_observations) | set(right.target_observations):
        left_score = left.target_observations.get(target, NO_SCORE)
        right_score = right.target_observations.get(target, NO_SCORE)
        if right_score > left_score:
            return DominanceRelation.NO_DOMINANCE

    return DominanceRelation.LEFT_DOMINATES
Example #2
0
    def shrink_interesting_examples(self):
        """Reduce each known interesting example to a minimal one.

        Every example is replaced by a minimal interesting example that has
        the same ``interesting_origin``. Shrinking may surface examples with
        previously unseen origins; those get shrunk as well.
        """
        if not (Phase.shrink in self.settings.phases and self.interesting_examples):
            return

        self.debug("Shrinking interesting examples")

        # Give the whole shrinking phase a five minute budget. CI systems tend
        # to kill builds that stay silent for around ten minutes, and looking
        # hung is unpleasant interactively too, so a partially-shrunk example
        # beats reporting nothing at all.
        self.finish_shrinking_deadline = perf_counter() + 300

        # Replay every known failure, smallest first. One that is no longer
        # interesting ends the run with ExitReason.flaky.
        replay_order = sorted(
            self.interesting_examples.values(), key=lambda d: sort_key(d.buffer)
        )
        for known in replay_order:
            assert known.status == Status.INTERESTING
            replayed = self.new_conjecture_data_for_buffer(known.buffer)
            self.test_function(replayed)
            if replayed.status != Status.INTERESTING:
                self.exit_with(ExitReason.flaky)

        self.clear_secondary_key()

        def shrink_order(item):
            # Smallest buffer first; ties are broken by the origin's repr.
            origin, result = item
            return (sort_key(result.buffer), sort_key(repr(origin)))

        while len(self.shrunk_examples) < len(self.interesting_examples):
            pending = (
                item
                for item in self.interesting_examples.items()
                if item[0] not in self.shrunk_examples
            )
            target, example = min(pending, key=shrink_order)
            self.debug("Shrinking %r" % (target,))

            if not self.settings.report_multiple_bugs:
                # With multi-bug reporting off we only shrink the currently
                # smallest failure, and accept 'slipping' to any other bug
                # that has a smaller minimal example.
                self.shrink(example, lambda d: d.status == Status.INTERESTING)
                return

            def predicate(d):
                if d.status >= Status.INTERESTING:
                    return d.interesting_origin == target
                return False

            self.shrink(example, predicate)

            self.shrunk_examples.add(target)
Example #3
0
    def shrink_interesting_examples(self):
        """Shrink each interesting example down to a minimal representative.

        The replacement for an example is a minimal interesting example with
        the same ``interesting_origin``. If shrinking uncovers examples with
        new origins, those are shrunk too.
        """
        shrinking_enabled = Phase.shrink in self.settings.phases
        if not shrinking_enabled or not self.interesting_examples:
            return

        self.debug("Shrinking interesting examples")

        # Re-run what we already have (smallest buffer first) and bail out as
        # flaky if any of it has stopped being interesting.
        by_buffer = sorted(
            self.interesting_examples.values(), key=lambda d: sort_key(d.buffer)
        )
        for earlier in by_buffer:
            assert earlier.status == Status.INTERESTING
            rerun = self.new_conjecture_data_for_buffer(earlier.buffer)
            self.test_function(rerun)
            if rerun.status != Status.INTERESTING:
                self.exit_with(ExitReason.flaky)

        self.clear_secondary_key()

        while len(self.shrunk_examples) < len(self.interesting_examples):
            # Pick the not-yet-shrunk origin with the smallest example,
            # breaking ties on the repr of the origin.
            unshrunk = [
                (origin, result)
                for origin, result in self.interesting_examples.items()
                if origin not in self.shrunk_examples
            ]
            target, example = min(
                unshrunk,
                key=lambda pair: (sort_key(pair[1].buffer), sort_key(repr(pair[0]))),
            )
            self.debug("Shrinking %r" % (target,))

            if not self.settings.report_multiple_bugs:
                # Single-bug mode: shrink the current minimal failure and let
                # it 'slip' to whichever bug has the smaller minimal example.
                self.shrink(example, lambda d: d.status == Status.INTERESTING)
                return

            def predicate(d):
                if d.status >= Status.INTERESTING:
                    return d.interesting_origin == target
                return False

            self.shrink(example, predicate)

            self.shrunk_examples.add(target)
Example #4
0
    def clear_secondary_key(self):
        """Try secondary-corpus entries as shrinks and drop them from the database.

        Does nothing unless ``has_existing_examples()`` is true.
        """
        if not self.has_existing_examples():
            return

        # Smaller examples in the secondary corpus are worth one attempt as
        # shrinks. They probably won't work, but the attempt is cheap and it
        # doubles as a chance to clear out the database.
        #
        # The primary corpus is skipped: all of it was already tried in the
        # initial phase.
        secondary = sorted(
            self.settings.database.fetch(self.secondary_key), key=sort_key
        )
        for candidate in secondary:
            # The cap is recomputed from interesting_examples on every
            # iteration rather than once up front.
            primary_buffers = {
                result.buffer for result in self.interesting_examples.values()
            }
            largest_primary = max(sort_key(buf) for buf in primary_buffers)

            if sort_key(candidate) > largest_primary:
                break

            self.cached_test_function(candidate)
            # The candidate is removed from the secondary key no matter what:
            # it is either primary now, or worse than our primary example of
            # this reason for interestingness.
            self.settings.database.delete(self.secondary_key, candidate)
    def clear_secondary_key(self):
        """Replay the secondary corpus as shrink candidates, pruning as we go.

        Returns immediately when ``has_existing_examples()`` is false.
        """
        if not self.has_existing_examples():
            return

        # Any smaller example sitting in the secondary corpus gets a single
        # try as a shrink here. Success is unlikely, but it costs little and
        # lets us tidy the database at the same time.
        #
        # Primary corpus entries are not retried; the initial phase already
        # ran every one of them.
        database = self.settings.database
        ordered_corpus = sorted(database.fetch(self.secondary_key), key=sort_key)

        for entry in ordered_corpus:
            # Recomputed per entry from the current interesting_examples.
            known_buffers = {v.buffer for v in self.interesting_examples.values()}
            cap = max(sort_key(buffer) for buffer in known_buffers)

            if sort_key(entry) > cap:
                break

            self.cached_test_function(entry)
            # Always delete: by now the entry is either primary, or it is
            # worse than our primary example of this reason for
            # interestingness.
            self.settings.database.delete(self.secondary_key, entry)
    def shrink_interesting_examples(self):
        """Replace every interesting example with a minimal equivalent.

        "Equivalent" means interesting with the same ``interesting_origin``.
        New origins may turn up while shrinking; their examples are shrunk in
        turn.
        """
        if not (Phase.shrink in self.settings.phases and self.interesting_examples):
            return

        # First confirm that everything we know about still fails, going from
        # the smallest buffer upwards. Anything that does not is flaky.
        smallest_first = sorted(
            self.interesting_examples.values(), key=lambda d: sort_key(d.buffer)
        )
        for recorded in smallest_first:
            assert recorded.status == Status.INTERESTING
            fresh = self.new_conjecture_data_for_buffer(recorded.buffer)
            self.test_function(fresh)
            if fresh.status != Status.INTERESTING:
                self.exit_with(ExitReason.flaky)

        self.clear_secondary_key()

        def priority(entry):
            # Order by example size, then by the repr of the origin.
            origin, result = entry
            return (sort_key(result.buffer), sort_key(repr(origin)))

        while len(self.shrunk_examples) < len(self.interesting_examples):
            candidates = (
                entry
                for entry in self.interesting_examples.items()
                if entry[0] not in self.shrunk_examples
            )
            target, example = min(candidates, key=priority)
            self.debug("Shrinking %r" % (target,))

            if not self.settings.report_multiple_bugs:
                # Only one bug will be reported, so shrink the smallest
                # failure we have and allow 'slips' to any bug whose minimal
                # example is smaller.
                self.shrink(example, lambda d: d.status == Status.INTERESTING)
                return

            def predicate(d):
                if d.status >= Status.INTERESTING:
                    return d.interesting_origin == target
                return False

            self.shrink(example, predicate)

            self.shrunk_examples.add(target)
Example #7
0
def test_fuzz_one_input(buffer_type):
    """Drive a ``@given`` test through ``fuzz_one_input`` and check the database.

    ``buffer_type`` wraps each random bytestring before it is handed to the
    fuzz target.
    """
    db = InMemoryExampleDatabase()
    observed = []
    attempted = []

    # An ordinary `@given` test that doubles as a fuzz target. The database
    # is passed explicitly so that we can assert on its contents, and the
    # phases exclude generation so failing examples can only come from fuzzing.
    @given(st.text())
    @settings(database=db, phases=[Phase.reuse, Phase.shrink])
    def test(s):
        observed.append(s)
        assert "\0" not in s, repr(s)

    # `db` starts out empty and the settings above disable generation, so a
    # plain run executes nothing and therefore passes.
    test()
    assert not observed

    # Feeding enough random bytestrings to fuzz_one_input eventually hits a
    # failing example.
    with pytest.raises(AssertionError):
        for _ in range(1000):
            raw = bytes(random.getrandbits(8) for _ in range(1000))
            attempted.append(raw)
            test.hypothesis.fuzz_one_input(buffer_type(raw))

    # The test body never runs more often than we supplied seeds.
    assert len(observed) <= len(attempted)

    # Exactly one failing example was saved to `db`: either the last seed we
    # tried, or a pruned-and-canonicalised form of it.
    (saved_examples,) = db.data.values()
    assert len(saved_examples) == 1
    (saved,) = saved_examples
    assert sort_key(saved) <= sort_key(attempted[-1])

    # With a failure stored in `db`, simply re-running the test reproduces it
    # *and shrinks it to a minimal example*.
    with pytest.raises(AssertionError):
        test()
    assert observed[-1] == "\0"
Example #8
0
    def shrink_interesting_examples(self):
        """Minimise every interesting example found so far.

        Each one is replaced by a minimal interesting example with the same
        ``interesting_origin``. Examples with new origins discovered during
        shrinking are shrunk as well.
        """
        if not (Phase.shrink in self.settings.phases and self.interesting_examples):
            return

        # Check that the recorded failures still fail, smallest buffer first.
        # If one of them does not, stop with ExitReason.flaky.
        ordered = sorted(
            self.interesting_examples.values(), key=lambda d: sort_key(d.buffer)
        )
        for stored in ordered:
            assert stored.status == Status.INTERESTING
            rerun = ConjectureData.for_buffer(stored.buffer)
            self.test_function(rerun)
            if rerun.status != Status.INTERESTING:
                self.exit_with(ExitReason.flaky)

        self.clear_secondary_key()

        def rank(entry):
            # Smallest example first, ties broken by the origin's repr.
            origin, result = entry
            return (sort_key(result.buffer), sort_key(repr(origin)))

        while len(self.shrunk_examples) < len(self.interesting_examples):
            outstanding = (
                entry
                for entry in self.interesting_examples.items()
                if entry[0] not in self.shrunk_examples
            )
            target, example = min(outstanding, key=rank)
            self.debug("Shrinking %r" % (target, ))

            def predicate(d):
                if d.status >= Status.INTERESTING:
                    return d.interesting_origin == target
                return False

            self.shrink(example, predicate)

            self.shrunk_examples.add(target)
Example #9
0
def learn_a_new_dfa(runner, u, v, predicate):
    """Given two buffers ``u`` and ``v``, learn a DFA that will
    allow the shrinker to normalise them better. ``u`` and ``v``
    should not currently shrink to the same test case when calling
    this function.

    ``runner`` is used to execute buffers (via ``cached_test_function``)
    and to build a shrinker; ``predicate`` is called on the results of
    those executions and must accept both ``u`` and ``v``.

    Returns the canonicalised DFA that was learned. Before returning, the
    function asserts that applying this DFA as a shrink pass improves on
    the fully shrunk form of the worse of the two inputs.
    """
    from hypothesis.internal.conjecture.shrinker import dfa_replacement, sort_key

    assert predicate(runner.cached_test_function(u))
    assert predicate(runner.cached_test_function(v))

    u_shrunk = fully_shrink(runner, u, predicate)
    v_shrunk = fully_shrink(runner, v, predicate)

    # From here on ``u`` is the better (smaller in sort order) of the two
    # fully shrunk buffers and ``v`` is the worse one.
    u, v = sorted((u_shrunk.buffer, v_shrunk.buffer), key=sort_key)

    assert u != v

    assert not v.startswith(u)

    # We would like to avoid using LStar on large strings as its
    # behaviour can be quadratic or worse. In order to help achieve
    # this we peel off a common prefix and suffix of the two final
    # results and just learn the internal bit where they differ.
    #
    # This potentially reduces the length quite far if there's
    # just one tricky bit of control flow we're struggling to
    # reduce inside a strategy somewhere and the rest of the
    # test function reduces fine.
    if v.endswith(u):
        # ``u`` is entirely a suffix of ``v``: the core of ``u`` is empty
        # and the core of ``v`` is everything in front of that suffix.
        prefix = b""
        suffix = u
        u_core = b""
        assert len(u) > 0
        v_core = v[:-len(u)]
    else:
        # Longest common prefix. This terminates inside ``u`` because
        # ``v`` does not start with ``u``.
        i = 0
        while u[i] == v[i]:
            i += 1
        prefix = u[:i]
        assert u.startswith(prefix)
        assert v.startswith(prefix)

        # Longest common suffix, scanning from the end. This terminates
        # inside ``u`` because ``v`` does not end with ``u``.
        i = 1
        while u[-i] == v[-i]:
            i += 1

        # The max() stops the suffix from overlapping the prefix in ``u``.
        suffix = u[max(len(prefix), len(u) + 1 - i):]
        assert u.endswith(suffix)
        assert v.endswith(suffix)

        u_core = u[len(prefix):len(u) - len(suffix)]
        v_core = v[len(prefix):len(v) - len(suffix)]

    assert u == prefix + u_core + suffix, (list(u), list(v))
    assert v == prefix + v_core + suffix, (list(u), list(v))

    better = runner.cached_test_function(u)
    worse = runner.cached_test_function(v)

    allow_discards = worse.has_discards or better.has_discards

    def is_valid_core(s):
        # Membership test for the language being learned: ``s`` is a valid
        # core if its length lies between those of the two cores and
        # splicing it between the shared prefix and suffix gives a buffer
        # that satisfies the conditions below.
        if not (len(u_core) <= len(s) <= len(v_core)):
            return False
        buf = prefix + s + suffix
        result = runner.cached_test_function(buf)
        return (
            predicate(result)
            # Because we're often using this to learn strategies
            # rather than entire complex test functions, it's
            # important that our replacements are precise and
            # don't leave the rest of the test case in a weird
            # state.
            and result.buffer == buf
            # Because the shrinker is good at removing discarded
            # data, unless we need discards to allow one or both
            # of u and v to result in valid shrinks, we don't
            # count attempts that have them as valid. This will
            # cause us to match fewer strings, which will make
            # the resulting shrink pass more efficient when run
            # on test functions it wasn't really intended for.
            and (allow_discards or not result.has_discards))

    assert sort_key(u_core) < sort_key(v_core)

    assert is_valid_core(u_core)
    assert is_valid_core(v_core)

    learner = LStar(is_valid_core)

    # Keep feeding the learner examples until a whole round goes by without
    # its generation changing.
    prev = -1
    while learner.generation != prev:
        prev = learner.generation
        learner.learn(u_core)
        learner.learn(v_core)

        # L* has a tendency to learn DFAs which wrap around to
        # the beginning. We don't want it to do that unless
        # it's accurate, so we use these as examples to check
        # what happens when going around the DFA twice.
        learner.learn(u_core * 2)
        learner.learn(v_core * 2)

        if learner.dfa.max_length(learner.dfa.start) > len(v_core):
            # The language we learn is finite and bounded above
            # by the length of v_core. This is important in order
            # to keep our shrink passes reasonably efficient -
            # otherwise they can match far too much. So whenever
            # we learn a DFA that could match a string longer
            # than len(v_core) we fix it by finding the first
            # string longer than v_core and learning that as
            # a correction.
            x = next(
                learner.dfa.all_matching_strings(min_length=len(v_core) + 1))
            assert not is_valid_core(x)
            learner.learn(x)
            assert not learner.dfa.matches(x)
            assert learner.generation != prev
        else:
            # We mostly care about getting the right answer on the
            # minimal test case, but because we're doing this offline
            # anyway we might as well spend a little more time trying
            # small examples to make sure the learner gets them right.
            for x in islice(learner.dfa.all_matching_strings(), 100):
                if not is_valid_core(x):
                    learner.learn(x)
                    assert learner.generation != prev
                    break

    # We've now successfully learned a DFA that works for shrinking
    # our failed normalisation further. Canonicalise it into a concrete
    # DFA so we can save it for later.
    new_dfa = learner.dfa.canonicalise()

    assert math.isfinite(new_dfa.max_length(new_dfa.start))

    shrinker = runner.new_shrinker(runner.cached_test_function(v), predicate)

    # The region of ``v`` holding its core must be matched by the new DFA.
    assert (len(prefix),
            len(v) - len(suffix)) in shrinker.matching_regions(new_dfa)

    name = "tmp-dfa-" + repr(new_dfa)

    shrinker.extra_dfas[name] = new_dfa

    shrinker.fixate_shrink_passes([dfa_replacement(name)])

    # Sanity check: the learned DFA really does let us improve on ``v``.
    assert sort_key(shrinker.buffer) < sort_key(v)

    return new_dfa
Example #10
0
    def test_function(self, data):
        """Execute one test case and update the runner's bookkeeping.

        Runs ``data`` through the stoppable test function, then records best
        target scores, valid/interesting counts and the interesting-example
        table, and finally applies the engine's exit conditions. Exceptions
        from the test function propagate to the caller; for anything other
        than KeyboardInterrupt the buffer is saved first.
        """
        assert isinstance(data.observer, TreeRecordingObserver)
        self.call_count += 1

        finalise = True
        try:
            self.__stoppable_test_function(data)
        except KeyboardInterrupt:
            finalise = False
            raise
        except BaseException:
            self.save_buffer(data.buffer)
            raise
        finally:
            # No branch, because if we're interrupted we always raise
            # the KeyboardInterrupt, never continue to the code below.
            if finalise:  # pragma: no branch
                data.freeze()
                self.note_details(data)

        self.debug_data(data)

        assert len(data.buffer) <= BUFFER_SIZE

        if data.status >= Status.VALID:
            best_scores = self.best_observed_targets
            for label, score in data.target_observations.items():
                best_scores[label] = max(best_scores[label], score)

        if data.status == Status.VALID:
            self.valid_examples += 1

        if data.status == Status.INTERESTING:
            origin = data.interesting_origin
            try:
                previous = self.interesting_examples[origin]
            except KeyError:
                # First failure recorded for this origin.
                should_store = True
                self.last_bug_found_at = self.call_count
                if self.first_bug_found_at is None:
                    self.first_bug_found_at = self.call_count
            else:
                # Known origin: only a strictly smaller buffer replaces it.
                should_store = sort_key(data.buffer) < sort_key(previous.buffer)
                if should_store:
                    self.shrinks += 1
                    self.downgrade_buffer(previous.buffer)
                    self.__data_cache.unpin(previous.buffer)

            if should_store:
                self.save_buffer(data.buffer)
                self.interesting_examples[origin] = data.as_result()
                self.__data_cache.pin(data.buffer)
                self.shrunk_examples.discard(origin)

            if self.shrinks >= MAX_SHRINKS:
                self.exit_with(ExitReason.max_shrinks)

        if not self.interesting_examples:
            # Note that this logic is reproduced to end the generation phase when
            # we have interesting examples.  Update that too if you change this!
            # (The doubled implementation is because here we exit the engine entirely,
            #  while in the other case below we just want to move on to shrinking.)
            if self.valid_examples >= self.settings.max_examples:
                self.exit_with(ExitReason.max_examples)

            # The floor of 1000 is a high-ish default max iterations, so that
            # tests don't become flaky when max_examples is too low.
            iteration_cap = max(self.settings.max_examples * 10, 1000)
            if self.call_count >= iteration_cap:
                self.exit_with(ExitReason.max_iterations)

        if self.__tree_is_exhausted():
            self.exit_with(ExitReason.finished)

        self.record_for_health_check(data)
Example #11
0
    def test_function(self, data):
        """Run one test case and record its outcome on the runner.

        Fails the ``hung_test`` health check once the elapsed time reaches
        ``HUNG_TEST_TIME_LIMIT``, runs ``data`` through ``_test_function``,
        adds the result to the target selector and the tree, updates the
        table of interesting examples, and applies the exit conditions.
        """
        if benchmark_time() - self.start_time >= HUNG_TEST_TIME_LIMIT:
            fail_health_check(
                self.settings,
                ("Your test has been running for at least five minutes. This "
                 "is probably not what you intended, so by default Hypothesis "
                 "turns it into an error."),
                HealthCheck.hung_test,
            )

        self.call_count += 1
        try:
            self._test_function(data)
            data.freeze()
        except StopTest as e:
            # A StopTest whose testcounter matches this data is swallowed.
            # One with a different testcounter is saved and re-raised.
            if e.testcounter != data.testcounter:
                self.save_buffer(data.buffer)
                raise
        except BaseException:
            self.save_buffer(data.buffer)
            raise
        finally:
            # Runs on every path, including the ones that re-raise above.
            data.freeze()
            self.note_details(data)

        self.target_selector.add(data)

        self.debug_data(data)

        if data.status == Status.VALID:
            self.valid_examples += 1

        # Record the test result in the tree, to avoid unnecessary work in
        # the future.

        # The tree has two main uses:

        # 1. It is mildly useful in some cases during generation where there is
        #    a high probability of duplication but it is possible to generate
        #    many examples. e.g. if we had input of the form none() | text()
        #    then we would generate duplicates 50% of the time, and would
        #    like to avoid that and spend more time exploring the text() half
        #    of the search space. The tree allows us to predict in advance if
        #    the test would lead to a duplicate and avoid that.
        # 2. When shrinking it is *extremely* useful to be able to anticipate
        #    duplication, because we try many similar and smaller test cases,
        #    and these will tend to have a very high duplication rate. This is
        #    where the tree usage really shines.
        #
        # Unfortunately, as well as being the less useful type of tree usage,
        # the first type is also the most expensive! Once we've entered shrink
        # mode our time remaining is essentially bounded - we're just here
        # until we've found the minimal example. In exploration mode, we might
        # be early on in a very long-running process, and keeping everything
        # we've ever seen lying around ends up bloating our memory usage
        # substantially by causing us to use O(max_examples) memory.
        #
        # As a compromise, what we do is reset the cache every so often. This
        # keeps our memory usage bounded. It has a few unfortunate failure
        # modes in that it means that we can't always detect when we should
        # have stopped - if we are exploring a language which has only slightly
        # more than cache reset frequency number of members, we will end up
        # exploring indefinitely when we could have stopped. However, this is
        # a fairly unusual case - thanks to exponential blow-ups in language
        # size, most languages are either very large (possibly infinite) or
        # very small. Nevertheless we want CACHE_RESET_FREQUENCY to be quite
        # high to avoid this case coming up in practice.
        if (self.call_count % CACHE_RESET_FREQUENCY == 0
                and not self.interesting_examples):
            self.reset_tree_to_empty()

        # Added after any reset above, so the current result is always kept.
        self.tree.add(data)

        if data.status == Status.INTERESTING:
            key = data.interesting_origin
            changed = False
            try:
                existing = self.interesting_examples[key]
            except KeyError:
                # First interesting example seen for this origin.
                changed = True
            else:
                # Only a strictly smaller buffer replaces the stored example;
                # each such replacement counts as one shrink.
                if sort_key(data.buffer) < sort_key(existing.buffer):
                    self.shrinks += 1
                    self.downgrade_buffer(existing.buffer)
                    changed = True

            if changed:
                self.save_buffer(data.buffer)
                self.interesting_examples[key] = data
                # The origin has a new example, so it is no longer considered
                # shrunk.
                self.shrunk_examples.discard(key)

            if self.shrinks >= MAX_SHRINKS:
                self.exit_with(ExitReason.max_shrinks)

        # The example and iteration limits only apply while nothing
        # interesting has been found.
        if not self.interesting_examples:
            if self.valid_examples >= self.settings.max_examples:
                self.exit_with(ExitReason.max_examples)
            if self.call_count >= max(
                    self.settings.max_examples * 10,
                    # We have a high-ish default max iterations, so that tests
                    # don't become flaky when max_examples is too low.
                    1000,
            ):
                self.exit_with(ExitReason.max_iterations)

        if self.__tree_is_exhausted():
            self.exit_with(ExitReason.finished)

        self.record_for_health_check(data)
    def test_function(self, data):
        """Run a single test case and fold the result into the runner's state.

        ``data`` is executed via the stoppable test function, frozen, and
        handed to the target selector. The interesting-example table is then
        updated and the engine's exit conditions are checked. Any exception
        from the test function propagates after the buffer has been saved.
        """
        assert isinstance(data.observer, TreeRecordingObserver)
        self.call_count += 1

        try:
            self.__stoppable_test_function(data)
        except BaseException:
            self.save_buffer(data.buffer)
            raise
        finally:
            data.freeze()
            self.note_details(data)

        self.target_selector.add(data)

        self.debug_data(data)

        if data.status == Status.VALID:
            self.valid_examples += 1

        # Test results are recorded in a tree of examples, to avoid
        # unnecessary work in the future. The tree has two main uses:
        #
        # 1. During generation it is mildly useful when duplicates are likely
        #    but many examples are still possible - e.g. with input of the
        #    form none() | text() half of all draws would be duplicates, and
        #    we would rather spend that time exploring the text() half. The
        #    tree lets us predict a duplicate in advance and avoid it.
        # 2. During shrinking it is *extremely* useful, because the many
        #    similar, smaller test cases we try have a very high duplication
        #    rate. This is where the tree usage really shines.
        #
        # To support this we keep just enough of the structure of the tree of
        # examples seen so far to predict whether something leads to a known
        # result, and to canonicalize it into the buffer of the
        # ConjectureData that running it would produce.

        if data.status == Status.INTERESTING:
            origin = data.interesting_origin
            try:
                previous = self.interesting_examples[origin]
            except KeyError:
                # First failure recorded for this origin.
                should_store = True
                self.last_bug_found_at = self.call_count
                if self.first_bug_found_at is None:
                    self.first_bug_found_at = self.call_count
            else:
                # Known origin: only a strictly smaller buffer replaces it.
                should_store = sort_key(data.buffer) < sort_key(previous.buffer)
                if should_store:
                    self.shrinks += 1
                    self.downgrade_buffer(previous.buffer)
                    self.__data_cache.unpin(previous.buffer)

            if should_store:
                self.save_buffer(data.buffer)
                self.interesting_examples[origin] = data.as_result()
                self.__data_cache.pin(data.buffer)
                self.shrunk_examples.discard(origin)

            if self.shrinks >= MAX_SHRINKS:
                self.exit_with(ExitReason.max_shrinks)

        if not self.interesting_examples:
            # Note that this logic is reproduced to end the generation phase when
            # we have interesting examples.  Update that too if you change this!
            # (The doubled implementation is because here we exit the engine entirely,
            #  while in the other case below we just want to move on to shrinking.)
            if self.valid_examples >= self.settings.max_examples:
                self.exit_with(ExitReason.max_examples)

            # The floor of 1000 is a high-ish default max iterations, so that
            # tests don't become flaky when max_examples is too low.
            iteration_cap = max(self.settings.max_examples * 10, 1000)
            if self.call_count >= iteration_cap:
                self.exit_with(ExitReason.max_iterations)

        if self.__tree_is_exhausted():
            self.exit_with(ExitReason.finished)

        self.record_for_health_check(data)
Example #13
0
    def __init__(self, random):
        """Create an empty front.

        ``random`` is stored for later use. ``front`` is a ``SortedList``
        keyed on ``sort_key`` of each element's ``buffer``.
        """

        def shrink_order(result):
            return sort_key(result.buffer)

        self.__random = random
        self.__eviction_listeners = []
        self.__pending = None

        self.front = SortedList(key=shrink_order)
Example #14
0
    def test_function(self, data):
        """Run the test function against ``data`` and record the outcome.

        Increments ``call_count``, executes the test through
        ``__stoppable_test_function``, freezes ``data`` and then updates the
        per-call statistics, the data cache, the pareto front (when one is
        configured), the best target observations and the table of
        interesting examples. Finally the stopping conditions are checked and
        ``exit_with`` is called with the matching ``ExitReason`` when one of
        them is met.

        Any exception raised by the test propagates to the caller. For
        exceptions other than ``KeyboardInterrupt`` the buffer is saved before
        re-raising; on ``KeyboardInterrupt`` neither the save nor the
        post-run bookkeeping in the ``finally`` block happens.
        """
        assert isinstance(data.observer, TreeRecordingObserver)
        self.call_count += 1

        # Remembers whether the run was cut short by a KeyboardInterrupt so
        # that the ``finally`` block can skip its bookkeeping in that case.
        interrupted = False
        try:
            self.__stoppable_test_function(data)
        except KeyboardInterrupt:
            interrupted = True
            raise
        except BaseException:
            # Save the buffer that was being run when the error escaped, then
            # let the error continue to the caller.
            self.save_buffer(data.buffer)
            raise
        finally:
            # No branch, because if we're interrupted we always raise
            # the KeyboardInterrupt, never continue to the code below.
            if not interrupted:  # pragma: no branch
                data.freeze()
                # One statistics record per call: status name, wall-clock
                # runtime, summed draw time and the de-duplicated, sorted
                # string forms of the events attached to ``data``.
                call_stats = {
                    "status": data.status.name.lower(),
                    "runtime": data.finish_time - data.start_time,
                    "drawtime": math.fsum(data.draw_times),
                    "events": sorted({self.event_to_string(e) for e in data.events}),
                }
                self.stats_per_test_case.append(call_stats)
                # Cache the result under its buffer.
                self.__data_cache[data.buffer] = data.as_result()

        self.debug_data(data)

        # Offer the result to the pareto front, if there is one. A truthy
        # return from ``add`` triggers saving the buffer under the "pareto"
        # sub-key (presumably it means the result was accepted -- confirm
        # against the front's ``add`` implementation).
        if self.pareto_front is not None and self.pareto_front.add(data.as_result()):
            self.save_buffer(data.buffer, sub_key=b"pareto")

        assert len(data.buffer) <= BUFFER_SIZE

        if data.status >= Status.VALID:
            for k, v in data.target_observations.items():
                # Highest score observed so far for target ``k``.
                # NOTE(review): the lookup on the right-hand side needs a
                # value for a previously unseen ``k`` -- presumably this
                # mapping supplies a default; confirm where it is created.
                self.best_observed_targets[k] = max(self.best_observed_targets[k], v)

                # First example seen for this target: it is the best by
                # definition.
                if k not in self.best_examples_of_observed_targets:
                    self.best_examples_of_observed_targets[k] = data.as_result()
                    continue

                existing_example = self.best_examples_of_observed_targets[k]
                existing_score = existing_example.target_observations[k]

                # A strictly lower score never replaces the stored example.
                if v < existing_score:
                    continue

                # Replace on a strictly higher score, or otherwise when this
                # buffer has the smaller ``sort_key``.
                if v > existing_score or sort_key(data.buffer) < sort_key(
                    existing_example.buffer
                ):
                    self.best_examples_of_observed_targets[k] = data.as_result()

        if data.status == Status.VALID:
            self.valid_examples += 1

        if data.status == Status.INTERESTING:
            # Interesting examples are tracked per ``interesting_origin``.
            key = data.interesting_origin
            # Set when ``data`` should become the stored example for ``key``.
            changed = False
            try:
                existing = self.interesting_examples[key]
            except KeyError:
                # Nothing stored for this origin yet: record when it was found.
                changed = True
                self.last_bug_found_at = self.call_count
                if self.first_bug_found_at is None:
                    self.first_bug_found_at = self.call_count
            else:
                # An example already exists for this origin; only a buffer
                # with a smaller ``sort_key`` replaces it, and each such
                # replacement counts as one shrink.
                if sort_key(data.buffer) < sort_key(existing.buffer):
                    self.shrinks += 1
                    self.downgrade_buffer(existing.buffer)
                    self.__data_cache.unpin(existing.buffer)
                    changed = True

            if changed:
                # Save, store and pin the new example, and drop the origin
                # from ``shrunk_examples``.
                self.save_buffer(data.buffer)
                self.interesting_examples[key] = data.as_result()
                self.__data_cache.pin(data.buffer)
                self.shrunk_examples.discard(key)

            if self.shrinks >= MAX_SHRINKS:
                self.exit_with(ExitReason.max_shrinks)

        # Stop once the shrinking deadline, when one is set, has passed.
        if (
            self.finish_shrinking_deadline is not None
            and self.finish_shrinking_deadline < perf_counter()
        ):
            # See https://github.com/HypothesisWorks/hypothesis/issues/2340
            report(
                "WARNING: Hypothesis has spent more than five minutes working to shrink "
                "a failing example, and stopped because it is making very slow "
                "progress.  When you re-run your tests, shrinking will resume and "
                "may take this long before aborting again.\n"
                "PLEASE REPORT THIS if you can provide a reproducing example, so that "
                "we can improve shrinking performance for everyone."
            )
            self.exit_with(ExitReason.very_slow_shrinking)

        if not self.interesting_examples:
            # Note that this logic is reproduced to end the generation phase when
            # we have interesting examples.  Update that too if you change this!
            # (The doubled implementation is because here we exit the engine entirely,
            #  while in the other case below we just want to move on to shrinking.)
            if self.valid_examples >= self.settings.max_examples:
                self.exit_with(ExitReason.max_examples)
            if self.call_count >= max(
                self.settings.max_examples * 10,
                # We have a high-ish default max iterations, so that tests
                # don't become flaky when max_examples is too low.
                1000,
            ):
                self.exit_with(ExitReason.max_iterations)

        # Nothing left to explore.
        if self.__tree_is_exhausted():
            self.exit_with(ExitReason.finished)

        self.record_for_health_check(data)
Example #15
0
    def test_function(self, data):
        """Execute the test on ``data`` and update the engine's bookkeeping.

        Bumps ``call_count`` and runs the test through
        ``__stoppable_test_function``. Whether or not the test raised,
        ``data`` is frozen and its details are noted. If the test raised, the
        buffer is saved and the exception propagates. Otherwise the result is
        handed to the target selector, counted when valid, and stored as an
        interesting example when it is the first one, or has the smallest
        ``sort_key`` so far, for its ``interesting_origin``. Lastly the
        stopping conditions are checked and ``exit_with`` is called with the
        matching ``ExitReason``.
        """
        assert isinstance(data.observer, TreeRecordingObserver)
        self.call_count += 1

        try:
            self.__stoppable_test_function(data)
        except BaseException:
            # Keep the buffer that was running when the error escaped.
            self.save_buffer(data.buffer)
            raise
        finally:
            data.freeze()
            self.note_details(data)

        self.target_selector.add(data)
        self.debug_data(data)

        if data.status == Status.VALID:
            self.valid_examples += 1

        # Test results are recorded in a tree so that later work can be
        # skipped. That tree earns its keep in two places:
        #
        # 1. During generation it helps a little when duplicates are likely
        #    even though many examples exist. With input shaped like
        #    none() | text(), half of all draws would be duplicates; knowing
        #    in advance that a test would repeat lets us spend the time on
        #    the text() half of the search space instead.
        # 2. During shrinking it helps a great deal: we try many similar,
        #    smaller test cases, and those repeat very often.
        #
        # To support this we keep just enough of the structure of the tree of
        # examples seen so far to predict whether something leads to a known
        # result, and to canonicalize it into the buffer of the
        # ConjectureData that running it would produce.

        if data.status == Status.INTERESTING:
            origin = data.interesting_origin
            try:
                previous = self.interesting_examples[origin]
            except KeyError:
                # Nothing stored for this origin yet: note when it was found.
                self.last_bug_found_at = self.call_count
                if self.first_bug_found_at is None:
                    self.first_bug_found_at = self.call_count
                should_replace = True
            else:
                # Only a buffer with a smaller sort key replaces the stored
                # example; each such replacement counts as one shrink.
                should_replace = sort_key(data.buffer) < sort_key(previous.buffer)
                if should_replace:
                    self.shrinks += 1
                    self.downgrade_buffer(previous.buffer)
                    self.__data_cache.unpin(previous.buffer)

            if should_replace:
                self.save_buffer(data.buffer)
                self.interesting_examples[origin] = data.as_result()
                self.__data_cache.pin(data.buffer)
                self.shrunk_examples.discard(origin)

            if self.shrinks >= MAX_SHRINKS:
                self.exit_with(ExitReason.max_shrinks)

        if not self.interesting_examples:
            # Note that this logic is reproduced to end the generation phase
            # when we have interesting examples.  Update that too if you
            # change this!  (It is implemented twice because here we leave
            # the engine entirely, while the other copy only moves on to
            # shrinking.)
            if self.valid_examples >= self.settings.max_examples:
                self.exit_with(ExitReason.max_examples)

            # The floor of 1000 is a high-ish default so that tests do not
            # become flaky when max_examples is too low.
            iteration_cap = max(self.settings.max_examples * 10, 1000)
            if self.call_count >= iteration_cap:
                self.exit_with(ExitReason.max_iterations)

        if self.__tree_is_exhausted():
            self.exit_with(ExitReason.finished)

        self.record_for_health_check(data)