Example #1
    def evaluate_test_data(self, data):
        try:
            result = self.execute(data)
            if result is not None:
                fail_health_check(
                    self.settings,
                    ('Tests run under @given should return None, but '
                     '%s returned %r instead.') % (self.test.__name__, result),
                    HealthCheck.return_value)
        except UnsatisfiedAssumption:
            data.mark_invalid()
        except (
                HypothesisDeprecationWarning,
                FailedHealthCheck,
                StopTest,
        ) + EXCEPTIONS_TO_RERAISE:
            raise
        except EXCEPTIONS_TO_FAIL as e:
            escalate_hypothesis_internal_error()
            tb = get_trimmed_traceback()
            data.__expected_traceback = ''.join(
                traceback.format_exception(type(e), e, tb))
            data.__expected_exception = e
            verbose_report(data.__expected_traceback)

            origin = traceback.extract_tb(tb)[-1]
            filename = origin[0]
            lineno = origin[1]
            data.mark_interesting((type(e), filename, lineno))
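
(Note: the HealthCheck.return_value branch above fires when a test wrapped in @given returns anything other than None. A minimal user-side sketch that would trigger it; the strategy and test name are illustrative, not taken from the examples:)

from hypothesis import given, strategies as st

@given(st.integers())
def test_double(x):
    # Returning a value instead of asserting trips HealthCheck.return_value,
    # which the evaluate_test_data code above reports as a failed health check.
    return x * 2
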
Example #2
File: core.py  Project: sunito/hypothesis
    def evaluate_test_data(self, data):
        try:
            result = self.execute(data)
            if result is not None:
                fail_health_check(self.settings, (
                    'Tests run under @given should return None, but '
                    '%s returned %r instead.'
                ) % (self.test.__name__, result), HealthCheck.return_value)
            return False
        except UnsatisfiedAssumption:
            data.mark_invalid()
        except (
            HypothesisDeprecationWarning, FailedHealthCheck,
            StopTest,
        ) + EXCEPTIONS_TO_RERAISE:
            raise
        except Exception as e:
            escalate_hypothesis_internal_error()
            data.__expected_traceback = traceback.format_exc()
            data.__expected_exception = e
            verbose_report(data.__expected_traceback)

            error_class, _, tb = sys.exc_info()

            origin = traceback.extract_tb(tb)[-1]
            filename = origin[0]
            lineno = origin[1]
            data.mark_interesting((error_class, filename, lineno))
Example #3
File: core.py  Project: yssource/hypothesis
    def _execute_once_for_engine(self, data):
        """Wrapper around ``execute_once`` that intercepts test failure
        exceptions and single-test control exceptions, and turns them into
        appropriate method calls to `data` instead.

        This allows the engine to assume that any exception other than
        ``StopTest`` must be a fatal error, and should stop the entire engine.
        """
        try:
            result = self.execute_once(data)
            if result is not None:
                fail_health_check(
                    self.settings,
                    ("Tests run under @given should return None, but "
                     "%s returned %r instead.") % (self.test.__name__, result),
                    HealthCheck.return_value,
                )
        except UnsatisfiedAssumption:
            # An "assume" check failed, so instead we inform the engine that
            # this test run was invalid.
            data.mark_invalid()
        except StopTest:
            # The engine knows how to handle this control exception, so it's
            # OK to re-raise it.
            raise
        except (
                HypothesisDeprecationWarning,
                FailedHealthCheck,
        ) + skip_exceptions_to_reraise():
            # These are fatal errors or control exceptions that should stop the
            # engine, so we re-raise them.
            raise
        except failure_exceptions_to_catch() as e:
            # If the error was raised by Hypothesis-internal code, re-raise it
            # as a fatal error instead of treating it as a test failure.
            escalate_hypothesis_internal_error()

            if data.frozen:
                # This can happen if an error occurred in a finally
                # block somewhere, suppressing our original StopTest.
                # We raise a new one here to resume normal operation.
                raise StopTest(data.testcounter)
            else:
                # The test failed by raising an exception, so we inform the
                # engine that this test run was interesting. This is the normal
                # path for test runs that fail.

                tb = get_trimmed_traceback()
                info = data.extra_information
                info.__expected_traceback = "".join(
                    traceback.format_exception(type(e), e, tb))
                info.__expected_exception = e
                verbose_report(info.__expected_traceback)

                origin = traceback.extract_tb(tb)[-1]
                filename = origin[0]
                lineno = origin[1]
                data.mark_interesting((type(e), filename, lineno))
Example #4
    def evaluate_test_data(self, data):
        try:
            if self.collector is None:
                result = self.execute(data)
            else:  # pragma: no cover
                # This should always be a no-op, but the coverage tracer has
                # a bad habit of resurrecting itself.
                original = sys.gettrace()
                sys.settrace(None)
                try:
                    self.collector.data = {}
                    result = self.execute(data, collect=True)
                finally:
                    sys.settrace(original)
                    covdata = CoverageData()
                    self.collector.save_data(covdata)
                    self.coverage_data.update(covdata)
                    for filename in covdata.measured_files():
                        if is_hypothesis_file(filename):
                            continue
                        data.tags.update(
                            arc(filename, source, target)
                            for source, target in covdata.arcs(filename))
            if result is not None and self.settings.perform_health_check:
                fail_health_check(
                    self.settings,
                    ('Tests run under @given should return None, but '
                     '%s returned %r instead.') % (self.test.__name__, result),
                    HealthCheck.return_value)
            self.at_least_one_success = True
            return False
        except UnsatisfiedAssumption:
            data.mark_invalid()
        except (
                HypothesisDeprecationWarning,
                FailedHealthCheck,
                StopTest,
        ) + exceptions_to_reraise:
            raise
        except Exception as e:
            escalate_hypothesis_internal_error()
            data.__expected_traceback = traceback.format_exc()
            data.__expected_exception = e
            verbose_report(data.__expected_traceback)

            error_class, _, tb = sys.exc_info()

            origin = traceback.extract_tb(tb)[-1]
            filename = origin[0]
            lineno = origin[1]
            data.mark_interesting((error_class, filename, lineno))
Example #5
 def check_invariants(self, settings):
     for invar in self.invariants():
         if self._initialize_rules_to_run and not invar.check_during_init:
             continue
         if not all(precond(self) for precond in invar.preconditions):
             continue
         result = invar.function(self)
         if result is not None:
             fail_health_check(
                 settings,
                 "The return value of an @invariant is always ignored, but "
                 f"{invar.function.__qualname__} returned {result!r} "
                 "instead of None",
                 HealthCheck.return_value,
             )
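
(The same HealthCheck.return_value check applies to @invariant methods: check_invariants above expects them to return None. A sketch of the intended assertion-based style, assuming the standard hypothesis.stateful API; the class is illustrative:)

from hypothesis.stateful import RuleBasedStateMachine, invariant, rule

class Counter(RuleBasedStateMachine):
    def __init__(self):
        super().__init__()
        self.count = 0

    @rule()
    def increment(self):
        self.count += 1

    @invariant()
    def count_is_non_negative(self):
        # Invariants should assert and return None; returning a value here
        # would be reported via the fail_health_check call shown above.
        assert self.count >= 0
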
Example #6
 def check_invariants(self, settings):
     for invar in self.invariants():
         if self._initialize_rules_to_run and not invar.check_during_init:
             continue
         if not all(precond(self) for precond in invar.preconditions):
             continue
         if (current_build_context().is_final
                 or settings.verbosity >= Verbosity.debug):
             report(f"state.{invar.function.__name__}()")
         result = invar.function(self)
         if result is not None:
             fail_health_check(
                 settings,
                 "The return value of an @invariant is always ignored, but "
                 f"{invar.function.__qualname__} returned {result!r} "
                 "instead of None",
                 HealthCheck.return_value,
             )
Example #7
    def evaluate_test_data(self, data):
        try:
            result = self.execute(data)
            if result is not None:
                fail_health_check(
                    self.settings,
                    (
                        "Tests run under @given should return None, but "
                        "%s returned %r instead."
                    )
                    % (self.test.__name__, result),
                    HealthCheck.return_value,
                )
        except UnsatisfiedAssumption:
            data.mark_invalid()
        except (
            HypothesisDeprecationWarning,
            FailedHealthCheck,
            StopTest,
        ) + skip_exceptions_to_reraise():
            raise
        except failure_exceptions_to_catch() as e:
            escalate_hypothesis_internal_error()
            if data.frozen:
                # This can happen if an error occurred in a finally
                # block somewhere, suppressing our original StopTest.
                # We raise a new one here to resume normal operation.
                raise StopTest(data.testcounter)
            else:
                tb = get_trimmed_traceback()
                info = data.extra_information
                info.__expected_traceback = "".join(
                    traceback.format_exception(type(e), e, tb)
                )
                info.__expected_exception = e
                verbose_report(info.__expected_traceback)

                origin = traceback.extract_tb(tb)[-1]
                filename = origin[0]
                lineno = origin[1]
                data.mark_interesting((type(e), filename, lineno))
Example #8
File: core.py  Project: aarchiba/hypothesis
    def evaluate_test_data(self, data):
        try:
            result = self.execute(data)
            if result is not None:
                fail_health_check(
                    self.settings,
                    (
                        "Tests run under @given should return None, but "
                        "%s returned %r instead."
                    )
                    % (self.test.__name__, result),
                    HealthCheck.return_value,
                )
        except UnsatisfiedAssumption:
            data.mark_invalid()
        except (
            HypothesisDeprecationWarning,
            FailedHealthCheck,
            StopTest,
        ) + skip_exceptions_to_reraise():
            raise
        except failure_exceptions_to_catch() as e:
            escalate_hypothesis_internal_error()
            if data.frozen:
                # This can happen if an error occurred in a finally
                # block somewhere, suppressing our original StopTest.
                # We raise a new one here to resume normal operation.
                raise StopTest(data.testcounter)
            else:
                tb = get_trimmed_traceback()
                info = data.extra_information
                info.__expected_traceback = "".join(
                    traceback.format_exception(type(e), e, tb)
                )
                info.__expected_exception = e
                verbose_report(info.__expected_traceback)

                origin = traceback.extract_tb(tb)[-1]
                filename = origin[0]
                lineno = origin[1]
                data.mark_interesting((type(e), filename, lineno))
Example #9
    def record_for_health_check(self, data):
        # Once we've actually found a bug, there's no point in trying to run
        # health checks - they'll just mask the actually important information.
        if data.status == Status.INTERESTING:
            self.health_check_state = None

        state = self.health_check_state

        if state is None:
            return

        state.draw_times.extend(data.draw_times)

        if data.status == Status.VALID:
            state.valid_examples += 1
        elif data.status == Status.INVALID:
            state.invalid_examples += 1
        else:
            assert data.status == Status.OVERRUN
            state.overrun_examples += 1

        max_valid_draws = 10
        max_invalid_draws = 50
        max_overrun_draws = 20

        assert state.valid_examples <= max_valid_draws

        if state.valid_examples == max_valid_draws:
            self.health_check_state = None
            return

        if state.overrun_examples == max_overrun_draws:
            fail_health_check(
                self.settings,
                ("Examples routinely exceeded the max allowable size. "
                 "(%d examples overran while generating %d valid ones)"
                 ". Generating examples this large will usually lead to"
                 " bad results. You could try setting max_size parameters "
                 "on your collections and turning "
                 "max_leaves down on recursive() calls.") %
                (state.overrun_examples, state.valid_examples),
                HealthCheck.data_too_large,
            )
        if state.invalid_examples == max_invalid_draws:
            fail_health_check(
                self.settings,
                ("It looks like your strategy is filtering out a lot "
                 "of data. Health check found %d filtered examples but "
                 "only %d good ones. This will make your tests much "
                 "slower, and also will probably distort the data "
                 "generation quite a lot. You should adapt your "
                 "strategy to filter less. This can also be caused by "
                 "a low max_leaves parameter in recursive() calls") %
                (state.invalid_examples, state.valid_examples),
                HealthCheck.filter_too_much,
            )

        draw_time = sum(state.draw_times)

        if draw_time > 1.0:
            fail_health_check(
                self.settings,
                ("Data generation is extremely slow: Only produced "
                 "%d valid examples in %.2f seconds (%d invalid ones "
                 "and %d exceeded maximum size). Try decreasing "
                 "size of the data you're generating (with e.g."
                 "max_size or max_leaves parameters).") % (
                     state.valid_examples,
                     draw_time,
                     state.invalid_examples,
                     state.overrun_examples,
                 ),
                HealthCheck.too_slow,
            )
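
(The data_too_large, filter_too_much and too_slow checks above can be silenced when the behaviour is intentional. A sketch, assuming the suppress_health_check setting; the heavily-filtered strategy is illustrative:)

from hypothesis import HealthCheck, given, settings, strategies as st

@settings(suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.too_slow])
@given(st.integers().filter(lambda n: n % 1000 == 0))
def test_multiples_of_1000(n):
    # Without the settings above, this filter rejects most draws and could
    # trigger the filter_too_much health check from the engine code.
    assert n % 1000 == 0
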
Example #10
File: core.py  Project: sunito/hypothesis
        def wrapped_test(*arguments, **kwargs):
            # Tell pytest to omit the body of this function from tracebacks
            __tracebackhide__ = True

            test = wrapped_test.hypothesis.inner_test

            if getattr(test, 'is_hypothesis_test', False):
                note_deprecation((
                    'You have applied @given to test: %s more than once. In '
                    'future this will be an error. Applying @given twice '
                    'wraps the test twice, which can be extremely slow. A '
                    'similar effect can be gained by combining the arguments '
                    'of the two calls to given. For example, instead of '
                    '@given(booleans()) @given(integers()), you could write '
                    '@given(booleans(), integers())') % (test.__name__, )
                )

            settings = wrapped_test._hypothesis_internal_use_settings

            random = get_random_for_wrapped_test(test, wrapped_test)

            if infer in generator_kwargs.values():
                hints = get_type_hints(test)
            for name in [name for name, value in generator_kwargs.items()
                         if value is infer]:
                if name not in hints:
                    raise InvalidArgument(
                        'passed %s=infer for %s, but %s has no type annotation'
                        % (name, test.__name__, name))
                generator_kwargs[name] = st.from_type(hints[name])

            processed_args = process_arguments_to_given(
                wrapped_test, arguments, kwargs, generator_arguments,
                generator_kwargs, argspec, test, settings
            )
            arguments, kwargs, test_runner, search_strategy = processed_args

            runner = getattr(search_strategy, 'runner', None)
            if isinstance(runner, TestCase) and test.__name__ in dir(TestCase):
                msg = ('You have applied @given to the method %s, which is '
                       'used by the unittest runner but is not itself a test.'
                       '  This is not useful in any way.' % test.__name__)
                fail_health_check(settings, msg, HealthCheck.not_a_test_method)
            if bad_django_TestCase(runner):  # pragma: no cover
                # Covered by the Django tests, but not the pytest coverage task
                raise InvalidArgument(
                    'You have applied @given to a method on %s, but this '
                    'class does not inherit from the supported versions in '
                    '`hypothesis.extra.django`.  Use the Hypothesis variants '
                    'to ensure that each example is run in a separate '
                    'database transaction.' % qualname(type(runner))
                )

            state = StateForActualGivenExecution(
                test_runner, search_strategy, test, settings, random,
                had_seed=wrapped_test._hypothesis_internal_use_seed
            )

            reproduce_failure = \
                wrapped_test._hypothesis_internal_use_reproduce_failure

            if reproduce_failure is not None:
                expected_version, failure = reproduce_failure
                if expected_version != __version__:
                    raise InvalidArgument((
                        'Attempting to reproduce a failure from a different '
                        'version of Hypothesis. This failure is from %s, but '
                        'you are currently running %r. Please change your '
                        'Hypothesis version to a matching one.'
                    ) % (expected_version, __version__))
                try:
                    state.execute(ConjectureData.for_buffer(
                        decode_failure(failure)),
                        print_example=True, is_final=True,
                    )
                    raise DidNotReproduce(
                        'Expected the test to raise an error, but it '
                        'completed successfully.'
                    )
                except StopTest:
                    raise DidNotReproduce(
                        'The shape of the test data has changed in some way '
                        'from where this blob was defined. Are you sure '
                        "you're running the same test?"
                    )
                except UnsatisfiedAssumption:
                    raise DidNotReproduce(
                        'The test data failed to satisfy an assumption in the '
                        'test. Have you added it since this blob was '
                        'generated?'
                    )

            execute_explicit_examples(
                test_runner, test, wrapped_test, settings, arguments, kwargs
            )

            if settings.max_examples <= 0:
                return

            if not (
                Phase.reuse in settings.phases or
                Phase.generate in settings.phases
            ):
                return

            try:
                if isinstance(runner, TestCase) and hasattr(runner, 'subTest'):
                    subTest = runner.subTest
                    try:
                        setattr(runner, 'subTest', fake_subTest)
                        state.run()
                    finally:
                        setattr(runner, 'subTest', subTest)
                else:
                    state.run()
            except BaseException:
                generated_seed = \
                    wrapped_test._hypothesis_internal_use_generated_seed
                if generated_seed is not None and not state.failed_normally:
                    with local_settings(settings):
                        if running_under_pytest:
                            report(
                                'You can add @seed(%(seed)d) to this test or '
                                'run pytest with --hypothesis-seed=%(seed)d '
                                'to reproduce this failure.' % {
                                    'seed': generated_seed})
                        else:
                            report(
                                'You can add @seed(%d) to this test to '
                                'reproduce this failure.' % (generated_seed,))
                raise
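
(The report at the end of this example tells users to add @seed(...) to reproduce a failure. A sketch of that decorator in use; the seed value and test body are placeholders:)

from hypothesis import given, seed, strategies as st

@seed(84371932)  # hypothetical value copied from the "You can add @seed(...)" report
@given(st.floats(allow_nan=False))
def test_roundtrip(x):
    assert x == float(str(x))
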
Example #11
        def wrapped_test(*arguments, **kwargs):
            # Tell pytest to omit the body of this function from tracebacks
            __tracebackhide__ = True

            test = wrapped_test.hypothesis.inner_test

            if getattr(test, "is_hypothesis_test", False):
                raise InvalidArgument(
                    (
                        "You have applied @given to the test %s more than once, which "
                        "wraps the test several times and is extremely slow. A "
                        "similar effect can be gained by combining the arguments "
                        "of the two calls to given. For example, instead of "
                        "@given(booleans()) @given(integers()), you could write "
                        "@given(booleans(), integers())"
                    )
                    % (test.__name__,)
                )

            settings = wrapped_test._hypothesis_internal_use_settings

            random = get_random_for_wrapped_test(test, wrapped_test)

            if infer in generator_kwargs.values():
                hints = get_type_hints(test)
            for name in [
                name for name, value in generator_kwargs.items() if value is infer
            ]:
                if name not in hints:
                    raise InvalidArgument(
                        "passed %s=infer for %s, but %s has no type annotation"
                        % (name, test.__name__, name)
                    )
                generator_kwargs[name] = st.from_type(hints[name])

            processed_args = process_arguments_to_given(
                wrapped_test,
                arguments,
                kwargs,
                generator_arguments,
                generator_kwargs,
                argspec,
                test,
                settings,
            )
            arguments, kwargs, test_runner, search_strategy = processed_args

            runner = getattr(search_strategy, "runner", None)
            if isinstance(runner, TestCase) and test.__name__ in dir(TestCase):
                msg = (
                    "You have applied @given to the method %s, which is "
                    "used by the unittest runner but is not itself a test."
                    "  This is not useful in any way." % test.__name__
                )
                fail_health_check(settings, msg, HealthCheck.not_a_test_method)
            if bad_django_TestCase(runner):  # pragma: no cover
                # Covered by the Django tests, but not the pytest coverage task
                raise InvalidArgument(
                    "You have applied @given to a method on %s, but this "
                    "class does not inherit from the supported versions in "
                    "`hypothesis.extra.django`.  Use the Hypothesis variants "
                    "to ensure that each example is run in a separate "
                    "database transaction." % qualname(type(runner))
                )

            state = StateForActualGivenExecution(
                test_runner,
                search_strategy,
                test,
                settings,
                random,
                had_seed=wrapped_test._hypothesis_internal_use_seed,
            )

            reproduce_failure = wrapped_test._hypothesis_internal_use_reproduce_failure

            if reproduce_failure is not None:
                expected_version, failure = reproduce_failure
                if expected_version != __version__:
                    raise InvalidArgument(
                        (
                            "Attempting to reproduce a failure from a different "
                            "version of Hypothesis. This failure is from %s, but "
                            "you are currently running %r. Please change your "
                            "Hypothesis version to a matching one."
                        )
                        % (expected_version, __version__)
                    )
                try:
                    state.execute(
                        ConjectureData.for_buffer(decode_failure(failure)),
                        print_example=True,
                        is_final=True,
                    )
                    raise DidNotReproduce(
                        "Expected the test to raise an error, but it "
                        "completed successfully."
                    )
                except StopTest:
                    raise DidNotReproduce(
                        "The shape of the test data has changed in some way "
                        "from where this blob was defined. Are you sure "
                        "you're running the same test?"
                    )
                except UnsatisfiedAssumption:
                    raise DidNotReproduce(
                        "The test data failed to satisfy an assumption in the "
                        "test. Have you added it since this blob was "
                        "generated?"
                    )

            execute_explicit_examples(
                test_runner, test, wrapped_test, settings, arguments, kwargs
            )

            if settings.max_examples <= 0:
                return

            if not (
                Phase.reuse in settings.phases or Phase.generate in settings.phases
            ):
                return

            try:
                if isinstance(runner, TestCase) and hasattr(runner, "subTest"):
                    subTest = runner.subTest
                    try:
                        setattr(runner, "subTest", fake_subTest)
                        state.run()
                    finally:
                        setattr(runner, "subTest", subTest)
                else:
                    state.run()
            except BaseException as e:
                generated_seed = wrapped_test._hypothesis_internal_use_generated_seed
                with local_settings(settings):
                    if not (state.failed_normally or generated_seed is None):
                        if running_under_pytest:
                            report(
                                "You can add @seed(%(seed)d) to this test or "
                                "run pytest with --hypothesis-seed=%(seed)d "
                                "to reproduce this failure." % {"seed": generated_seed}
                            )
                        else:
                            report(
                                "You can add @seed(%d) to this test to "
                                "reproduce this failure." % (generated_seed,)
                            )
                    # The dance here is to avoid showing users long tracebacks
                    # full of Hypothesis internals they don't care about.
                    # We have to do this inline, to avoid adding another
                    # internal stack frame just when we've removed the rest.
                    if PY2:
                        # Python 2 doesn't have Exception.with_traceback(...);
                        # instead it has a three-argument form of the `raise`
                        # statement.  Unfortunately this is a SyntaxError on
                        # Python 3, and before Python 2.7.9 it was *also* a
                        # SyntaxError to use it in a nested function so we
                        # can't `exec` or `eval` our way out (BPO-21591).
                        # So unless we break some versions of Python 2, none
                        # of them get traceback elision.
                        raise
                    # On Python 3, we swap out the real traceback for our
                    # trimmed version.  Using a variable ensures that the line
                    # which will actually appear in tracebacks is as clear as
                    # possible - "raise the_error_hypothesis_found".
                    the_error_hypothesis_found = e.with_traceback(
                        get_trimmed_traceback()
                    )
                    raise the_error_hypothesis_found
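
(The InvalidArgument raised at the top of this example suggests combining arguments rather than stacking @given. A sketch of both forms, with illustrative test names:)

from hypothesis import given, strategies as st

# Stacking @given raises InvalidArgument ("applied @given ... more than once"):
#   @given(st.booleans())
#   @given(st.integers())
#   def test_stacked(b, n): ...

# The suggested equivalent uses a single @given with both strategies.
@given(st.booleans(), st.integers())
def test_combined(b, n):
    assert isinstance(b, bool) and isinstance(n, int)
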
Example #12
    def generate_new_examples(self):
        if Phase.generate not in self.settings.phases:
            return

        zero_data = self.cached_test_function(hbytes(
            self.settings.buffer_size))
        if zero_data.status == Status.OVERRUN or (
                zero_data.status == Status.VALID
                and len(zero_data.buffer) * 2 > self.settings.buffer_size):
            fail_health_check(
                self.settings,
                "The smallest natural example for your test is extremely "
                "large. This makes it difficult for Hypothesis to generate "
                "good examples, especially when trying to reduce failing ones "
                "at the end. Consider reducing the size of your data if it is "
                "of a fixed size. You could also fix this by improving how "
                "your data shrinks (see https://hypothesis.readthedocs.io/en/"
                "latest/data.html#shrinking for details), or by introducing "
                "default values inside your strategy. e.g. could you replace "
                "some arguments with their defaults by using "
                "one_of(none(), some_complex_strategy)?",
                HealthCheck.large_base_example,
            )

        if zero_data is not Overrun:
            # If the language starts with writes of length >= cap then there is
            # only one string in it: Everything after cap is forced to be zero (or
            # to be whatever value is written there). That means that once we've
            # tried the zero value, there's nothing left for us to do, so we
            # exit early here.
            for i in hrange(self.cap):
                if i not in zero_data.forced_indices:
                    break
            else:
                self.exit_with(ExitReason.finished)

        self.health_check_state = HealthCheckState()

        count = 0
        while not self.interesting_examples and (
                count < 10 or self.health_check_state is not None):
            prefix = self.generate_novel_prefix()

            def draw_bytes(data, n):
                if data.index < len(prefix):
                    result = prefix[data.index:data.index + n]
                    if len(result) < n:
                        result += uniform(self.random, n - len(result))
                else:
                    result = uniform(self.random, n)
                return self.__zero_bound(data, result)

            targets_found = len(self.covering_examples)

            last_data = ConjectureData(max_length=self.settings.buffer_size,
                                       draw_bytes=draw_bytes)
            self.test_function(last_data)
            last_data.freeze()

            count += 1

        mutations = 0
        mutator = self._new_mutator()

        zero_bound_queue = []

        while not self.interesting_examples:
            if zero_bound_queue:
                # Whenever we generated an example and it hits a bound
                # which forces zero blocks into it, this creates a weird
                # distortion effect by making certain parts of the data
                # stream (especially ones to the right) much more likely
                # to be zero. We fix this by redistributing the generated
                # data by shuffling it randomly. This results in the
                # zero data being spread evenly throughout the buffer.
                # Hopefully the shrinking this causes will cause us to
                # naturally fail to hit the bound.
                # If it doesn't then we will queue the new version up again
                # (now with more zeros) and try again.
                overdrawn = zero_bound_queue.pop()
                buffer = bytearray(overdrawn.buffer)

                # These will have values written to them that are different
                # from what's in them anyway, so the value there doesn't
                # really "count" for distributional purposes, and if we
                # leave them in then they can cause the fraction of non
                # zero bytes to increase on redraw instead of decrease.
                for i in overdrawn.forced_indices:
                    buffer[i] = 0

                self.random.shuffle(buffer)
                buffer = hbytes(buffer)

                def draw_bytes(data, n):
                    result = buffer[data.index:data.index + n]
                    if len(result) < n:
                        result += hbytes(n - len(result))
                    return self.__rewrite(data, result)

                data = ConjectureData(draw_bytes=draw_bytes,
                                      max_length=self.settings.buffer_size)
                self.test_function(data)
                data.freeze()
            else:
                origin = self.target_selector.select()
                mutations += 1
                targets_found = len(self.covering_examples)
                data = ConjectureData(draw_bytes=mutator(origin),
                                      max_length=self.settings.buffer_size)
                self.test_function(data)
                data.freeze()
                if (data.status > origin.status
                        or len(self.covering_examples) > targets_found):
                    mutations = 0
                elif data.status < origin.status or mutations >= 10:
                    # Cap the variations of a single example and move on to
                    # an entirely fresh start.  Ten is an entirely arbitrary
                    # constant, but it's been working well for years.
                    mutations = 0
                    mutator = self._new_mutator()
            if getattr(data, "hit_zero_bound", False):
                zero_bound_queue.append(data)
            mutations += 1
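
(The large_base_example message above suggests one_of(none(), some_complex_strategy) as a fix. A sketch of that pattern; big_records is a hypothetical strategy whose smallest example is large:)

from hypothesis import strategies as st

# Hypothetical strategy with a large minimal example.
big_records = st.lists(st.text(min_size=10), min_size=50)

# Following the advice in the health-check message: allow a trivial default
# so the smallest natural example shrinks back down to None.
maybe_big_records = st.one_of(st.none(), big_records)
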
Example #13
    def test_function(self, data):
        if benchmark_time() - self.start_time >= HUNG_TEST_TIME_LIMIT:
            fail_health_check(
                self.settings,
                ("Your test has been running for at least five minutes. This "
                 "is probably not what you intended, so by default Hypothesis "
                 "turns it into an error."),
                HealthCheck.hung_test,
            )

        self.call_count += 1
        try:
            self._test_function(data)
            data.freeze()
        except StopTest as e:
            if e.testcounter != data.testcounter:
                self.save_buffer(data.buffer)
                raise
        except BaseException:
            self.save_buffer(data.buffer)
            raise
        finally:
            data.freeze()
            self.note_details(data)

        self.target_selector.add(data)

        self.debug_data(data)

        if data.status == Status.VALID:
            self.valid_examples += 1

        # Record the test result in the tree, to avoid unnecessary work in
        # the future.

        # The tree has two main uses:

        # 1. It is mildly useful in some cases during generation where there is
        #    a high probability of duplication but it is possible to generate
        #    many examples. e.g. if we had input of the form none() | text()
        #    then we would generate duplicates 50% of the time, and would
        #    like to avoid that and spend more time exploring the text() half
        #    of the search space. The tree allows us to predict in advance if
        #    the test would lead to a duplicate and avoid that.
        # 2. When shrinking it is *extremely* useful to be able to anticipate
        #    duplication, because we try many similar and smaller test cases,
        #    and these will tend to have a very high duplication rate. This is
        #    where the tree usage really shines.
        #
        # Unfortunately, as well as being the less useful type of tree usage,
        # the first type is also the most expensive! Once we've entered shrink
        # mode our time remaining is essentially bounded - we're just here
        # until we've found the minimal example. In exploration mode, we might
        # be early on in a very long-running process, and keeping everything
        # we've ever seen lying around ends up bloating our memory usage
        # substantially by causing us to use O(max_examples) memory.
        #
        # As a compromise, what we do is reset the cache every so often. This
        # keeps our memory usage bounded. It has a few unfortunate failure
        # modes in that it means that we can't always detect when we should
        # have stopped - if we are exploring a language which has only slightly
        # more than cache reset frequency number of members, we will end up
        # exploring indefinitely when we could have stopped. However, this is
        # a fairly unusual case - thanks to exponential blow-ups in language
        # size, most languages are either very large (possibly infinite) or
        # very small. Nevertheless we want CACHE_RESET_FREQUENCY to be quite
        # high to avoid this case coming up in practice.
        if (self.call_count % CACHE_RESET_FREQUENCY == 0
                and not self.interesting_examples):
            self.reset_tree_to_empty()

        self.tree.add(data)

        if data.status == Status.INTERESTING:
            key = data.interesting_origin
            changed = False
            try:
                existing = self.interesting_examples[key]
            except KeyError:
                changed = True
            else:
                if sort_key(data.buffer) < sort_key(existing.buffer):
                    self.shrinks += 1
                    self.downgrade_buffer(existing.buffer)
                    changed = True

            if changed:
                self.save_buffer(data.buffer)
                self.interesting_examples[key] = data
                self.shrunk_examples.discard(key)

            if self.shrinks >= MAX_SHRINKS:
                self.exit_with(ExitReason.max_shrinks)

        if not self.interesting_examples:
            if self.valid_examples >= self.settings.max_examples:
                self.exit_with(ExitReason.max_examples)
            if self.call_count >= max(
                    self.settings.max_examples * 10,
                    # We have a high-ish default max iterations, so that tests
                    # don't become flaky when max_examples is too low.
                    1000,
            ):
                self.exit_with(ExitReason.max_iterations)

        if self.__tree_is_exhausted():
            self.exit_with(ExitReason.finished)

        self.record_for_health_check(data)
Example #14
        def wrapped_test(*arguments, **kwargs):
            # Tell pytest to omit the body of this function from tracebacks
            __tracebackhide__ = True

            if getattr(test, 'is_hypothesis_test', False):
                note_deprecation(
                    'You have applied @given to a test more than once. In '
                    'future this will be an error. Applying @given twice '
                    'wraps the test twice, which can be extremely slow. A '
                    'similar effect can be gained by combining the arguments '
                    'of the two calls to given. For example, instead of '
                    '@given(booleans()) @given(integers()), you could write '
                    '@given(booleans(), integers())')

            settings = wrapped_test._hypothesis_internal_use_settings

            random = get_random_for_wrapped_test(test, wrapped_test)

            if infer in generator_kwargs.values():
                hints = get_type_hints(test)
            for name in [
                    name for name, value in generator_kwargs.items()
                    if value is infer
            ]:
                if name not in hints:
                    raise InvalidArgument(
                        'passed %s=infer for %s, but %s has no type annotation'
                        % (name, test.__name__, name))
                generator_kwargs[name] = st.from_type(hints[name])

            processed_args = process_arguments_to_given(
                wrapped_test, arguments, kwargs, generator_arguments,
                generator_kwargs, argspec, test, settings)
            arguments, kwargs, test_runner, search_strategy = processed_args

            runner = getattr(search_strategy, 'runner', None)
            if isinstance(runner, TestCase) and test.__name__ in dir(TestCase):
                msg = ('You have applied @given to the method %s, which is '
                       'used by the unittest runner but is not itself a test.'
                       '  This is not useful in any way.' % test.__name__)
                fail_health_check(settings, msg, HealthCheck.not_a_test_method)
            if bad_django_TestCase(runner):  # pragma: no cover
                # Covered by the Django tests, but not the pytest coverage task
                raise InvalidArgument(
                    'You have applied @given to a method on %s, but this '
                    'class does not inherit from the supported versions in '
                    '`hypothesis.extra.django`.  Use the Hypothesis variants '
                    'to ensure that each example is run in a separate '
                    'database transaction.' % qualname(type(runner)))

            state = StateForActualGivenExecution(
                test_runner,
                search_strategy,
                test,
                settings,
                random,
                had_seed=wrapped_test._hypothesis_internal_use_seed)

            reproduce_failure = \
                wrapped_test._hypothesis_internal_use_reproduce_failure

            if reproduce_failure is not None:
                expected_version, failure = reproduce_failure
                if expected_version != __version__:
                    raise InvalidArgument(
                        ('Attempting to reproduce a failure from a different '
                         'version of Hypothesis. This failure is from %s, but '
                         'you are currently running %r. Please change your '
                         'Hypothesis version to a matching one.') %
                        (expected_version, __version__))
                try:
                    state.execute(
                        ConjectureData.for_buffer(decode_failure(failure)),
                        print_example=True,
                        is_final=True,
                    )
                    raise DidNotReproduce(
                        'Expected the test to raise an error, but it '
                        'completed successfully.')
                except StopTest:
                    raise DidNotReproduce(
                        'The shape of the test data has changed in some way '
                        'from where this blob was defined. Are you sure '
                        "you're running the same test?")
                except UnsatisfiedAssumption:
                    raise DidNotReproduce(
                        'The test data failed to satisfy an assumption in the '
                        'test. Have you added it since this blob was '
                        'generated?')

            execute_explicit_examples(test_runner, test, wrapped_test,
                                      settings, arguments, kwargs)

            if settings.max_examples <= 0:
                return

            if not (Phase.reuse in settings.phases
                    or Phase.generate in settings.phases):
                return

            try:
                if isinstance(runner, TestCase) and hasattr(runner, 'subTest'):
                    subTest = runner.subTest
                    try:
                        setattr(runner, 'subTest', fake_subTest)
                        state.run()
                    finally:
                        setattr(runner, 'subTest', subTest)
                else:
                    state.run()
            except BaseException:
                generated_seed = \
                    wrapped_test._hypothesis_internal_use_generated_seed
                if generated_seed is not None and not state.failed_normally:
                    if running_under_pytest:
                        report(
                            ('You can add @seed(%(seed)d) to this test or run '
                             'pytest with --hypothesis-seed=%(seed)d to '
                             'reproduce this failure.') %
                            {'seed': generated_seed}, )
                    else:
                        report(
                            ('You can add @seed(%d) to this test to reproduce '
                             'this failure.') % (generated_seed, ))
                raise
Example #15
    def generate_new_examples(self):
        if Phase.generate not in self.settings.phases:
            return
        if self.interesting_examples:
            # The example database has failing examples from a previous run,
            # so we'd rather report that they're still failing ASAP than take
            # the time to look for additional failures.
            return

        zero_data = self.cached_test_function(hbytes(self.settings.buffer_size))
        if zero_data.status > Status.OVERRUN:
            self.__data_cache.pin(zero_data.buffer)

        if zero_data.status == Status.OVERRUN or (
            zero_data.status == Status.VALID
            and len(zero_data.buffer) * 2 > self.settings.buffer_size
        ):
            fail_health_check(
                self.settings,
                "The smallest natural example for your test is extremely "
                "large. This makes it difficult for Hypothesis to generate "
                "good examples, especially when trying to reduce failing ones "
                "at the end. Consider reducing the size of your data if it is "
                "of a fixed size. You could also fix this by improving how "
                "your data shrinks (see https://hypothesis.readthedocs.io/en/"
                "latest/data.html#shrinking for details), or by introducing "
                "default values inside your strategy. e.g. could you replace "
                "some arguments with their defaults by using "
                "one_of(none(), some_complex_strategy)?",
                HealthCheck.large_base_example,
            )

        if zero_data is not Overrun:
            # If the language starts with writes of length >= cap then there is
            # only one string in it: Everything after cap is forced to be zero (or
            # to be whatever value is written there). That means that once we've
            # tried the zero value, there's nothing left for us to do, so we
            # exit early here.
            has_non_forced = False

            # It's impossible to fall out of this loop normally because if we
            # did then that would mean that all blocks are writes, so we would
            # already have triggered the exhaustedness check on the tree and
            # finished running.
            for b in zero_data.blocks:  # pragma: no branch
                if b.start >= self.cap:
                    break
                if not b.forced:
                    has_non_forced = True
                    break
            if not has_non_forced:
                self.exit_with(ExitReason.finished)

        self.health_check_state = HealthCheckState()

        def should_generate_more():
            # If we haven't found a bug, keep looking.  We check this before
            # doing anything else as it's by far the most common case.
            if not self.interesting_examples:
                return True
            # If we've found a bug and won't report more than one, stop looking.
            elif not self.settings.report_multiple_bugs:
                return False
            assert self.first_bug_found_at <= self.last_bug_found_at <= self.call_count
            # End the generation phase where we would have ended it if no bugs had
            # been found.  This reproduces the exit logic in `self.test_function`,
            # but with the important distinction that this clause will move on to
            # the shrinking phase having found one or more bugs, while the other
            # will exit having found zero bugs.
            if (
                self.valid_examples >= self.settings.max_examples
                or self.call_count >= max(self.settings.max_examples * 10, 1000)
            ):  # pragma: no cover
                return False
            # Otherwise, keep searching for between ten and 'a heuristic' calls.
            # We cap 'calls after first bug' so errors are reported reasonably
            # soon even for tests that are allowed to run for a very long time,
            # or sooner if the latest half of our test effort has been fruitless.
            return self.call_count < MIN_TEST_CALLS or self.call_count < min(
                self.first_bug_found_at + 1000, self.last_bug_found_at * 2
            )

        count = 0
        while should_generate_more() and (
            count < 10
            or self.health_check_state is not None
            # If we have not found a valid prefix yet, the target selector will
            # be empty and the mutation stage will fail with a very rare internal
            # error.  We therefore continue this initial random generation step
            # until we have found at least one prefix to mutate.
            or len(self.target_selector) == 0
        ):
            prefix = self.generate_novel_prefix()

            def draw_bytes(data, n):
                if data.index < len(prefix):
                    result = prefix[data.index : data.index + n]
                    # We always draw prefixes as a whole number of blocks
                    assert len(result) == n
                else:
                    result = uniform(self.random, n)
                return self.__zero_bound(data, result)

            last_data = self.new_conjecture_data(draw_bytes)
            self.test_function(last_data)
            last_data.freeze()

            count += 1

        mutations = 0
        mutator = self._new_mutator()

        zero_bound_queue = []

        while should_generate_more():
            if zero_bound_queue:
                # Whenever we generated an example and it hits a bound
                # which forces zero blocks into it, this creates a weird
                # distortion effect by making certain parts of the data
                # stream (especially ones to the right) much more likely
                # to be zero. We fix this by redistributing the generated
                # data by shuffling it randomly. This results in the
                # zero data being spread evenly throughout the buffer.
                # Hopefully the shrinking this causes will cause us to
                # naturally fail to hit the bound.
                # If it doesn't then we will queue the new version up again
                # (now with more zeros) and try again.
                overdrawn = zero_bound_queue.pop()
                buffer = bytearray(overdrawn.buffer)

                # These will have values written to them that are different
                # from what's in them anyway, so the value there doesn't
                # really "count" for distributional purposes, and if we
                # leave them in then they can cause the fraction of non
                # zero bytes to increase on redraw instead of decrease.
                for i in overdrawn.forced_indices:
                    buffer[i] = 0

                self.random.shuffle(buffer)
                buffer = hbytes(buffer)

                def draw_bytes(data, n):
                    result = buffer[data.index : data.index + n]
                    if len(result) < n:
                        result += hbytes(n - len(result))
                    return self.__zero_bound(data, result)

                data = self.new_conjecture_data(draw_bytes=draw_bytes)
                self.test_function(data)
                data.freeze()
            else:
                origin = self.target_selector.select()
                mutations += 1
                data = self.new_conjecture_data(draw_bytes=mutator(origin))
                self.test_function(data)
                data.freeze()
                if data.status > origin.status:
                    mutations = 0
                elif data.status < origin.status or mutations >= 10:
                    # Cap the variations of a single example and move on to
                    # an entirely fresh start.  Ten is an entirely arbitrary
                    # constant, but it's been working well for years.
                    mutations = 0
                    mutator = self._new_mutator()
            if getattr(data, "hit_zero_bound", False):
                zero_bound_queue.append(data)
            mutations += 1
Example #16
    def run_state_machine(factory, data):
        cd = data.conjecture_data
        machine = factory()
        check_type(RuleBasedStateMachine, machine, "state_machine_factory()")
        cd.hypothesis_runner = machine

        print_steps = (current_build_context().is_final
                       or current_verbosity() >= Verbosity.debug)
        try:
            if print_steps:
                report(f"state = {machine.__class__.__name__}()")
            machine.check_invariants(settings)
            max_steps = settings.stateful_step_count
            steps_run = 0

            while True:
                # We basically always want to run the maximum number of steps,
                # but need to leave a small probability of terminating early
                # in order to allow for reducing the number of steps once we
                # find a failing test case, so we stop with probability of
                # 2 ** -16 during normal operation but force a stop when we've
                # generated enough steps.
                cd.start_example(STATE_MACHINE_RUN_LABEL)
                if steps_run == 0:
                    cd.draw_bits(16, forced=1)
                elif steps_run >= max_steps:
                    cd.draw_bits(16, forced=0)
                    break
                else:
                    # All we really care about is whether this value is zero
                    # or non-zero, so if it's > 1 we discard it and insert a
                    # replacement value after
                    cd.start_example(SHOULD_CONTINUE_LABEL)
                    should_continue_value = cd.draw_bits(16)
                    if should_continue_value > 1:
                        cd.stop_example(discard=True)
                        cd.draw_bits(16,
                                     forced=int(bool(should_continue_value)))
                    else:
                        cd.stop_example()
                        if should_continue_value == 0:
                            break
                steps_run += 1

                # Choose a rule to run, preferring an initialize rule if there are
                # any which have not been run yet.
                if machine._initialize_rules_to_run:
                    init_rules = [
                        st.tuples(st.just(rule),
                                  st.fixed_dictionaries(rule.arguments))
                        for rule in machine._initialize_rules_to_run
                    ]
                    rule, data = cd.draw(st.one_of(init_rules))
                    machine._initialize_rules_to_run.remove(rule)
                else:
                    rule, data = cd.draw(machine._rules_strategy)

                # Pretty-print the values this rule was called with *before* calling
                # _add_result_to_targets, to avoid printing arguments which are also
                # a return value using the variable name they are assigned to.
                # See https://github.com/HypothesisWorks/hypothesis/issues/2341
                if print_steps:
                    data_to_print = {
                        k: machine._pretty_print(v)
                        for k, v in data.items()
                    }

                # Assign 'result' here in case executing the rule fails below
                result = multiple()
                try:
                    data = dict(data)
                    for k, v in list(data.items()):
                        if isinstance(v, VarReference):
                            data[k] = machine.names_to_values[v.name]
                    result = rule.function(machine, **data)
                    if rule.targets:
                        if isinstance(result, MultipleResults):
                            for single_result in result.values:
                                machine._add_result_to_targets(
                                    rule.targets, single_result)
                        else:
                            machine._add_result_to_targets(
                                rule.targets, result)
                    elif result is not None:
                        fail_health_check(
                            settings,
                            "Rules should return None if they have no target bundle, "
                            f"but {rule.function.__qualname__} returned {result!r}",
                            HealthCheck.return_value,
                        )
                finally:
                    if print_steps:
                        # 'result' is only used if the step has target bundles.
                        # If it does, and the result is a 'MultipleResult',
                        # then 'print_step' prints a multi-variable assignment.
                        machine._print_step(rule, data_to_print, result)
                machine.check_invariants(settings)
                cd.stop_example()
        finally:
            if print_steps:
                report("state.teardown()")
            machine.teardown()
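
The stepping loop above decides whether to keep running by drawing sixteen bits from the choice sequence: the first step is forced to continue, the step at max_steps is forced to stop, and in between the run only ends when the draw comes up exactly zero, i.e. with probability 2 ** -16. A minimal sketch of that decision outside the engine, using random.getrandbits in place of draw_bits (should_continue and the loop below are illustrative, not Hypothesis API):

import random

def should_continue(steps_run, max_steps, rng=random):
    # Mirror of the continue/stop decision in the stepping loop above.
    if steps_run == 0:
        return True            # first step: forced to continue
    if steps_run >= max_steps:
        return False           # step budget exhausted: forced to stop
    # Normal operation: stop only when a 16-bit draw is exactly zero,
    # which happens with probability 2 ** -16.
    return rng.getrandbits(16) != 0

steps_run = 0
while should_continue(steps_run, max_steps=50):
    steps_run += 1
print(f"ran {steps_run} steps")
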
Example #17
    def generate_new_examples(self):
        if Phase.generate not in self.settings.phases:
            return
        if self.interesting_examples:
            # The example database has failing examples from a previous run,
            # so we'd rather report that they're still failing ASAP than take
            # the time to look for additional failures.
            return

        zero_data = self.cached_test_function(hbytes(BUFFER_SIZE))
        if zero_data.status > Status.OVERRUN:
            self.__data_cache.pin(zero_data.buffer)

        self.optimise_all(zero_data)

        if zero_data.status == Status.OVERRUN or (
                zero_data.status == Status.VALID
                and len(zero_data.buffer) * 2 > BUFFER_SIZE):
            fail_health_check(
                self.settings,
                "The smallest natural example for your test is extremely "
                "large. This makes it difficult for Hypothesis to generate "
                "good examples, especially when trying to reduce failing ones "
                "at the end. Consider reducing the size of your data if it is "
                "of a fixed size. You could also fix this by improving how "
                "your data shrinks (see https://hypothesis.readthedocs.io/en/"
                "latest/data.html#shrinking for details), or by introducing "
                "default values inside your strategy. e.g. could you replace "
                "some arguments with their defaults by using "
                "one_of(none(), some_complex_strategy)?",
                HealthCheck.large_base_example,
            )

        self.health_check_state = HealthCheckState()

        def should_generate_more():
            # If we haven't found a bug, keep looking.  We check this before
            # doing anything else as it's by far the most common case.
            if not self.interesting_examples:
                return True
            # If we've found a bug and won't report more than one, stop looking.
            elif not self.settings.report_multiple_bugs:
                return False
            assert self.first_bug_found_at <= self.last_bug_found_at <= self.call_count
            # End the generation phase where we would have ended it if no bugs had
            # been found.  This reproduces the exit logic in `self.test_function`,
            # but with the important distinction that this clause will move on to
            # the shrinking phase having found one or more bugs, while the other
            # will exit having found zero bugs.
            if (self.valid_examples >= self.settings.max_examples
                    or self.call_count >= max(self.settings.max_examples * 10,
                                              1000)):  # pragma: no cover
                return False
            # Otherwise, keep searching for between ten and 'a heuristic' calls.
            # We cap 'calls after first bug' so errors are reported reasonably
            # soon even for tests that are allowed to run for a very long time,
            # or sooner if the latest half of our test effort has been fruitless.
            return self.call_count < MIN_TEST_CALLS or self.call_count < min(
                self.first_bug_found_at + 1000, self.last_bug_found_at * 2)

        # GenerationParameters are a set of decisions we make that are global
        # to the whole test case, used to bias the data generation in various
        # ways. This is an approach very very loosely inspired by the paper
        # "Swarm testing." by Groce et al. in that it induces deliberate
        # correlation between otherwise independent decisions made during the
        # generation process.
        #
        # More importantly the generation is designed to make certain scenarios
        # more likely (e.g. small examples, duplicated values), which can help
        # or hurt in terms of finding interesting things. Whenever the result
        # of our generation is a bad test case, for whatever definition of
        # "bad" we like (currently, invalid or too large), we ditch the
        # parameter early. This allows us to potentially generate good test
        # cases significantly more often than we otherwise would, by selecting
        # for parameters that make them more likely.
        parameter = GenerationParameters(self.random)
        count = 0

        # We attempt to use the size of the minimal generated test case starting
        # from a given novel prefix as a guideline to generate smaller test
        # cases for an initial period, by restricting ourselves to test cases
        # that are not much larger than it.
        #
        # Calculating the actual minimal generated test case is hard, so we
        # take a best guess that zero extending a prefix produces the minimal
        # test case starting with that prefix (this is true for our built in
        # strategies). This is only a reasonable thing to do if the resulting
        # test case is valid. If we regularly run into situations where it is
        # not valid then this strategy is a waste of time, so we want to
        # abandon it early. In order to do this we track how many times in a
        # row it has failed to work, and abort small test case generation when
        # it has failed too many times in a row.
        consecutive_zero_extend_is_invalid = 0

        while should_generate_more():
            prefix = self.generate_novel_prefix()
            assert len(prefix) <= BUFFER_SIZE

            # We control growth during initial example generation, for two
            # reasons:
            #
            # * It gives us an opportunity to find small examples early, which
            #   gives us a fast path for easy to find bugs.
            # * It avoids low probability events where we might end up
            #   generating very large examples during health checks, which
            #   on slower machines can trigger HealthCheck.too_slow.
            #
            # The heuristic we use is that we attempt to estimate the smallest
            # extension of this prefix, and limit the size to no more than
            # an order of magnitude larger than that. If we fail to estimate
            # the size accurately, we skip over this prefix and try again.
            #
            # We need to tune the example size based on the initial prefix,
            # because any fixed size might be too small, and any size based
            # on the strategy in general can fall afoul of strategies that
            # have very different sizes for different prefixes.
            small_example_cap = clamp(10, self.settings.max_examples // 10, 50)

            if (self.valid_examples <= small_example_cap
                    and self.call_count <= 5 * small_example_cap
                    and not self.interesting_examples
                    and consecutive_zero_extend_is_invalid < 5):
                minimal_example = self.cached_test_function(
                    prefix + hbytes(BUFFER_SIZE - len(prefix)))

                if minimal_example.status < Status.VALID:
                    consecutive_zero_extend_is_invalid += 1
                    continue

                consecutive_zero_extend_is_invalid = 0

                minimal_extension = len(minimal_example.buffer) - len(prefix)

                max_length = min(
                    len(prefix) + minimal_extension * 10, BUFFER_SIZE)

                # We could end up in a situation where even though the prefix was
                # novel when we generated it, because we've now tried zero extending
                # it not all possible continuations of it will be novel. In order to
                # avoid making redundant test calls, we rerun it in simulation mode
                # first. If this has a predictable result, then we don't bother
                # running the test function for real here. If however we encounter
                # some novel behaviour, we try again with the real test function,
                # starting from the new novel prefix that it has discovered.
                try:
                    trial_data = self.new_conjecture_data(
                        prefix=prefix,
                        parameter=parameter,
                        max_length=max_length)
                    self.tree.simulate_test_function(trial_data)
                    continue
                except PreviouslyUnseenBehaviour:
                    pass

                # If the simulation entered part of the tree that has been killed,
                # we don't want to run this.
                if trial_data.observer.killed:
                    continue

                # We might have hit the cap on number of examples we should
                # run when calculating the minimal example.
                if not should_generate_more():
                    break

                prefix = trial_data.buffer
            else:
                max_length = BUFFER_SIZE

            data = self.new_conjecture_data(prefix=prefix,
                                            parameter=parameter,
                                            max_length=max_length)

            self.test_function(data)

            self.optimise_all(data)

            count += 1
            if (data.status < Status.VALID
                    or len(data.buffer) * 2 >= BUFFER_SIZE or count > 5):
                count = 0
                parameter = GenerationParameters(self.random)
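
Example #17 ditches its GenerationParameters whenever a generated case is invalid or occupies more than half the buffer, and in any case after five uses. A hedged sketch of just that rotation policy, with a stand-in fresh_parameter() and made-up validity checks in place of real engine state:

import random

BUFFER_SIZE = 8 * 1024          # stand-in for the engine's buffer cap

def fresh_parameter(rng):
    # Stand-in for GenerationParameters(self.random): any bundle of
    # per-"era" generation decisions would do for this sketch.
    return {"seed": rng.random()}

rng = random.Random(0)
parameter, count = fresh_parameter(rng), 0
for _ in range(20):
    # ... a test case would be generated under `parameter` here ...
    invalid = rng.random() < 0.3                 # pretend 30% of cases are bad
    buffer_len = rng.randrange(BUFFER_SIZE)
    count += 1
    if invalid or buffer_len * 2 >= BUFFER_SIZE or count > 5:
        # Ditch the parameter early on a bad case, or after five uses.
        parameter, count = fresh_parameter(rng), 0
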
Example #18
File: core.py  Project: yssource/hypothesis
        def wrapped_test(*arguments, **kwargs):
            # Tell pytest to omit the body of this function from tracebacks
            __tracebackhide__ = True

            test = wrapped_test.hypothesis.inner_test

            if getattr(test, "is_hypothesis_test", False):
                raise InvalidArgument((
                    "You have applied @given to the test %s more than once, which "
                    "wraps the test several times and is extremely slow. A "
                    "similar effect can be gained by combining the arguments "
                    "of the two calls to given. For example, instead of "
                    "@given(booleans()) @given(integers()), you could write "
                    "@given(booleans(), integers())") % (test.__name__, ))

            settings = wrapped_test._hypothesis_internal_use_settings

            random = get_random_for_wrapped_test(test, wrapped_test)

            # Use type information to convert "infer" arguments into appropriate
            # strategies.
            if infer in given_kwargs.values():
                hints = get_type_hints(test)
            for name in [
                    name for name, value in given_kwargs.items()
                    if value is infer
            ]:
                if name not in hints:
                    raise InvalidArgument(
                        "passed %s=infer for %s, but %s has no type annotation"
                        % (name, test.__name__, name))
                given_kwargs[name] = st.from_type(hints[name])

            processed_args = process_arguments_to_given(
                wrapped_test,
                arguments,
                kwargs,
                given_kwargs,
                argspec,
                test,
                settings,
            )
            arguments, kwargs, test_runner, search_strategy = processed_args

            runner = getattr(search_strategy, "runner", None)
            if isinstance(runner, TestCase) and test.__name__ in dir(TestCase):
                msg = ("You have applied @given to the method %s, which is "
                       "used by the unittest runner but is not itself a test."
                       "  This is not useful in any way." % test.__name__)
                fail_health_check(settings, msg, HealthCheck.not_a_test_method)
            if bad_django_TestCase(runner):  # pragma: no cover
                # Covered by the Django tests, but not the pytest coverage task
                raise InvalidArgument(
                    "You have applied @given to a method on %s, but this "
                    "class does not inherit from the supported versions in "
                    "`hypothesis.extra.django`.  Use the Hypothesis variants "
                    "to ensure that each example is run in a separate "
                    "database transaction." % qualname(type(runner)))

            state = StateForActualGivenExecution(
                test_runner,
                search_strategy,
                test,
                settings,
                random,
                wrapped_test,
            )

            reproduce_failure = wrapped_test._hypothesis_internal_use_reproduce_failure

            # If there was a @reproduce_failure decorator, use it to reproduce
            # the error (or complain that we couldn't). Either way, this will
            # always raise some kind of error.
            if reproduce_failure is not None:
                expected_version, failure = reproduce_failure
                if expected_version != __version__:
                    raise InvalidArgument(
                        ("Attempting to reproduce a failure from a different "
                         "version of Hypothesis. This failure is from %s, but "
                         "you are currently running %r. Please change your "
                         "Hypothesis version to a matching one.") %
                        (expected_version, __version__))
                try:
                    state.execute_once(
                        ConjectureData.for_buffer(decode_failure(failure)),
                        print_example=True,
                        is_final=True,
                    )
                    raise DidNotReproduce(
                        "Expected the test to raise an error, but it "
                        "completed successfully.")
                except StopTest:
                    raise DidNotReproduce(
                        "The shape of the test data has changed in some way "
                        "from where this blob was defined. Are you sure "
                        "you're running the same test?")
                except UnsatisfiedAssumption:
                    raise DidNotReproduce(
                        "The test data failed to satisfy an assumption in the "
                        "test. Have you added it since this blob was "
                        "generated?")

            # There was no @reproduce_failure, so start by running any explicit
            # examples from @example decorators.

            execute_explicit_examples(state, wrapped_test, arguments, kwargs)

            # If there were any explicit examples, they all ran successfully.
            # The next step is to use the Conjecture engine to run the test on
            # many different inputs.

            if settings.max_examples <= 0:
                return

            if not (Phase.reuse in settings.phases
                    or Phase.generate in settings.phases):
                return

            try:
                if isinstance(runner, TestCase) and hasattr(runner, "subTest"):
                    subTest = runner.subTest
                    try:
                        runner.subTest = fake_subTest
                        state.run_engine()
                    finally:
                        runner.subTest = subTest
                else:
                    state.run_engine()
            except BaseException as e:
                # The exception caught here should either be an actual test
                # failure (or MultipleFailures), or some kind of fatal error
                # that caused the engine to stop.

                generated_seed = wrapped_test._hypothesis_internal_use_generated_seed
                with local_settings(settings):
                    if not (state.failed_normally or generated_seed is None):
                        if running_under_pytest:
                            report(
                                "You can add @seed(%(seed)d) to this test or "
                                "run pytest with --hypothesis-seed=%(seed)d "
                                "to reproduce this failure." %
                                {"seed": generated_seed})
                        else:
                            report("You can add @seed(%d) to this test to "
                                   "reproduce this failure." %
                                   (generated_seed, ))
                    # The dance here is to avoid showing users long tracebacks
                    # full of Hypothesis internals they don't care about.
                    # We have to do this inline, to avoid adding another
                    # internal stack frame just when we've removed the rest.
                    if PY2:
                        # Python 2 doesn't have Exception.with_traceback(...);
                        # instead it has a three-argument form of the `raise`
                        # statement.  Unfortunately this is a SyntaxError on
                        # Python 3, and before Python 2.7.9 it was *also* a
                        # SyntaxError to use it in a nested function so we
                        # can't `exec` or `eval` our way out (BPO-21591).
                        # So unless we break some versions of Python 2, none
                        # of them get traceback elision.
                        raise
                    # On Python 3, we swap out the real traceback for our
                    # trimmed version.  Using a variable ensures that the line
                    # which will actually appear in tracebacks is as clear as
                    # possible - "raise the_error_hypothesis_found".
                    the_error_hypothesis_found = e.with_traceback(
                        get_trimmed_traceback())
                    raise the_error_hypothesis_found
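
The final re-raise above swaps the real traceback for a trimmed one via e.with_traceback(get_trimmed_traceback()), so users see their own frames rather than engine internals. A self-contained sketch of the same pattern, assuming a hypothetical INTERNAL_MARKER path fragment to identify frames worth hiding:

import traceback

INTERNAL_MARKER = "/_my_framework/"   # hypothetical: marks frames to hide

def trimmed(tb):
    # Drop leading frames whose file path looks like framework internals.
    while tb is not None and INTERNAL_MARKER in tb.tb_frame.f_code.co_filename:
        tb = tb.tb_next
    return tb

def run(test):
    try:
        test()
    except Exception as e:
        # Bind to a clearly named variable so the raising line itself reads
        # well in the user's traceback.
        the_error_found = e.with_traceback(trimmed(e.__traceback__))
        raise the_error_found

def failing_test():
    assert 1 + 1 == 3

try:
    run(failing_test)
except AssertionError:
    traceback.print_exc()
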
Example #19
    def generate_new_examples(self):
        if Phase.generate not in self.settings.phases:
            return
        if self.interesting_examples:
            # The example database has failing examples from a previous run,
            # so we'd rather report that they're still failing ASAP than take
            # the time to look for additional failures.
            return

        self.debug("Generating new examples")

        assert self.should_generate_more()
        zero_data = self.cached_test_function(bytes(BUFFER_SIZE))
        if zero_data.status > Status.OVERRUN:
            self.__data_cache.pin(zero_data.buffer)

        if zero_data.status == Status.OVERRUN or (
            zero_data.status == Status.VALID and len(zero_data.buffer) * 2 > BUFFER_SIZE
        ):
            fail_health_check(
                self.settings,
                "The smallest natural example for your test is extremely "
                "large. This makes it difficult for Hypothesis to generate "
                "good examples, especially when trying to reduce failing ones "
                "at the end. Consider reducing the size of your data if it is "
                "of a fixed size. You could also fix this by improving how "
                "your data shrinks (see https://hypothesis.readthedocs.io/en/"
                "latest/data.html#shrinking for details), or by introducing "
                "default values inside your strategy. e.g. could you replace "
                "some arguments with their defaults by using "
                "one_of(none(), some_complex_strategy)?",
                HealthCheck.large_base_example,
            )

        self.health_check_state = HealthCheckState()

        # We attempt to use the size of the minimal generated test case starting
        # from a given novel prefix as a guideline to generate smaller test
        # cases for an initial period, by restricting ourselves to test cases
        # that are not much larger than it.
        #
        # Calculating the actual minimal generated test case is hard, so we
        # take a best guess that zero extending a prefix produces the minimal
        # test case starting with that prefix (this is true for our built in
        # strategies). This is only a reasonable thing to do if the resulting
        # test case is valid. If we regularly run into situations where it is
        # not valid then this strategy is a waste of time, so we want to
        # abandon it early. In order to do this we track how many times in a
        # row it has failed to work, and abort small test case generation when
        # it has failed too many times in a row.
        consecutive_zero_extend_is_invalid = 0

        # We control growth during initial example generation, for two
        # reasons:
        #
        # * It gives us an opportunity to find small examples early, which
        #   gives us a fast path for easy to find bugs.
        # * It avoids low probability events where we might end up
        #   generating very large examples during health checks, which
        #   on slower machines can trigger HealthCheck.too_slow.
        #
        # The heuristic we use is that we attempt to estimate the smallest
        # extension of this prefix, and limit the size to no more than
        # an order of magnitude larger than that. If we fail to estimate
        # the size accurately, we skip over this prefix and try again.
        #
        # We need to tune the example size based on the initial prefix,
        # because any fixed size might be too small, and any size based
        # on the strategy in general can fall afoul of strategies that
        # have very different sizes for different prefixes.
        small_example_cap = clamp(10, self.settings.max_examples // 10, 50)

        optimise_at = max(self.settings.max_examples // 2, small_example_cap + 1)
        ran_optimisations = False

        while self.should_generate_more():
            prefix = self.generate_novel_prefix()
            assert len(prefix) <= BUFFER_SIZE
            if (
                self.valid_examples <= small_example_cap
                and self.call_count <= 5 * small_example_cap
                and not self.interesting_examples
                and consecutive_zero_extend_is_invalid < 5
            ):
                minimal_example = self.cached_test_function(
                    prefix + bytes(BUFFER_SIZE - len(prefix))
                )

                if minimal_example.status < Status.VALID:
                    consecutive_zero_extend_is_invalid += 1
                    continue

                consecutive_zero_extend_is_invalid = 0

                minimal_extension = len(minimal_example.buffer) - len(prefix)

                max_length = min(len(prefix) + minimal_extension * 10, BUFFER_SIZE)

                # We could end up in a situation where even though the prefix was
                # novel when we generated it, because we've now tried zero extending
                # it not all possible continuations of it will be novel. In order to
                # avoid making redundant test calls, we rerun it in simulation mode
                # first. If this has a predictable result, then we don't bother
                # running the test function for real here. If however we encounter
                # some novel behaviour, we try again with the real test function,
                # starting from the new novel prefix that it has discovered.
                try:
                    trial_data = self.new_conjecture_data(
                        prefix=prefix, max_length=max_length
                    )
                    self.tree.simulate_test_function(trial_data)
                    continue
                except PreviouslyUnseenBehaviour:
                    pass

                # If the simulation entered part of the tree that has been killed,
                # we don't want to run this.
                if trial_data.observer.killed:
                    continue

                # We might have hit the cap on number of examples we should
                # run when calculating the minimal example.
                if not self.should_generate_more():
                    break

                prefix = trial_data.buffer
            else:
                max_length = BUFFER_SIZE

            data = self.new_conjecture_data(prefix=prefix, max_length=max_length)

            self.test_function(data)

            self.generate_mutations_from(data)

            # Although the optimisations are logically a distinct phase, we
            # actually normally run them as part of example generation. The
            # reason for this is that we cannot guarantee that optimisation
            # actually exhausts our budget: It might finish running and we
            # discover that actually we still could run a bunch more test cases
            # if we want.
            if (
                self.valid_examples >= max(small_example_cap, optimise_at)
                and not ran_optimisations
            ):
                ran_optimisations = True
                self.optimise_targets()
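
Example #19 sizes its "small example" phase with clamp(10, max_examples // 10, 50) and defers target optimisation until roughly half the valid-example budget is spent. A quick worked illustration of those thresholds, assuming clamp(lower, value, upper) simply pins the value into that range:

def clamp(lower, value, upper):
    # Pin `value` into the inclusive range [lower, upper].
    return max(lower, min(value, upper))

for max_examples in (20, 100, 500, 10_000):
    small_example_cap = clamp(10, max_examples // 10, 50)
    optimise_at = max(max_examples // 2, small_example_cap + 1)
    print(max_examples, small_example_cap, optimise_at)
# 20     -> cap 10, optimise at 11
# 100    -> cap 10, optimise at 50
# 500    -> cap 50, optimise at 250
# 10_000 -> cap 50, optimise at 5000
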
Example #20
    def generate_new_examples(self):
        if Phase.generate not in self.settings.phases:
            return
        if self.interesting_examples:
            # The example database has failing examples from a previous run,
            # so we'd rather report that they're still failing ASAP than take
            # the time to look for additional failures.
            return

        zero_data = self.cached_test_function(hbytes(BUFFER_SIZE))
        if zero_data.status > Status.OVERRUN:
            self.__data_cache.pin(zero_data.buffer)

        self.optimise_all(zero_data)

        if zero_data.status == Status.OVERRUN or (
                zero_data.status == Status.VALID
                and len(zero_data.buffer) * 2 > BUFFER_SIZE):
            fail_health_check(
                self.settings,
                "The smallest natural example for your test is extremely "
                "large. This makes it difficult for Hypothesis to generate "
                "good examples, especially when trying to reduce failing ones "
                "at the end. Consider reducing the size of your data if it is "
                "of a fixed size. You could also fix this by improving how "
                "your data shrinks (see https://hypothesis.readthedocs.io/en/"
                "latest/data.html#shrinking for details), or by introducing "
                "default values inside your strategy. e.g. could you replace "
                "some arguments with their defaults by using "
                "one_of(none(), some_complex_strategy)?",
                HealthCheck.large_base_example,
            )

        self.health_check_state = HealthCheckState()

        def should_generate_more():
            # If we haven't found a bug, keep looking.  We check this before
            # doing anything else as it's by far the most common case.
            if not self.interesting_examples:
                return True
            # If we've found a bug and won't report more than one, stop looking.
            elif not self.settings.report_multiple_bugs:
                return False
            assert self.first_bug_found_at <= self.last_bug_found_at <= self.call_count
            # End the generation phase where we would have ended it if no bugs had
            # been found.  This reproduces the exit logic in `self.test_function`,
            # but with the important distinction that this clause will move on to
            # the shrinking phase having found one or more bugs, while the other
            # will exit having found zero bugs.
            if (self.valid_examples >= self.settings.max_examples
                    or self.call_count >= max(self.settings.max_examples * 10,
                                              1000)):  # pragma: no cover
                return False
            # Otherwise, keep searching for between ten and 'a heuristic' calls.
            # We cap 'calls after first bug' so errors are reported reasonably
            # soon even for tests that are allowed to run for a very long time,
            # or sooner if the latest half of our test effort has been fruitless.
            return self.call_count < MIN_TEST_CALLS or self.call_count < min(
                self.first_bug_found_at + 1000, self.last_bug_found_at * 2)

        # GenerationParameters are a set of decisions we make that are global
        # to the whole test case, used to bias the data generation in various
        # ways. This is an approach very very loosely inspired by the paper
        # "Swarm testing." by Groce et al. in that it induces deliberate
        # correlation between otherwise independent decisions made during the
        # generation process.
        #
        # More importantly the generation is designed to make certain scenarios
        # more likely (e.g. small examples, duplicated values), which can help
        # or hurt in terms of finding interesting things. Whenever the result
        # of our generation is a bad test case, for whatever definition of
        # "bad" we like (currently, invalid or too large), we ditch the
        # parameter early. This allows us to potentially generate good test
        # cases significantly more often than we otherwise would, by selecting
        # for parameters that make them more likely.
        parameter = GenerationParameters(self.random)
        count = 0

        while should_generate_more():
            prefix = self.generate_novel_prefix()

            data = self.new_conjecture_data(draw_bytes_with(prefix, parameter))
            self.test_function(data)

            self.optimise_all(data)

            count += 1
            if (data.status < Status.VALID
                    or len(data.buffer) * 2 >= BUFFER_SIZE or count > 5):
                count = 0
                parameter = GenerationParameters(self.random)
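
Once at least one bug has been found, should_generate_more keeps calling the test until min(first_bug_found_at + 1000, last_bug_found_at * 2), and never fewer than MIN_TEST_CALLS times overall. A small numeric illustration of that budget (MIN_TEST_CALLS = 10 is assumed here):

MIN_TEST_CALLS = 10   # assumed value; the real constant lives in the engine

def keep_searching_after_bug(call_count, first_bug_found_at, last_bug_found_at):
    # The post-bug portion of should_generate_more above.
    return call_count < MIN_TEST_CALLS or call_count < min(
        first_bug_found_at + 1000, last_bug_found_at * 2
    )

# A bug first found at call 30 and most recently re-found at call 45:
# generation continues until call 90 (45 * 2), not call 1030.
print(keep_searching_after_bug(89, 30, 45))   # True
print(keep_searching_after_bug(90, 30, 45))   # False
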
Example #21
    def generate_new_examples(self):
        if Phase.generate not in self.settings.phases:
            return
        if self.interesting_examples:
            # The example database has failing examples from a previous run,
            # so we'd rather report that they're still failing ASAP than take
            # the time to look for additional failures.
            return

        zero_data = self.cached_test_function(hbytes(BUFFER_SIZE))
        if zero_data.status > Status.OVERRUN:
            self.__data_cache.pin(zero_data.buffer)

        if zero_data.status == Status.OVERRUN or (
                zero_data.status == Status.VALID
                and len(zero_data.buffer) * 2 > BUFFER_SIZE):
            fail_health_check(
                self.settings,
                "The smallest natural example for your test is extremely "
                "large. This makes it difficult for Hypothesis to generate "
                "good examples, especially when trying to reduce failing ones "
                "at the end. Consider reducing the size of your data if it is "
                "of a fixed size. You could also fix this by improving how "
                "your data shrinks (see https://hypothesis.readthedocs.io/en/"
                "latest/data.html#shrinking for details), or by introducing "
                "default values inside your strategy. e.g. could you replace "
                "some arguments with their defaults by using "
                "one_of(none(), some_complex_strategy)?",
                HealthCheck.large_base_example,
            )

        if zero_data is not Overrun:
            # If the language starts with writes of length >= cap then there is
            # only one string in it: Everything after cap is forced to be zero (or
            # to be whatever value is written there). That means that once we've
            # tried the zero value, there's nothing left for us to do, so we
            # exit early here.
            has_non_forced = False

            # It's impossible to fall out of this loop normally because if we
            # did then that would mean that all blocks are writes, so we would
            # already have triggered the exhaustedness check on the tree and
            # finished running.
            for b in zero_data.blocks:  # pragma: no branch
                if b.start >= self.cap:
                    break
                if not b.forced:
                    has_non_forced = True
                    break
            if not has_non_forced:
                self.exit_with(ExitReason.finished)

        self.health_check_state = HealthCheckState()

        def should_generate_more():
            # If we haven't found a bug, keep looking.  We check this before
            # doing anything else as it's by far the most common case.
            if not self.interesting_examples:
                return True
            # If we've found a bug and won't report more than one, stop looking.
            elif not self.settings.report_multiple_bugs:
                return False
            assert self.first_bug_found_at <= self.last_bug_found_at <= self.call_count
            # End the generation phase where we would have ended it if no bugs had
            # been found.  This reproduces the exit logic in `self.test_function`,
            # but with the important distinction that this clause will move on to
            # the shrinking phase having found one or more bugs, while the other
            # will exit having found zero bugs.
            if (self.valid_examples >= self.settings.max_examples
                    or self.call_count >= max(self.settings.max_examples * 10,
                                              1000)):  # pragma: no cover
                return False
            # Otherwise, keep searching for between ten and 'a heuristic' calls.
            # We cap 'calls after first bug' so errors are reported reasonably
            # soon even for tests that are allowed to run for a very long time,
            # or sooner if the latest half of our test effort has been fruitless.
            return self.call_count < MIN_TEST_CALLS or self.call_count < min(
                self.first_bug_found_at + 1000, self.last_bug_found_at * 2)

        count = 0
        mutations = 0
        mutator = self._new_mutator()
        zero_bound_queue = []

        while should_generate_more():
            if (count < 10 or self.health_check_state is not None
                    # If we have not found a valid prefix yet, the target selector will
                    # be empty and the mutation stage will fail with a very rare internal
                    # error.  We therefore continue this initial random generation step
                    # until we have found at least one prefix to mutate.
                    or len(self.target_selector) == 0
                    # For long-running tests, if we are not currently dealing with an
                    # overrun we want a small chance to generate an entirely novel buffer.
                    or not (zero_bound_queue or self.random.randrange(20))):
                prefix = self.generate_novel_prefix()

                def draw_bytes(data, n):
                    if data.index < len(prefix):
                        result = prefix[data.index:data.index + n]
                        # We always draw prefixes as a whole number of blocks
                        assert len(result) == n
                    else:
                        result = uniform(self.random, n)
                    return self.__zero_bound(data, result)

                data = self.new_conjecture_data(draw_bytes)
                self.test_function(data)
                data.freeze()
                count += 1
            elif zero_bound_queue:
                # Whenever we generated an example and it hits a bound
                # which forces zero blocks into it, this creates a weird
                # distortion effect by making certain parts of the data
                # stream (especially ones to the right) much more likely
                # to be zero. We fix this by redistributing the generated
                # data by shuffling it randomly. This results in the
                # zero data being spread evenly throughout the buffer.
                # Hopefully the shrinking this causes will cause us to
                # naturally fail to hit the bound.
                # If it doesn't then we will queue the new version up again
                # (now with more zeros) and try again.
                overdrawn = zero_bound_queue.pop()
                buffer = bytearray(overdrawn.buffer)

                # These will have values written to them that are different
                # from what's in them anyway, so the value there doesn't
                # really "count" for distributional purposes, and if we
                # leave them in then they can cause the fraction of non
                # zero bytes to increase on redraw instead of decrease.
                for i in overdrawn.forced_indices:
                    buffer[i] = 0

                self.random.shuffle(buffer)
                buffer = hbytes(buffer)

                def draw_bytes(data, n):
                    result = buffer[data.index:data.index + n]
                    if len(result) < n:
                        result += hbytes(n - len(result))
                    return self.__zero_bound(data, result)

                data = self.new_conjecture_data(draw_bytes=draw_bytes)
                self.test_function(data)
                data.freeze()
            else:
                origin = self.target_selector.select()
                mutations += 1
                data = self.new_conjecture_data(draw_bytes=mutator(origin))
                self.test_function(data)
                data.freeze()
                if data.status > origin.status:
                    mutations = 0
                elif data.status < origin.status or mutations >= 10:
                    # Cap the variations of a single example and move on to
                    # an entirely fresh start.  Ten is an entirely arbitrary
                    # constant, but it's been working well for years.
                    mutations = 0
                    mutator = self._new_mutator()
            if getattr(data, "hit_zero_bound", False):
                zero_bound_queue.append(data)
            mutations += 1
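
When a generated example hits a zero bound, example #21 zeroes the forced indices and shuffles the whole buffer, so the forced zeros stop clustering towards the end of the data stream. A standalone sketch of that redistribution step (the buffer contents and indices here are made up):

import random

def redistribute(buffer, forced_indices, rng):
    # Zero out forced positions, then shuffle the whole buffer so the
    # zeros are spread evenly instead of clustered where the bound hit.
    scratch = bytearray(buffer)
    for i in forced_indices:
        scratch[i] = 0
    rng.shuffle(scratch)
    return bytes(scratch)

rng = random.Random(0)
original = bytes([7, 7, 7, 0, 0, 0, 0, 0])    # tail forced to zero by a bound
print(redistribute(original, forced_indices={3, 4, 5, 6, 7}, rng=rng))
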
Example #22
        def wrapped_test(*arguments, **kwargs):
            # Tell pytest to omit the body of this function from tracebacks
            __tracebackhide__ = True

            test = wrapped_test.hypothesis.inner_test

            if getattr(test, 'is_hypothesis_test', False):
                note_deprecation(
                    ('You have applied @given to test: %s more than once. In '
                     'future this will be an error. Applying @given twice '
                     'wraps the test twice, which can be extremely slow. A '
                     'similar effect can be gained by combining the arguments '
                     'of the two calls to given. For example, instead of '
                     '@given(booleans()) @given(integers()), you could write '
                     '@given(booleans(), integers())') % (test.__name__, ))

            settings = wrapped_test._hypothesis_internal_use_settings

            random = get_random_for_wrapped_test(test, wrapped_test)

            if infer in generator_kwargs.values():
                hints = get_type_hints(test)
            for name in [
                    name for name, value in generator_kwargs.items()
                    if value is infer
            ]:
                if name not in hints:
                    raise InvalidArgument(
                        'passed %s=infer for %s, but %s has no type annotation'
                        % (name, test.__name__, name))
                generator_kwargs[name] = st.from_type(hints[name])

            processed_args = process_arguments_to_given(
                wrapped_test, arguments, kwargs, generator_arguments,
                generator_kwargs, argspec, test, settings)
            arguments, kwargs, test_runner, search_strategy = processed_args

            runner = getattr(search_strategy, 'runner', None)
            if isinstance(runner, TestCase) and test.__name__ in dir(TestCase):
                msg = ('You have applied @given to the method %s, which is '
                       'used by the unittest runner but is not itself a test.'
                       '  This is not useful in any way.' % test.__name__)
                fail_health_check(settings, msg, HealthCheck.not_a_test_method)
            if bad_django_TestCase(runner):  # pragma: no cover
                # Covered by the Django tests, but not the pytest coverage task
                raise InvalidArgument(
                    'You have applied @given to a method on %s, but this '
                    'class does not inherit from the supported versions in '
                    '`hypothesis.extra.django`.  Use the Hypothesis variants '
                    'to ensure that each example is run in a separate '
                    'database transaction.' % qualname(type(runner)))

            state = StateForActualGivenExecution(
                test_runner,
                search_strategy,
                test,
                settings,
                random,
                had_seed=wrapped_test._hypothesis_internal_use_seed)

            reproduce_failure = \
                wrapped_test._hypothesis_internal_use_reproduce_failure

            if reproduce_failure is not None:
                expected_version, failure = reproduce_failure
                if expected_version != __version__:
                    raise InvalidArgument(
                        ('Attempting to reproduce a failure from a different '
                         'version of Hypothesis. This failure is from %s, but '
                         'you are currently running %r. Please change your '
                         'Hypothesis version to a matching one.') %
                        (expected_version, __version__))
                try:
                    state.execute(
                        ConjectureData.for_buffer(decode_failure(failure)),
                        print_example=True,
                        is_final=True,
                    )
                    raise DidNotReproduce(
                        'Expected the test to raise an error, but it '
                        'completed successfully.')
                except StopTest:
                    raise DidNotReproduce(
                        'The shape of the test data has changed in some way '
                        'from where this blob was defined. Are you sure '
                        "you're running the same test?")
                except UnsatisfiedAssumption:
                    raise DidNotReproduce(
                        'The test data failed to satisfy an assumption in the '
                        'test. Have you added it since this blob was '
                        'generated?')

            execute_explicit_examples(test_runner, test, wrapped_test,
                                      settings, arguments, kwargs)

            if settings.max_examples <= 0:
                return

            if not (Phase.reuse in settings.phases
                    or Phase.generate in settings.phases):
                return

            try:
                if isinstance(runner, TestCase) and hasattr(runner, 'subTest'):
                    subTest = runner.subTest
                    try:
                        setattr(runner, 'subTest', fake_subTest)
                        state.run()
                    finally:
                        setattr(runner, 'subTest', subTest)
                else:
                    state.run()
            except BaseException as e:
                generated_seed = \
                    wrapped_test._hypothesis_internal_use_generated_seed
                with local_settings(settings):
                    if not (state.failed_normally or generated_seed is None):
                        if running_under_pytest:
                            report(
                                'You can add @seed(%(seed)d) to this test or '
                                'run pytest with --hypothesis-seed=%(seed)d '
                                'to reproduce this failure.' %
                                {'seed': generated_seed})
                        else:
                            report('You can add @seed(%d) to this test to '
                                   'reproduce this failure.' %
                                   (generated_seed, ))
                    # The dance here is to avoid showing users long tracebacks
                    # full of Hypothesis internals they don't care about.
                    # We have to do this inline, to avoid adding another
                    # internal stack frame just when we've removed the rest.
                    if PY2:
                        # Python 2 doesn't have Exception.with_traceback(...);
                        # instead it has a three-argument form of the `raise`
                        # statement.  Unfortunately this is a SyntaxError on
                        # Python 3, and before Python 2.7.9 it was *also* a
                        # SyntaxError to use it in a nested function so we
                        # can't `exec` or `eval` our way out (BPO-21591).
                        # So unless we break some versions of Python 2, none
                        # of them get traceback elision.
                        raise
                    # On Python 3, we swap out the real traceback for our
                    # trimmed version.  Using a variable ensures that the line
                    # which will actually appear in tracebacks is as clear as
                    # possible - "raise the_error_hypothesis_found".
                    the_error_hypothesis_found = \
                        e.with_traceback(get_trimmed_traceback())
                    raise the_error_hypothesis_found
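
Example #22 temporarily swaps out runner.subTest before running the engine and restores it in a finally block, so unittest's sub-test machinery cannot interfere with example replay. A generic sketch of that attribute-swap pattern as a context manager (swapped_attribute and MyTests are illustrative, not Hypothesis code):

from contextlib import contextmanager
from unittest import TestCase

@contextmanager
def swapped_attribute(obj, name, replacement):
    # Temporarily replace obj.<name>, restoring the original on exit.
    original = getattr(obj, name)
    setattr(obj, name, replacement)
    try:
        yield
    finally:
        setattr(obj, name, original)

class MyTests(TestCase):
    def runTest(self):
        pass

runner = MyTests()
with swapped_attribute(runner, "subTest", None):
    pass   # the engine would run here while subTest is neutralised
assert callable(runner.subTest)   # the real method is back afterwards
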
Example #23
    def generate_new_examples(self):
        if Phase.generate not in self.settings.phases:
            return
        if self.interesting_examples:
            # The example database has failing examples from a previous run,
            # so we'd rather report that they're still failing ASAP than take
            # the time to look for additional failures.
            return

        self.debug("Generating new examples")

        zero_data = self.cached_test_function(hbytes(BUFFER_SIZE))
        if zero_data.status > Status.OVERRUN:
            self.__data_cache.pin(zero_data.buffer)

        if zero_data.status == Status.OVERRUN or (
            zero_data.status == Status.VALID and len(zero_data.buffer) * 2 > BUFFER_SIZE
        ):
            fail_health_check(
                self.settings,
                "The smallest natural example for your test is extremely "
                "large. This makes it difficult for Hypothesis to generate "
                "good examples, especially when trying to reduce failing ones "
                "at the end. Consider reducing the size of your data if it is "
                "of a fixed size. You could also fix this by improving how "
                "your data shrinks (see https://hypothesis.readthedocs.io/en/"
                "latest/data.html#shrinking for details), or by introducing "
                "default values inside your strategy. e.g. could you replace "
                "some arguments with their defaults by using "
                "one_of(none(), some_complex_strategy)?",
                HealthCheck.large_base_example,
            )

        self.health_check_state = HealthCheckState()

        def should_generate_more():
            # End the generation phase where we would have ended it if no bugs had
            # been found.  This reproduces the exit logic in `self.test_function`,
            # but with the important distinction that this clause will move on to
            # the shrinking phase having found one or more bugs, while the other
            # will exit having found zero bugs.
            if (
                self.valid_examples >= self.settings.max_examples
                or self.call_count >= max(self.settings.max_examples * 10, 1000)
                or (
                    self.best_examples_of_observed_targets
                    and self.valid_examples * 2 >= self.settings.max_examples
                    and self.should_optimise
                )
            ):  # pragma: no cover
                return False

            # If we haven't found a bug, keep looking - if we hit any limits on
            # the number of tests to run that will raise an exception and stop
            # the run.
            if not self.interesting_examples:
                return True
            # If we've found a bug and won't report more than one, stop looking.
            elif not self.settings.report_multiple_bugs:
                return False
            assert self.first_bug_found_at <= self.last_bug_found_at <= self.call_count
            # Otherwise, keep searching for between ten and 'a heuristic' calls.
            # We cap 'calls after first bug' so errors are reported reasonably
            # soon even for tests that are allowed to run for a very long time,
            # or sooner if the latest half of our test effort has been fruitless.
            return self.call_count < MIN_TEST_CALLS or self.call_count < min(
                self.first_bug_found_at + 1000, self.last_bug_found_at * 2
            )

        # We attempt to use the size of the minimal generated test case starting
        # from a given novel prefix as a guideline to generate smaller test
        # cases for an initial period, by restricting ourselves to test cases
        # that are not much larger than it.
        #
        # Calculating the actual minimal generated test case is hard, so we
        # take a best guess that zero extending a prefix produces the minimal
        # test case starting with that prefix (this is true for our built in
        # strategies). This is only a reasonable thing to do if the resulting
        # test case is valid. If we regularly run into situations where it is
        # not valid then this strategy is a waste of time, so we want to
        # abandon it early. In order to do this we track how many times in a
        # row it has failed to work, and abort small test case generation when
        # it has failed too many times in a row.
        consecutive_zero_extend_is_invalid = 0

        while should_generate_more():
            prefix = self.generate_novel_prefix()
            assert len(prefix) <= BUFFER_SIZE

            # We control growth during initial example generation, for two
            # reasons:
            #
            # * It gives us an opportunity to find small examples early, which
            #   gives us a fast path for easy to find bugs.
            # * It avoids low probability events where we might end up
            #   generating very large examples during health checks, which
            #   on slower machines can trigger HealthCheck.too_slow.
            #
            # The heuristic we use is that we attempt to estimate the smallest
            # extension of this prefix, and limit the size to no more than
            # an order of magnitude larger than that. If we fail to estimate
            # the size accurately, we skip over this prefix and try again.
            #
            # We need to tune the example size based on the initial prefix,
            # because any fixed size might be too small, and any size based
            # on the strategy in general can fall afoul of strategies that
            # have very different sizes for different prefixes.
            small_example_cap = clamp(10, self.settings.max_examples // 10, 50)

            if (
                self.valid_examples <= small_example_cap
                and self.call_count <= 5 * small_example_cap
                and not self.interesting_examples
                and consecutive_zero_extend_is_invalid < 5
            ):
                minimal_example = self.cached_test_function(
                    prefix + hbytes(BUFFER_SIZE - len(prefix))
                )

                if minimal_example.status < Status.VALID:
                    consecutive_zero_extend_is_invalid += 1
                    continue

                consecutive_zero_extend_is_invalid = 0

                minimal_extension = len(minimal_example.buffer) - len(prefix)

                max_length = min(len(prefix) + minimal_extension * 10, BUFFER_SIZE)

                # We could end up in a situation where even though the prefix was
                # novel when we generated it, because we've now tried zero extending
                # it not all possible continuations of it will be novel. In order to
                # avoid making redundant test calls, we rerun it in simulation mode
                # first. If this has a predictable result, then we don't bother
                # running the test function for real here. If however we encounter
                # some novel behaviour, we try again with the real test function,
                # starting from the new novel prefix that it has discovered.
                try:
                    trial_data = self.new_conjecture_data(
                        prefix=prefix, max_length=max_length
                    )
                    self.tree.simulate_test_function(trial_data)
                    continue
                except PreviouslyUnseenBehaviour:
                    pass

                # If the simulation entered part of the tree that has been killed,
                # we don't want to run this.
                if trial_data.observer.killed:
                    continue

                # We might have hit the cap on number of examples we should
                # run when calculating the minimal example.
                if not should_generate_more():
                    break

                prefix = trial_data.buffer
            else:
                max_length = BUFFER_SIZE

            data = self.new_conjecture_data(prefix=prefix, max_length=max_length)

            self.test_function(data)

            # A thing that is often useful but rarely happens by accident is
            # to generate the same value at multiple different points in the
            # test case.
            #
            # Rather than make this the responsibility of individual strategies
            # we implement a small mutator that just takes parts of the test
            # case with the same label and tries replacing one of them with a
            # copy of the other, then runs the result. If we've made a good
            # guess about what to put where, this will run a similar generated
            # test case with more duplication.
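            # A hypothetical illustration: if two draws share the label of, say, an
            # integers() strategy and occupy byte ranges b1 and b2 of the buffer, the
            # mutator tries a buffer in which b2 is overwritten with a copy of b1 (or
            # vice versa), nudging the test case towards repeated values.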
            if (
                # An OVERRUN doesn't have enough information about the test
                # case to mutate, so we just skip those.
                data.status >= Status.INVALID
                # This has a tendency to trigger some weird edge cases during
                # generation so we don't let it run until we're done with the
                # health checks.
                and self.health_check_state is None
            ):
                initial_calls = self.call_count
                failed_mutations = 0
                while (
                    should_generate_more()
                    # We implement fairly conservative checks for how long we
                    # should run mutation for, as it's generally not obvious
                    # how helpful it is for any given test case.
                    and self.call_count <= initial_calls + 5
                    and failed_mutations <= 5
                ):
                    groups = defaultdict(list)
                    for ex in data.examples:
                        groups[ex.label, ex.depth].append(ex)

                    groups = [v for v in groups.values() if len(v) > 1]
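                    # Only groups with at least two members can donate a value to one
                    # another, so singleton labels were dropped just above.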

                    if not groups:
                        break

                    group = self.random.choice(groups)

                    ex1, ex2 = sorted(
                        self.random.sample(group, 2), key=lambda i: i.index
                    )
                    assert ex1.end <= ex2.start

                    replacements = [data.buffer[e.start : e.end] for e in [ex1, ex2]]

                    replacement = self.random.choice(replacements)

                    try:
                        # We attempt to replace both of the examples with
                        # whichever choice we made. Note that this might end
                        # up messing up and getting the example boundaries
                        # wrong - labels matching are only a best guess as to
                        # whether the two are equivalent - but it doesn't
                        # really matter. It may not achieve the desired result
                        # but it's still a perfectly acceptable choice sequence
                        # to try.
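                        # Illustrative splice (hypothetical offsets): with ex1 spanning
                        # [2, 5) and ex2 spanning [8, 11), the candidate buffer below is
                        # buffer[:2] + replacement + buffer[5:8] + replacement + buffer[11:].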
                        new_data = self.cached_test_function(
                            data.buffer[: ex1.start]
                            + replacement
                            + data.buffer[ex1.end : ex2.start]
                            + replacement
                            + data.buffer[ex2.end :],
                            # We set error_on_discard so that we don't end up
                            # entering parts of the tree we consider redundant
                            # and not worth exploring.
                            error_on_discard=True,
                            extend=BUFFER_SIZE,
                        )
                    except ContainsDiscard:
                        failed_mutations += 1
                        continue

                    if (
                        new_data.status >= data.status
                        and data.buffer != new_data.buffer
                        and all(
                            k in new_data.target_observations
                            and new_data.target_observations[k] >= v
                            for k, v in data.target_observations.items()
                        )
                    ):
                        data = new_data
                        failed_mutations = 0
                    else:
                        failed_mutations += 1
Example #24
    def record_for_health_check(self, data):
        # Once we've actually found a bug, there's no point in trying to run
        # health checks - they'll just mask the actually important information.
        if data.status == Status.INTERESTING:
            self.health_check_state = None

        state = self.health_check_state

        if state is None:
            return

        state.draw_times.extend(data.draw_times)

        if data.status == Status.VALID:
            state.valid_examples += 1
        elif data.status == Status.INVALID:
            state.invalid_examples += 1
        else:
            assert data.status == Status.OVERRUN
            state.overrun_examples += 1

        max_valid_draws = 10
        max_invalid_draws = 50
        max_overrun_draws = 20
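        # In other words: the health-check window closes quietly after 10 valid
        # draws, but if 50 invalid or 20 overrun draws arrive first we fail with
        # filter_too_much or data_too_large respectively, and a cumulative draw
        # time over one second trips too_slow below.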

        assert state.valid_examples <= max_valid_draws

        if state.valid_examples == max_valid_draws:
            self.health_check_state = None
            return

        if state.overrun_examples == max_overrun_draws:
            fail_health_check(
                self.settings,
                (
                    "Examples routinely exceeded the max allowable size. "
                    "(%d examples overran while generating %d valid ones)"
                    ". Generating examples this large will usually lead to"
                    " bad results. You could try setting max_size parameters "
                    "on your collections and turning "
                    "max_leaves down on recursive() calls."
                )
                % (state.overrun_examples, state.valid_examples),
                HealthCheck.data_too_large,
            )
        if state.invalid_examples == max_invalid_draws:
            fail_health_check(
                self.settings,
                (
                    "It looks like your strategy is filtering out a lot "
                    "of data. Health check found %d filtered examples but "
                    "only %d good ones. This will make your tests much "
                    "slower, and also will probably distort the data "
                    "generation quite a lot. You should adapt your "
                    "strategy to filter less. This can also be caused by "
                    "a low max_leaves parameter in recursive() calls"
                )
                % (state.invalid_examples, state.valid_examples),
                HealthCheck.filter_too_much,
            )

        draw_time = sum(state.draw_times)

        if draw_time > 1.0:
            fail_health_check(
                self.settings,
                (
                    "Data generation is extremely slow: Only produced "
                    "%d valid examples in %.2f seconds (%d invalid ones "
                    "and %d exceeded maximum size). Try decreasing "
                    "size of the data you're generating (with e.g."
                    "max_size or max_leaves parameters)."
                )
                % (
                    state.valid_examples,
                    draw_time,
                    state.invalid_examples,
                    state.overrun_examples,
                ),
                HealthCheck.too_slow,
            )
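For orientation (not part of the excerpt above), here is a minimal user-side sketch of how a failure raised by one of these checks is usually silenced, using Hypothesis' public settings/HealthCheck API; the strategy and the specific check chosen are illustrative assumptions, not taken from the code above:

# Minimal user-side sketch (illustrative): suppressing a specific health check
# via the public settings API when the slowdown is expected and acceptable.
from hypothesis import HealthCheck, given, settings, strategies as st

@settings(suppress_health_check=[HealthCheck.too_slow])
@given(st.lists(st.integers(), max_size=20))
def test_sorting_is_idempotent(xs):
    assert sorted(sorted(xs)) == sorted(xs)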
Example #25
    def pytest_runtest_call(item):
        if not (hasattr(item, "obj") and "hypothesis" in sys.modules):
            yield
            return

        from hypothesis import core
        from hypothesis.internal.detection import is_hypothesis_test

        core.running_under_pytest = True

        if not is_hypothesis_test(item.obj):
            # If @given was not applied, check whether other hypothesis
            # decorators were applied, and raise an error if they were.
            if getattr(item.obj, "is_hypothesis_strategy_function", False):
                from hypothesis.errors import InvalidArgument

                raise InvalidArgument(
                    f"{item.nodeid} is a function that returns a Hypothesis strategy, "
                    "but pytest has collected it as a test function.  This is useless "
                    "as the function body will never be executed.  To define a test "
                    "function, use @given instead of @composite.")
            message = "Using `@%s` on a test without `@given` is completely pointless."
            for name, attribute in [
                ("example", "hypothesis_explicit_examples"),
                ("seed", "_hypothesis_internal_use_seed"),
                ("settings", "_hypothesis_internal_settings_applied"),
                ("reproduce_example",
                 "_hypothesis_internal_use_reproduce_failure"),
            ]:
                if hasattr(item.obj, attribute):
                    from hypothesis.errors import InvalidArgument

                    raise InvalidArgument(message % (name, ))
            yield
        else:
            from hypothesis import HealthCheck, settings
            from hypothesis.internal.escalation import current_pytest_item
            from hypothesis.internal.healthcheck import fail_health_check
            from hypothesis.reporting import with_reporter
            from hypothesis.statistics import collector, describe_statistics

            # Retrieve the settings for this test from the test object, which
            # is normally a Hypothesis wrapped_test wrapper. If this doesn't
            # work, the test object is probably something weird
            # (e.g. a stateful test wrapper), so we skip the function-scoped
            # fixture check.
            settings = getattr(item.obj, "_hypothesis_internal_use_settings",
                               None)

            # Check for suspicious use of function-scoped fixtures, but only
            # if the corresponding health check is not suppressed.
            if (settings is not None and HealthCheck.function_scoped_fixture
                    not in settings.suppress_health_check):
                # Warn about function-scoped fixtures, excluding autouse fixtures because
                # the advice is probably not actionable and the status quo seems OK...
                # See https://github.com/HypothesisWorks/hypothesis/issues/377 for detail.
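                # Illustrative trigger (not from this excerpt): a @given test that also
                # takes a function-scoped fixture such as pytest's tmp_path would be
                # flagged here, since the fixture is created once per test function but
                # reused across every generated example.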
                argnames = None
                for fx_defs in item._request._fixturemanager.getfixtureinfo(
                        node=item, func=item.function,
                        cls=None).name2fixturedefs.values():
                    if argnames is None:
                        argnames = frozenset(
                            signature(item.function).parameters)
                    for fx in fx_defs:
                        if fx.argname in argnames:
                            active_fx = item._request._get_active_fixturedef(
                                fx.argname)
                            if active_fx.scope == "function":
                                fail_health_check(
                                    settings,
                                    _FIXTURE_MSG.format(
                                        fx.argname, item.nodeid),
                                    HealthCheck.function_scoped_fixture,
                                )

            if item.get_closest_marker("parametrize") is not None:
                # Give every parametrized test invocation a unique database key
                key = item.nodeid.encode()
                item.obj.hypothesis.inner_test._hypothesis_internal_add_digest = key

            store = StoringReporter(item.config)

            def note_statistics(stats):
                stats["nodeid"] = item.nodeid
                item.hypothesis_statistics = base64.b64encode(
                    describe_statistics(stats).encode()).decode()

            with collector.with_value(note_statistics):
                with with_reporter(store):
                    with current_pytest_item.with_value(item):
                        yield
            if store.results:
                item.hypothesis_report_information = list(store.results)
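One detail not visible in this excerpt or in the variant below: pytest_runtest_call is written as a generator that yields, which only works when the plugin registers it as a hook wrapper. A minimal sketch of that registration, assuming the standard pytest hookimpl mechanism rather than anything Hypothesis-specific:

# Sketch of the registration such a generator-style hook needs (an assumption
# about the surrounding plugin; only the decorator is the point here).
import pytest

@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_call(item):
    # ... set-up as in the examples above, then hand control to the test ...
    yield
    # ... post-processing, e.g. attaching reports/statistics to `item` ...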
Example #26
    def pytest_runtest_call(item):
        if not hasattr(item, "obj"):
            yield
        elif not is_hypothesis_test(item.obj):
            # If @given was not applied, check whether other hypothesis
            # decorators were applied, and raise an error if they were.
            if getattr(item.obj, "is_hypothesis_strategy_function", False):
                raise InvalidArgument(
                    "%s is a function that returns a Hypothesis strategy, but pytest "
                    "has collected it as a test function.  This is useless as the "
                    "function body will never be executed.  To define a test "
                    "function, use @given instead of @composite." %
                    (item.nodeid, ))
            message = "Using `@%s` on a test without `@given` is completely pointless."
            for name, attribute in [
                ("example", "hypothesis_explicit_examples"),
                ("seed", "_hypothesis_internal_use_seed"),
                ("settings", "_hypothesis_internal_settings_applied"),
                ("reproduce_example",
                 "_hypothesis_internal_use_reproduce_failure"),
            ]:
                if hasattr(item.obj, attribute):
                    raise InvalidArgument(message % (name, ))
            yield
        else:
            # Retrieve the settings for this test from the test object, which
            # is normally a Hypothesis wrapped_test wrapper. If this doesn't
            # work, the test object is probably something weird
            # (e.g. a stateful test wrapper), so we skip the function-scoped
            # fixture check.
            settings = getattr(item.obj, "_hypothesis_internal_use_settings",
                               None)

            # Check for suspicious use of function-scoped fixtures, but only
            # if the corresponding health check is not suppressed.
            if (settings is not None and HealthCheck.function_scoped_fixture
                    not in settings.suppress_health_check):
                # Warn about function-scoped fixtures, excluding autouse fixtures because
                # the advice is probably not actionable and the status quo seems OK...
                # See https://github.com/HypothesisWorks/hypothesis/issues/377 for detail.
                msg = (
                    "%s uses the %r fixture, which is reset between function calls but not "
                    "between test cases generated by `@given(...)`.  You can change it to "
                    "a module- or session-scoped fixture if it is safe to reuse; if not "
                    "we recommend using a context manager inside your test function.  See "
                    "https://docs.pytest.org/en/latest/fixture.html#sharing-test-data "
                    "for details on fixture scope.")
                argnames = None
                for fx_defs in item._request._fixturemanager.getfixtureinfo(
                        node=item, func=item.function,
                        cls=None).name2fixturedefs.values():
                    if argnames is None:
                        argnames = frozenset(
                            signature(item.function).parameters)
                    for fx in fx_defs:
                        if fx.argname in argnames:
                            active_fx = item._request._get_active_fixturedef(
                                fx.argname)
                            if active_fx.scope == "function":
                                fail_health_check(
                                    settings,
                                    msg % (item.nodeid, fx.argname),
                                    HealthCheck.function_scoped_fixture,
                                )

            if item.get_closest_marker("parametrize") is not None:
                # Give every parametrized test invocation a unique database key
                key = item.nodeid.encode("utf-8")
                item.obj.hypothesis.inner_test._hypothesis_internal_add_digest = key

            store = StoringReporter(item.config)

            def note_statistics(stats):
                stats["nodeid"] = item.nodeid
                item.hypothesis_statistics = base64.b64encode(
                    describe_statistics(stats).encode()).decode()

            with collector.with_value(note_statistics):
                with with_reporter(store):
                    yield
            if store.results:
                item.hypothesis_report_information = list(store.results)