Esempio n. 1
0
    def run_engine(self):
        """Run the test function many times, on database input and generated
        input, using the Conjecture engine.
        """
        # Tell pytest to omit the body of this function from tracebacks
        __tracebackhide__ = True
        try:
            database_key = self.wrapped_test._hypothesis_internal_database_key
        except AttributeError:
            if global_force_seed is None:
                database_key = function_digest(self.test)
            else:
                database_key = None

        runner = ConjectureRunner(
            self._execute_once_for_engine,
            settings=self.settings,
            random=self.random,
            database_key=database_key,
        )
        # Use the Conjecture engine to run the test function many times
        # on different inputs.
        runner.run()
        note_engine_for_statistics(runner)

        if runner.call_count == 0:
            return
        if runner.interesting_examples:
            self.falsifying_examples = sorted(
                runner.interesting_examples.values(),
                key=lambda d: sort_key(d.buffer),
                reverse=True,
            )
        else:
            if runner.valid_examples == 0:
                raise Unsatisfiable(
                    "Unable to satisfy assumptions of hypothesis %s." %
                    (get_pretty_function_description(self.test), ))

        if not self.falsifying_examples:
            return
        elif not self.settings.report_multiple_bugs:
            # Pretend that we only found one failure, by discarding the others.
            del self.falsifying_examples[:-1]

        # The engine found one or more failures, so we need to reproduce and
        # report them.

        self.failed_normally = True

        flaky = 0

        for falsifying_example in self.falsifying_examples:
            info = falsifying_example.extra_information

            ran_example = ConjectureData.for_buffer(falsifying_example.buffer)
            self.__was_flaky = False
            assert info.__expected_exception is not None
            try:
                self.execute_once(
                    ran_example,
                    print_example=not self.is_find,
                    is_final=True,
                    expected_failure=(
                        info.__expected_exception,
                        info.__expected_traceback,
                    ),
                )
            except (UnsatisfiedAssumption, StopTest):
                report(traceback.format_exc())
                self.__flaky(
                    "Unreliable assumption: An example which satisfied "
                    "assumptions on the first run now fails it.")
            except BaseException as e:
                if len(self.falsifying_examples) <= 1:
                    # There is only one failure, so we can report it by raising
                    # it directly.
                    raise

                # We are reporting multiple failures, so we need to manually
                # print each exception's stack trace and information.
                tb = get_trimmed_traceback()
                report("".join(traceback.format_exception(type(e), e, tb)))

            finally:  # pragma: no cover
                # Mostly useful for ``find`` and ensuring that objects that
                # hold on to a reference to ``data`` know that it's now been
                # finished and they shouldn't attempt to draw more data from
                # it.
                ran_example.freeze()

                # This section is in fact entirely covered by the tests in
                # test_reproduce_failure, but it seems to trigger a lovely set
                # of coverage bugs: The branches show up as uncovered (despite
                # definitely being covered - you can add an assert False else
                # branch to verify this and see it fail - and additionally the
                # second branch still complains about lack of coverage even if
                # you add a pragma: no cover to it!
                # See https://bitbucket.org/ned/coveragepy/issues/623/
                if self.settings.print_blob:
                    report(("\nYou can reproduce this example by temporarily "
                            "adding @reproduce_failure(%r, %r) as a decorator "
                            "on your test case") %
                           (__version__,
                            encode_failure(falsifying_example.buffer)))
            if self.__was_flaky:
                flaky += 1

        # If we only have one example then we should have raised an error or
        # flaky prior to this point.
        assert len(self.falsifying_examples) > 1

        if flaky > 0:
            raise Flaky(
                ("Hypothesis found %d distinct failures, but %d of them "
                 "exhibited some sort of flaky behaviour.") %
                (len(self.falsifying_examples), flaky))
        else:
            raise MultipleFailures(("Hypothesis found %d distinct failures.") %
                                   (len(self.falsifying_examples)))
Esempio n. 2
0
    def run(self):
        # Tell pytest to omit the body of this function from tracebacks
        __tracebackhide__ = True
        if global_force_seed is None:
            database_key = function_digest(self.test)
        else:
            database_key = None
        runner = ConjectureRunner(
            self.evaluate_test_data,
            settings=self.settings,
            random=self.random,
            database_key=database_key,
        )
        try:
            runner.run()
        finally:
            self.used_examples_from_database = runner.used_examples_from_database
        note_engine_for_statistics(runner)

        self.used_examples_from_database = runner.used_examples_from_database

        if runner.call_count == 0:
            return
        if runner.interesting_examples:
            self.falsifying_examples = sorted(
                [d for d in runner.interesting_examples.values()],
                key=lambda d: sort_key(d.buffer),
                reverse=True,
            )
        else:
            if runner.valid_examples == 0:
                raise Unsatisfiable(
                    "Unable to satisfy assumptions of hypothesis %s." %
                    (get_pretty_function_description(self.test), ))

        if not self.falsifying_examples:
            return

        self.failed_normally = True

        flaky = 0

        for falsifying_example in self.falsifying_examples:
            ran_example = ConjectureData.for_buffer(falsifying_example.buffer)
            self.__was_flaky = False
            assert falsifying_example.__expected_exception is not None
            try:
                self.execute(
                    ran_example,
                    print_example=True,
                    is_final=True,
                    expected_failure=(
                        falsifying_example.__expected_exception,
                        falsifying_example.__expected_traceback,
                    ),
                )
            except (UnsatisfiedAssumption, StopTest):
                report(traceback.format_exc())
                self.__flaky(
                    "Unreliable assumption: An example which satisfied "
                    "assumptions on the first run now fails it.")
            except BaseException as e:
                if len(self.falsifying_examples) <= 1:
                    raise
                tb = get_trimmed_traceback()
                report("".join(traceback.format_exception(type(e), e, tb)))
            finally:  # pragma: no cover
                # This section is in fact entirely covered by the tests in
                # test_reproduce_failure, but it seems to trigger a lovely set
                # of coverage bugs: The branches show up as uncovered (despite
                # definitely being covered - you can add an assert False else
                # branch to verify this and see it fail - and additionally the
                # second branch still complains about lack of coverage even if
                # you add a pragma: no cover to it!
                # See https://bitbucket.org/ned/coveragepy/issues/623/
                if self.settings.print_blob is not PrintSettings.NEVER:
                    failure_blob = encode_failure(falsifying_example.buffer)
                    # Have to use the example we actually ran, not the original
                    # falsifying example! Otherwise we won't catch problems
                    # where the repr of the generated example doesn't parse.
                    can_use_repr = ran_example.can_reproduce_example_from_repr
                    if self.settings.print_blob is PrintSettings.ALWAYS or (
                            self.settings.print_blob is PrintSettings.INFER
                            and self.settings.verbosity >= Verbosity.normal
                            and not can_use_repr and len(failure_blob) < 200):
                        report((
                            "\nYou can reproduce this example by temporarily "
                            "adding @reproduce_failure(%r, %r) as a decorator "
                            "on your test case") % (__version__, failure_blob))
            if self.__was_flaky:
                flaky += 1

        # If we only have one example then we should have raised an error or
        # flaky prior to this point.
        assert len(self.falsifying_examples) > 1

        if flaky > 0:
            raise Flaky(
                ("Hypothesis found %d distinct failures, but %d of them "
                 "exhibited some sort of flaky behaviour.") %
                (len(self.falsifying_examples), flaky))
        else:
            raise MultipleFailures(("Hypothesis found %d distinct failures.") %
                                   (len(self.falsifying_examples)))
Esempio n. 3
0
    def run(self):
        # Tell pytest to omit the body of this function from tracebacks
        __tracebackhide__ = True
        database_key = str_to_bytes(fully_qualified_name(self.test))
        self.start_time = time.time()
        runner = ConjectureRunner(
            self.evaluate_test_data,
            settings=self.settings,
            random=self.random,
            database_key=database_key,
        )
        runner.run()
        note_engine_for_statistics(runner)
        run_time = time.time() - self.start_time
        timed_out = runner.exit_reason == ExitReason.timeout
        if runner.last_data is None:
            return
        if runner.interesting_examples:
            self.falsifying_examples = sorted(
                [d for d in runner.interesting_examples.values()],
                key=lambda d: sort_key(d.buffer),
                reverse=True)
        else:
            if timed_out:
                note_deprecation((
                    'Your tests are hitting the settings timeout (%.2fs). '
                    'This functionality will go away in a future release '
                    'and you should not rely on it. Instead, try setting '
                    'max_examples to be some value lower than %d (the number '
                    'of examples your test successfully ran here). Or, if you '
                    'would prefer your tests to run to completion, regardless '
                    'of how long they take, you can set the timeout value to '
                    'hypothesis.unlimited.') % (self.settings.timeout,
                                                runner.valid_examples),
                                 self.settings)
            if runner.valid_examples < min(
                    self.settings.min_satisfying_examples,
                    self.settings.max_examples,
            ) and not (runner.exit_reason == ExitReason.finished
                       and self.at_least_one_success):
                if timed_out:
                    raise Timeout(
                        ('Ran out of time before finding a satisfying '
                         'example for '
                         '%s. Only found %d examples in ' + '%.2fs.') %
                        (get_pretty_function_description(
                            self.test), runner.valid_examples, run_time))
                else:
                    raise Unsatisfiable(
                        ('Unable to satisfy assumptions of hypothesis '
                         '%s. Only %d examples considered '
                         'satisfied assumptions') % (
                             get_pretty_function_description(self.test),
                             runner.valid_examples,
                         ))

        if not self.falsifying_examples:
            return

        flaky = 0

        for falsifying_example in self.falsifying_examples:
            self.__was_flaky = False
            raised_exception = False
            try:
                with self.settings:
                    self.test_runner(
                        ConjectureData.for_buffer(falsifying_example.buffer),
                        reify_and_execute(self.search_strategy,
                                          self.test,
                                          print_example=True,
                                          is_final=True))
            except (UnsatisfiedAssumption, StopTest):
                report(traceback.format_exc())
                self.__flaky(
                    'Unreliable assumption: An example which satisfied '
                    'assumptions on the first run now fails it.')
            except:
                if len(self.falsifying_examples) <= 1:
                    raise
                raised_exception = True
                report(traceback.format_exc())

            if not raised_exception:
                report(
                    'Failed to reproduce exception. Expected: \n' +
                    falsifying_example.__expected_exception, )

                filter_message = (
                    'Unreliable test data: Failed to reproduce a failure '
                    'and then when it came to recreating the example in '
                    'order to print the test data with a flaky result '
                    'the example was filtered out (by e.g. a '
                    'call to filter in your strategy) when we didn\'t '
                    'expect it to be.')

                try:
                    self.test_runner(
                        ConjectureData.for_buffer(falsifying_example.buffer),
                        reify_and_execute(self.search_strategy,
                                          test_is_flaky(
                                              self.test,
                                              self.repr_for_last_exception),
                                          print_example=True,
                                          is_final=True))
                except (UnsatisfiedAssumption, StopTest):
                    self.__flaky(filter_message)
                except Flaky as e:
                    if len(self.falsifying_examples) > 1:
                        self.__flaky(e.args[0])
                    else:
                        raise

            if self.__was_flaky:
                flaky += 1

        # If we only have one example then we should have raised an error or
        # flaky prior to this point.
        assert len(self.falsifying_examples) > 1

        if flaky > 0:
            raise Flaky(
                ('Hypothesis found %d distinct failures, but %d of them '
                 'exhibited some sort of flaky behaviour.') %
                (len(self.falsifying_examples), flaky))
        else:
            raise MultipleFailures(('Hypothesis found %d distinct failures.') %
                                   (len(self.falsifying_examples, )))
Esempio n. 4
0
    def run(self):
        # Tell pytest to omit the body of this function from tracebacks
        __tracebackhide__ = True
        if global_force_seed is None:
            database_key = str_to_bytes(fully_qualified_name(self.test))
        else:
            database_key = None
        self.start_time = time.time()
        global in_given
        runner = ConjectureRunner(
            self.evaluate_test_data,
            settings=self.settings,
            random=self.random,
            database_key=database_key,
        )

        if in_given or self.collector is None:
            runner.run()
        else:  # pragma: no cover
            in_given = True
            original_trace = sys.gettrace()
            try:
                sys.settrace(None)
                runner.run()
            finally:
                in_given = False
                sys.settrace(original_trace)
                self.used_examples_from_database = \
                    runner.used_examples_from_database
        note_engine_for_statistics(runner)
        run_time = time.time() - self.start_time

        self.used_examples_from_database = runner.used_examples_from_database

        if runner.used_examples_from_database:
            if self.settings.derandomize:
                note_deprecation(
                    'In future derandomize will imply database=None, but your '
                    'test is currently using examples from the database. To '
                    'get the future behaviour, update your settings to '
                    'include database=None.')
            if self.__had_seed:
                note_deprecation(
                    'In future use of @seed will imply database=None in your '
                    'settings, but your test is currently using examples from '
                    'the database. To get the future behaviour, update your '
                    'settings for this test to include database=None.')

        timed_out = runner.exit_reason == ExitReason.timeout
        if runner.call_count == 0:
            return
        if runner.interesting_examples:
            self.falsifying_examples = sorted(
                [d for d in runner.interesting_examples.values()],
                key=lambda d: sort_key(d.buffer),
                reverse=True)
        else:
            if timed_out:
                note_deprecation((
                    'Your tests are hitting the settings timeout (%.2fs). '
                    'This functionality will go away in a future release '
                    'and you should not rely on it. Instead, try setting '
                    'max_examples to be some value lower than %d (the number '
                    'of examples your test successfully ran here). Or, if you '
                    'would prefer your tests to run to completion, regardless '
                    'of how long they take, you can set the timeout value to '
                    'hypothesis.unlimited.') % (self.settings.timeout,
                                                runner.valid_examples),
                                 self.settings)
            if runner.valid_examples == 0:
                if timed_out:
                    raise Timeout(
                        ('Ran out of time before finding a satisfying '
                         'example for %s. Only found %d examples in %.2fs.') %
                        (get_pretty_function_description(
                            self.test), runner.valid_examples, run_time))
                else:
                    raise Unsatisfiable(
                        'Unable to satisfy assumptions of hypothesis %s.' %
                        (get_pretty_function_description(self.test), ))

        if not self.falsifying_examples:
            return

        self.failed_normally = True

        flaky = 0

        for falsifying_example in self.falsifying_examples:
            ran_example = ConjectureData.for_buffer(falsifying_example.buffer)
            self.__was_flaky = False
            assert falsifying_example.__expected_exception is not None
            try:
                self.execute(ran_example,
                             print_example=True,
                             is_final=True,
                             expected_failure=(
                                 falsifying_example.__expected_exception,
                                 falsifying_example.__expected_traceback,
                             ))
            except (UnsatisfiedAssumption, StopTest):
                report(traceback.format_exc())
                self.__flaky(
                    'Unreliable assumption: An example which satisfied '
                    'assumptions on the first run now fails it.')
            except BaseException:
                if len(self.falsifying_examples) <= 1:
                    raise
                report(traceback.format_exc())
            finally:  # pragma: no cover
                # This section is in fact entirely covered by the tests in
                # test_reproduce_failure, but it seems to trigger a lovely set
                # of coverage bugs: The branches show up as uncovered (despite
                # definitely being covered - you can add an assert False else
                # branch to verify this and see it fail - and additionally the
                # second branch still complains about lack of coverage even if
                # you add a pragma: no cover to it!
                # See https://bitbucket.org/ned/coveragepy/issues/623/
                if self.settings.print_blob is not PrintSettings.NEVER:
                    failure_blob = encode_failure(falsifying_example.buffer)
                    # Have to use the example we actually ran, not the original
                    # falsifying example! Otherwise we won't catch problems
                    # where the repr of the generated example doesn't parse.
                    can_use_repr = ran_example.can_reproduce_example_from_repr
                    if (self.settings.print_blob is PrintSettings.ALWAYS or
                        (self.settings.print_blob is PrintSettings.INFER
                         and not can_use_repr and len(failure_blob) < 200)):
                        report((
                            '\n'
                            'You can reproduce this example by temporarily '
                            'adding @reproduce_failure(%r, %r) as a decorator '
                            'on your test case') % (
                                __version__,
                                failure_blob,
                            ))
            if self.__was_flaky:
                flaky += 1

        # If we only have one example then we should have raised an error or
        # flaky prior to this point.
        assert len(self.falsifying_examples) > 1

        if flaky > 0:
            raise Flaky(
                ('Hypothesis found %d distinct failures, but %d of them '
                 'exhibited some sort of flaky behaviour.') %
                (len(self.falsifying_examples), flaky))
        else:
            raise MultipleFailures(('Hypothesis found %d distinct failures.') %
                                   (len(self.falsifying_examples, )))
Esempio n. 5
0
    def run(self):
        # Tell pytest to omit the body of this function from tracebacks
        __tracebackhide__ = True
        if global_force_seed is None:
            database_key = str_to_bytes(fully_qualified_name(self.test))
        else:
            database_key = None
        self.start_time = time.time()
        global in_given
        runner = ConjectureRunner(
            self.evaluate_test_data,
            settings=self.settings, random=self.random,
            database_key=database_key,
        )

        if in_given or self.collector is None:
            runner.run()
        else:  # pragma: no cover
            in_given = True
            original_trace = sys.gettrace()
            try:
                sys.settrace(None)
                runner.run()
            finally:
                in_given = False
                sys.settrace(original_trace)
        note_engine_for_statistics(runner)
        run_time = time.time() - self.start_time

        self.used_examples_from_database = runner.used_examples_from_database

        if runner.used_examples_from_database:
            if self.settings.derandomize:
                note_deprecation(
                    'In future derandomize will imply database=None, but your '
                    'test is currently using examples from the database. To '
                    'get the future behaviour, update your settings to '
                    'include database=None.'
                )
            if self.__had_seed:
                note_deprecation(
                    'In future use of @seed will imply database=None in your '
                    'settings, but your test is currently using examples from '
                    'the database. To get the future behaviour, update your '
                    'settings for this test to include database=None.'
                )

        timed_out = runner.exit_reason == ExitReason.timeout
        if runner.last_data is None:
            return
        if runner.interesting_examples:
            self.falsifying_examples = sorted(
                [d for d in runner.interesting_examples.values()],
                key=lambda d: sort_key(d.buffer), reverse=True
            )
        else:
            if timed_out:
                note_deprecation((
                    'Your tests are hitting the settings timeout (%.2fs). '
                    'This functionality will go away in a future release '
                    'and you should not rely on it. Instead, try setting '
                    'max_examples to be some value lower than %d (the number '
                    'of examples your test successfully ran here). Or, if you '
                    'would prefer your tests to run to completion, regardless '
                    'of how long they take, you can set the timeout value to '
                    'hypothesis.unlimited.'
                ) % (
                    self.settings.timeout, runner.valid_examples),
                    self.settings)
            if runner.valid_examples < min(
                self.settings.min_satisfying_examples,
                self.settings.max_examples,
            ) and not (
                runner.exit_reason == ExitReason.finished and
                self.at_least_one_success
            ):
                if timed_out:
                    raise Timeout((
                        'Ran out of time before finding a satisfying '
                        'example for '
                        '%s. Only found %d examples in ' +
                        '%.2fs.'
                    ) % (
                        get_pretty_function_description(self.test),
                        runner.valid_examples, run_time
                    ))
                else:
                    raise Unsatisfiable((
                        'Unable to satisfy assumptions of hypothesis '
                        '%s. Only %d examples considered '
                        'satisfied assumptions'
                    ) % (
                        get_pretty_function_description(self.test),
                        runner.valid_examples,))

        if not self.falsifying_examples:
            return

        flaky = 0

        for falsifying_example in self.falsifying_examples:
            self.__was_flaky = False
            assert falsifying_example.__expected_exception is not None
            try:
                self.execute(
                    ConjectureData.for_buffer(falsifying_example.buffer),
                    print_example=True, is_final=True,
                    expected_failure=(
                        falsifying_example.__expected_exception,
                        falsifying_example.__expected_traceback,
                    )
                )
            except (UnsatisfiedAssumption, StopTest):
                report(traceback.format_exc())
                self.__flaky(
                    'Unreliable assumption: An example which satisfied '
                    'assumptions on the first run now fails it.'
                )
            except BaseException:
                if len(self.falsifying_examples) <= 1:
                    raise
                report(traceback.format_exc())
            if self.__was_flaky:
                flaky += 1

        # If we only have one example then we should have raised an error or
        # flaky prior to this point.
        assert len(self.falsifying_examples) > 1

        if flaky > 0:
            raise Flaky((
                'Hypothesis found %d distinct failures, but %d of them '
                'exhibited some sort of flaky behaviour.') % (
                    len(self.falsifying_examples), flaky))
        else:
            raise MultipleFailures((
                'Hypothesis found %d distinct failures.') % (
                    len(self.falsifying_examples,)))