def evaluate_test_data(self, data):
    try:
        result = self.execute(data)
        if result is not None:
            fail_health_check(
                self.settings,
                ('Tests run under @given should return None, but '
                 '%s returned %r instead.') % (self.test.__name__, result),
                HealthCheck.return_value)
    except UnsatisfiedAssumption:
        data.mark_invalid()
    except (
        HypothesisDeprecationWarning, FailedHealthCheck, StopTest,
    ) + EXCEPTIONS_TO_RERAISE:
        raise
    except EXCEPTIONS_TO_FAIL as e:
        escalate_hypothesis_internal_error()
        tb = get_trimmed_traceback()
        data.__expected_traceback = ''.join(
            traceback.format_exception(type(e), e, tb))
        data.__expected_exception = e
        verbose_report(data.__expected_traceback)

        origin = traceback.extract_tb(tb)[-1]
        filename = origin[0]
        lineno = origin[1]
        data.mark_interesting((type(e), filename, lineno))
def evaluate_test_data(self, data):
    try:
        result = self.execute(data)
        if result is not None:
            fail_health_check(self.settings, (
                'Tests run under @given should return None, but '
                '%s returned %r instead.'
            ) % (self.test.__name__, result), HealthCheck.return_value)
        return False
    except UnsatisfiedAssumption:
        data.mark_invalid()
    except (
        HypothesisDeprecationWarning, FailedHealthCheck, StopTest,
    ) + EXCEPTIONS_TO_RERAISE:
        raise
    except Exception as e:
        escalate_hypothesis_internal_error()
        data.__expected_traceback = traceback.format_exc()
        data.__expected_exception = e
        verbose_report(data.__expected_traceback)

        error_class, _, tb = sys.exc_info()

        origin = traceback.extract_tb(tb)[-1]
        filename = origin[0]
        lineno = origin[1]
        data.mark_interesting((error_class, filename, lineno))
def _execute_once_for_engine(self, data):
    """Wrapper around ``execute_once`` that intercepts test failure
    exceptions and single-test control exceptions, and turns them into
    appropriate method calls to `data` instead.

    This allows the engine to assume that any exception other than
    ``StopTest`` must be a fatal error, and should stop the entire engine.
    """
    try:
        result = self.execute_once(data)
        if result is not None:
            fail_health_check(
                self.settings,
                ("Tests run under @given should return None, but "
                 "%s returned %r instead.") % (self.test.__name__, result),
                HealthCheck.return_value,
            )
    except UnsatisfiedAssumption:
        # An "assume" check failed, so instead we inform the engine that
        # this test run was invalid.
        data.mark_invalid()
    except StopTest:
        # The engine knows how to handle this control exception, so it's
        # OK to re-raise it.
        raise
    except (
        HypothesisDeprecationWarning,
        FailedHealthCheck,
    ) + skip_exceptions_to_reraise():
        # These are fatal errors or control exceptions that should stop the
        # engine, so we re-raise them.
        raise
    except failure_exceptions_to_catch() as e:
        # If the error was raised by Hypothesis-internal code, re-raise it
        # as a fatal error instead of treating it as a test failure.
        escalate_hypothesis_internal_error()

        if data.frozen:
            # This can happen if an error occurred in a finally
            # block somewhere, suppressing our original StopTest.
            # We raise a new one here to resume normal operation.
            raise StopTest(data.testcounter)
        else:
            # The test failed by raising an exception, so we inform the
            # engine that this test run was interesting. This is the normal
            # path for test runs that fail.
            tb = get_trimmed_traceback()
            info = data.extra_information
            info.__expected_traceback = "".join(
                traceback.format_exception(type(e), e, tb))
            info.__expected_exception = e
            verbose_report(info.__expected_traceback)

            origin = traceback.extract_tb(tb)[-1]
            filename = origin[0]
            lineno = origin[1]
            data.mark_interesting((type(e), filename, lineno))
def evaluate_test_data(self, data):
    try:
        if self.collector is None:
            result = self.execute(data)
        else:  # pragma: no cover
            # This should always be a no-op, but the coverage tracer has
            # a bad habit of resurrecting itself.
            original = sys.gettrace()
            sys.settrace(None)
            try:
                self.collector.data = {}
                result = self.execute(data, collect=True)
            finally:
                sys.settrace(original)
                covdata = CoverageData()
                self.collector.save_data(covdata)
                self.coverage_data.update(covdata)
                for filename in covdata.measured_files():
                    if is_hypothesis_file(filename):
                        continue
                    data.tags.update(
                        arc(filename, source, target)
                        for source, target in covdata.arcs(filename))
        if result is not None and self.settings.perform_health_check:
            fail_health_check(
                self.settings,
                ('Tests run under @given should return None, but '
                 '%s returned %r instead.') % (self.test.__name__, result),
                HealthCheck.return_value)
        self.at_least_one_success = True
        return False
    except UnsatisfiedAssumption:
        data.mark_invalid()
    except (
        HypothesisDeprecationWarning, FailedHealthCheck, StopTest,
    ) + exceptions_to_reraise:
        raise
    except Exception as e:
        escalate_hypothesis_internal_error()
        data.__expected_traceback = traceback.format_exc()
        data.__expected_exception = e
        verbose_report(data.__expected_traceback)

        error_class, _, tb = sys.exc_info()

        origin = traceback.extract_tb(tb)[-1]
        filename = origin[0]
        lineno = origin[1]
        data.mark_interesting((error_class, filename, lineno))
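# All of the evaluate_test_data / _execute_once_for_engine variants above
# enforce HealthCheck.return_value: a test wrapped in @given must return None.
# A minimal user-level sketch of the failure mode, with a hypothetical test
# name (recent Hypothesis versions raise this as an error):
from hypothesis import given, strategies as st

@given(st.integers())
def test_returns_a_value(x):
    # Returning anything other than None trips the return_value health check,
    # because the returned value would otherwise be silently ignored.
    return x * 2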
def check_invariants(self, settings):
    for invar in self.invariants():
        if self._initialize_rules_to_run and not invar.check_during_init:
            continue
        if not all(precond(self) for precond in invar.preconditions):
            continue
        result = invar.function(self)
        if result is not None:
            fail_health_check(
                settings,
                "The return value of an @invariant is always ignored, but "
                f"{invar.function.__qualname__} returned {result!r} "
                "instead of None",
                HealthCheck.return_value,
            )
def check_invariants(self, settings):
    for invar in self.invariants():
        if self._initialize_rules_to_run and not invar.check_during_init:
            continue
        if not all(precond(self) for precond in invar.preconditions):
            continue
        if (current_build_context().is_final
                or settings.verbosity >= Verbosity.debug):
            report(f"state.{invar.function.__name__}()")
        result = invar.function(self)
        if result is not None:
            fail_health_check(
                settings,
                "The return value of an @invariant is always ignored, but "
                f"{invar.function.__qualname__} returned {result!r} "
                "instead of None",
                HealthCheck.return_value,
            )
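# Both check_invariants variants reject invariants that return a value. A
# minimal state machine illustrating the correct style; the class and rule
# names are hypothetical, and check_during_init is only honoured by versions
# that support it:
import hypothesis.strategies as st
from hypothesis.stateful import RuleBasedStateMachine, invariant, rule

class Counter(RuleBasedStateMachine):
    def __init__(self):
        super().__init__()
        self.value = 0

    @rule(n=st.integers(min_value=1, max_value=10))
    def add(self, n):
        self.value += n

    @invariant()
    def value_is_non_negative(self):
        # Correct style: assert and implicitly return None. Returning e.g.
        # `self.value >= 0` instead would trigger HealthCheck.return_value.
        assert self.value >= 0

TestCounter = Counter.TestCase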
def evaluate_test_data(self, data):
    try:
        result = self.execute(data)
        if result is not None:
            fail_health_check(
                self.settings,
                (
                    "Tests run under @given should return None, but "
                    "%s returned %r instead."
                ) % (self.test.__name__, result),
                HealthCheck.return_value,
            )
    except UnsatisfiedAssumption:
        data.mark_invalid()
    except (
        HypothesisDeprecationWarning,
        FailedHealthCheck,
        StopTest,
    ) + skip_exceptions_to_reraise():
        raise
    except failure_exceptions_to_catch() as e:
        escalate_hypothesis_internal_error()
        if data.frozen:
            # This can happen if an error occurred in a finally
            # block somewhere, suppressing our original StopTest.
            # We raise a new one here to resume normal operation.
            raise StopTest(data.testcounter)
        else:
            tb = get_trimmed_traceback()
            info = data.extra_information
            info.__expected_traceback = "".join(
                traceback.format_exception(type(e), e, tb)
            )
            info.__expected_exception = e
            verbose_report(info.__expected_traceback)

            origin = traceback.extract_tb(tb)[-1]
            filename = origin[0]
            lineno = origin[1]
            data.mark_interesting((type(e), filename, lineno))
def record_for_health_check(self, data):
    # Once we've actually found a bug, there's no point in trying to run
    # health checks - they'll just mask the actually important information.
    if data.status == Status.INTERESTING:
        self.health_check_state = None

    state = self.health_check_state

    if state is None:
        return

    state.draw_times.extend(data.draw_times)

    if data.status == Status.VALID:
        state.valid_examples += 1
    elif data.status == Status.INVALID:
        state.invalid_examples += 1
    else:
        assert data.status == Status.OVERRUN
        state.overrun_examples += 1

    max_valid_draws = 10
    max_invalid_draws = 50
    max_overrun_draws = 20

    assert state.valid_examples <= max_valid_draws

    if state.valid_examples == max_valid_draws:
        self.health_check_state = None
        return

    if state.overrun_examples == max_overrun_draws:
        fail_health_check(
            self.settings,
            ("Examples routinely exceeded the max allowable size. "
             "(%d examples overran while generating %d valid ones)"
             ". Generating examples this large will usually lead to"
             " bad results. You could try setting max_size parameters "
             "on your collections and turning "
             "max_leaves down on recursive() calls.") % (
                 state.overrun_examples, state.valid_examples),
            HealthCheck.data_too_large,
        )
    if state.invalid_examples == max_invalid_draws:
        fail_health_check(
            self.settings,
            ("It looks like your strategy is filtering out a lot "
             "of data. Health check found %d filtered examples but "
             "only %d good ones. This will make your tests much "
             "slower, and also will probably distort the data "
             "generation quite a lot. You should adapt your "
             "strategy to filter less. This can also be caused by "
             "a low max_leaves parameter in recursive() calls") % (
                 state.invalid_examples, state.valid_examples),
            HealthCheck.filter_too_much,
        )

    draw_time = sum(state.draw_times)

    if draw_time > 1.0:
        fail_health_check(
            self.settings,
            ("Data generation is extremely slow: Only produced "
             "%d valid examples in %.2f seconds (%d invalid ones "
             "and %d exceeded maximum size). Try decreasing "
             "size of the data you're generating (with e.g."
             "max_size or max_leaves parameters).") % (
                state.valid_examples,
                draw_time,
                state.invalid_examples,
                state.overrun_examples,
            ),
            HealthCheck.too_slow,
        )
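# record_for_health_check raises data_too_large, filter_too_much, or too_slow
# based on the counters above. When one of these is a known false positive for
# a particular test, it can be suppressed via settings; a sketch with a
# hypothetical test, using the public suppress_health_check option:
from hypothesis import HealthCheck, given, settings, strategies as st

@settings(suppress_health_check=[HealthCheck.too_slow, HealthCheck.filter_too_much])
@given(st.lists(st.integers()).filter(lambda xs: sum(xs) % 7 == 0))
def test_sum_is_multiple_of_seven(xs):
    # The aggressive filter would normally risk tripping filter_too_much.
    assert sum(xs) % 7 == 0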
def wrapped_test(*arguments, **kwargs):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True

    test = wrapped_test.hypothesis.inner_test

    if getattr(test, 'is_hypothesis_test', False):
        note_deprecation((
            'You have applied @given to test: %s more than once. In '
            'future this will be an error. Applying @given twice '
            'wraps the test twice, which can be extremely slow. A '
            'similar effect can be gained by combining the arguments '
            'of the two calls to given. For example, instead of '
            '@given(booleans()) @given(integers()), you could write '
            '@given(booleans(), integers())') % (test.__name__, )
        )

    settings = wrapped_test._hypothesis_internal_use_settings

    random = get_random_for_wrapped_test(test, wrapped_test)

    if infer in generator_kwargs.values():
        hints = get_type_hints(test)
        for name in [name for name, value in generator_kwargs.items()
                     if value is infer]:
            if name not in hints:
                raise InvalidArgument(
                    'passed %s=infer for %s, but %s has no type annotation'
                    % (name, test.__name__, name))
            generator_kwargs[name] = st.from_type(hints[name])

    processed_args = process_arguments_to_given(
        wrapped_test, arguments, kwargs, generator_arguments,
        generator_kwargs, argspec, test, settings
    )
    arguments, kwargs, test_runner, search_strategy = processed_args

    runner = getattr(search_strategy, 'runner', None)
    if isinstance(runner, TestCase) and test.__name__ in dir(TestCase):
        msg = ('You have applied @given to the method %s, which is '
               'used by the unittest runner but is not itself a test.'
               ' This is not useful in any way.' % test.__name__)
        fail_health_check(settings, msg, HealthCheck.not_a_test_method)
    if bad_django_TestCase(runner):  # pragma: no cover
        # Covered by the Django tests, but not the pytest coverage task
        raise InvalidArgument(
            'You have applied @given to a method on %s, but this '
            'class does not inherit from the supported versions in '
            '`hypothesis.extra.django`. Use the Hypothesis variants '
            'to ensure that each example is run in a separate '
            'database transaction.' % qualname(type(runner))
        )

    state = StateForActualGivenExecution(
        test_runner, search_strategy, test, settings, random,
        had_seed=wrapped_test._hypothesis_internal_use_seed
    )

    reproduce_failure = \
        wrapped_test._hypothesis_internal_use_reproduce_failure

    if reproduce_failure is not None:
        expected_version, failure = reproduce_failure
        if expected_version != __version__:
            raise InvalidArgument((
                'Attempting to reproduce a failure from a different '
                'version of Hypothesis. This failure is from %s, but '
                'you are currently running %r. Please change your '
                'Hypothesis version to a matching one.'
            ) % (expected_version, __version__))
        try:
            state.execute(ConjectureData.for_buffer(
                decode_failure(failure)),
                print_example=True, is_final=True,
            )
            raise DidNotReproduce(
                'Expected the test to raise an error, but it '
                'completed successfully.'
            )
        except StopTest:
            raise DidNotReproduce(
                'The shape of the test data has changed in some way '
                'from where this blob was defined. Are you sure '
                "you're running the same test?"
            )
        except UnsatisfiedAssumption:
            raise DidNotReproduce(
                'The test data failed to satisfy an assumption in the '
                'test. Have you added it since this blob was '
                'generated?'
            )

    execute_explicit_examples(
        test_runner, test, wrapped_test, settings, arguments, kwargs
    )

    if settings.max_examples <= 0:
        return

    if not (
        Phase.reuse in settings.phases or
        Phase.generate in settings.phases
    ):
        return

    try:
        if isinstance(runner, TestCase) and hasattr(runner, 'subTest'):
            subTest = runner.subTest
            try:
                setattr(runner, 'subTest', fake_subTest)
                state.run()
            finally:
                setattr(runner, 'subTest', subTest)
        else:
            state.run()
    except BaseException:
        generated_seed = \
            wrapped_test._hypothesis_internal_use_generated_seed
        if generated_seed is not None and not state.failed_normally:
            with local_settings(settings):
                if running_under_pytest:
                    report(
                        'You can add @seed(%(seed)d) to this test or '
                        'run pytest with --hypothesis-seed=%(seed)d '
                        'to reproduce this failure.' % {
                            'seed': generated_seed})
                else:
                    report(
                        'You can add @seed(%d) to this test to '
                        'reproduce this failure.' % (generated_seed,))
        raise
def wrapped_test(*arguments, **kwargs):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True

    test = wrapped_test.hypothesis.inner_test

    if getattr(test, "is_hypothesis_test", False):
        raise InvalidArgument(
            (
                "You have applied @given to the test %s more than once, which "
                "wraps the test several times and is extremely slow. A "
                "similar effect can be gained by combining the arguments "
                "of the two calls to given. For example, instead of "
                "@given(booleans()) @given(integers()), you could write "
                "@given(booleans(), integers())"
            )
            % (test.__name__,)
        )

    settings = wrapped_test._hypothesis_internal_use_settings

    random = get_random_for_wrapped_test(test, wrapped_test)

    if infer in generator_kwargs.values():
        hints = get_type_hints(test)
        for name in [
            name for name, value in generator_kwargs.items() if value is infer
        ]:
            if name not in hints:
                raise InvalidArgument(
                    "passed %s=infer for %s, but %s has no type annotation"
                    % (name, test.__name__, name)
                )
            generator_kwargs[name] = st.from_type(hints[name])

    processed_args = process_arguments_to_given(
        wrapped_test,
        arguments,
        kwargs,
        generator_arguments,
        generator_kwargs,
        argspec,
        test,
        settings,
    )
    arguments, kwargs, test_runner, search_strategy = processed_args

    runner = getattr(search_strategy, "runner", None)
    if isinstance(runner, TestCase) and test.__name__ in dir(TestCase):
        msg = (
            "You have applied @given to the method %s, which is "
            "used by the unittest runner but is not itself a test."
            " This is not useful in any way." % test.__name__
        )
        fail_health_check(settings, msg, HealthCheck.not_a_test_method)
    if bad_django_TestCase(runner):  # pragma: no cover
        # Covered by the Django tests, but not the pytest coverage task
        raise InvalidArgument(
            "You have applied @given to a method on %s, but this "
            "class does not inherit from the supported versions in "
            "`hypothesis.extra.django`. Use the Hypothesis variants "
            "to ensure that each example is run in a separate "
            "database transaction." % qualname(type(runner))
        )

    state = StateForActualGivenExecution(
        test_runner,
        search_strategy,
        test,
        settings,
        random,
        had_seed=wrapped_test._hypothesis_internal_use_seed,
    )

    reproduce_failure = wrapped_test._hypothesis_internal_use_reproduce_failure

    if reproduce_failure is not None:
        expected_version, failure = reproduce_failure
        if expected_version != __version__:
            raise InvalidArgument(
                (
                    "Attempting to reproduce a failure from a different "
                    "version of Hypothesis. This failure is from %s, but "
                    "you are currently running %r. Please change your "
                    "Hypothesis version to a matching one."
                )
                % (expected_version, __version__)
            )
        try:
            state.execute(
                ConjectureData.for_buffer(decode_failure(failure)),
                print_example=True,
                is_final=True,
            )
            raise DidNotReproduce(
                "Expected the test to raise an error, but it "
                "completed successfully."
            )
        except StopTest:
            raise DidNotReproduce(
                "The shape of the test data has changed in some way "
                "from where this blob was defined. Are you sure "
                "you're running the same test?"
            )
        except UnsatisfiedAssumption:
            raise DidNotReproduce(
                "The test data failed to satisfy an assumption in the "
                "test. Have you added it since this blob was "
                "generated?"
            )

    execute_explicit_examples(
        test_runner, test, wrapped_test, settings, arguments, kwargs
    )

    if settings.max_examples <= 0:
        return

    if not (
        Phase.reuse in settings.phases or Phase.generate in settings.phases
    ):
        return

    try:
        if isinstance(runner, TestCase) and hasattr(runner, "subTest"):
            subTest = runner.subTest
            try:
                setattr(runner, "subTest", fake_subTest)
                state.run()
            finally:
                setattr(runner, "subTest", subTest)
        else:
            state.run()
    except BaseException as e:
        generated_seed = wrapped_test._hypothesis_internal_use_generated_seed
        with local_settings(settings):
            if not (state.failed_normally or generated_seed is None):
                if running_under_pytest:
                    report(
                        "You can add @seed(%(seed)d) to this test or "
                        "run pytest with --hypothesis-seed=%(seed)d "
                        "to reproduce this failure." % {"seed": generated_seed}
                    )
                else:
                    report(
                        "You can add @seed(%d) to this test to "
                        "reproduce this failure." % (generated_seed,)
                    )
            # The dance here is to avoid showing users long tracebacks
            # full of Hypothesis internals they don't care about.
            # We have to do this inline, to avoid adding another
            # internal stack frame just when we've removed the rest.
            if PY2:
                # Python 2 doesn't have Exception.with_traceback(...);
                # instead it has a three-argument form of the `raise`
                # statement. Unfortunately this is a SyntaxError on
                # Python 3, and before Python 2.7.9 it was *also* a
                # SyntaxError to use it in a nested function so we
                # can't `exec` or `eval` our way out (BPO-21591).
                # So unless we break some versions of Python 2, none
                # of them get traceback elision.
                raise
            # On Python 3, we swap out the real traceback for our
            # trimmed version. Using a variable ensures that the line
            # which will actually appear in tracebacks is as clear as
            # possible - "raise the_error_hypothesis_found".
            the_error_hypothesis_found = e.with_traceback(
                get_trimmed_traceback()
            )
            raise the_error_hypothesis_found
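# Both wrapped_test variants above report a @seed(...) hint when a failure was
# found under a randomly generated seed. Acting on that hint looks roughly like
# this; the seed value and test body are hypothetical placeholders copied from
# a failure report:
from hypothesis import given, seed, strategies as st

@seed(1234567890)  # value taken from the "You can add @seed(...)" report
@given(st.floats(allow_nan=False))
def test_reported_failure(x):
    # A deliberately failing property (breaks for very large or infinite x),
    # so re-running with the seed reproduces the original failure.
    assert x + 1 > x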
def generate_new_examples(self):
    if Phase.generate not in self.settings.phases:
        return

    zero_data = self.cached_test_function(hbytes(
        self.settings.buffer_size))
    if zero_data.status == Status.OVERRUN or (
        zero_data.status == Status.VALID and
        len(zero_data.buffer) * 2 > self.settings.buffer_size
    ):
        fail_health_check(
            self.settings,
            "The smallest natural example for your test is extremely "
            "large. This makes it difficult for Hypothesis to generate "
            "good examples, especially when trying to reduce failing ones "
            "at the end. Consider reducing the size of your data if it is "
            "of a fixed size. You could also fix this by improving how "
            "your data shrinks (see https://hypothesis.readthedocs.io/en/"
            "latest/data.html#shrinking for details), or by introducing "
            "default values inside your strategy. e.g. could you replace "
            "some arguments with their defaults by using "
            "one_of(none(), some_complex_strategy)?",
            HealthCheck.large_base_example,
        )

    if zero_data is not Overrun:
        # If the language starts with writes of length >= cap then there is
        # only one string in it: Everything after cap is forced to be zero (or
        # to be whatever value is written there). That means that once we've
        # tried the zero value, there's nothing left for us to do, so we
        # exit early here.
        for i in hrange(self.cap):
            if i not in zero_data.forced_indices:
                break
        else:
            self.exit_with(ExitReason.finished)

    self.health_check_state = HealthCheckState()

    count = 0
    while not self.interesting_examples and (
        count < 10 or self.health_check_state is not None
    ):
        prefix = self.generate_novel_prefix()

        def draw_bytes(data, n):
            if data.index < len(prefix):
                result = prefix[data.index:data.index + n]
                if len(result) < n:
                    result += uniform(self.random, n - len(result))
            else:
                result = uniform(self.random, n)
            return self.__zero_bound(data, result)

        targets_found = len(self.covering_examples)

        last_data = ConjectureData(
            max_length=self.settings.buffer_size,
            draw_bytes=draw_bytes)
        self.test_function(last_data)
        last_data.freeze()

        count += 1

    mutations = 0
    mutator = self._new_mutator()

    zero_bound_queue = []

    while not self.interesting_examples:
        if zero_bound_queue:
            # Whenever we generated an example and it hits a bound
            # which forces zero blocks into it, this creates a weird
            # distortion effect by making certain parts of the data
            # stream (especially ones to the right) much more likely
            # to be zero. We fix this by redistributing the generated
            # data by shuffling it randomly. This results in the
            # zero data being spread evenly throughout the buffer.
            # Hopefully the shrinking this causes will cause us to
            # naturally fail to hit the bound.
            # If it doesn't then we will queue the new version up again
            # (now with more zeros) and try again.
            overdrawn = zero_bound_queue.pop()
            buffer = bytearray(overdrawn.buffer)

            # These will have values written to them that are different
            # from what's in them anyway, so the value there doesn't
            # really "count" for distributional purposes, and if we
            # leave them in then they can cause the fraction of non
            # zero bytes to increase on redraw instead of decrease.
            for i in overdrawn.forced_indices:
                buffer[i] = 0

            self.random.shuffle(buffer)
            buffer = hbytes(buffer)

            def draw_bytes(data, n):
                result = buffer[data.index:data.index + n]
                if len(result) < n:
                    result += hbytes(n - len(result))
                return self.__rewrite(data, result)

            data = ConjectureData(
                draw_bytes=draw_bytes,
                max_length=self.settings.buffer_size)
            self.test_function(data)
            data.freeze()
        else:
            origin = self.target_selector.select()
            mutations += 1
            targets_found = len(self.covering_examples)
            data = ConjectureData(
                draw_bytes=mutator(origin),
                max_length=self.settings.buffer_size)
            self.test_function(data)
            data.freeze()
            if (data.status > origin.status or
                    len(self.covering_examples) > targets_found):
                mutations = 0
            elif data.status < origin.status or mutations >= 10:
                # Cap the variations of a single example and move on to
                # an entirely fresh start. Ten is an entirely arbitrary
                # constant, but it's been working well for years.
                mutations = 0
                mutator = self._new_mutator()
        if getattr(data, "hit_zero_bound", False):
            zero_bound_queue.append(data)

        mutations += 1
def test_function(self, data):
    if benchmark_time() - self.start_time >= HUNG_TEST_TIME_LIMIT:
        fail_health_check(
            self.settings,
            ("Your test has been running for at least five minutes. This "
             "is probably not what you intended, so by default Hypothesis "
             "turns it into an error."),
            HealthCheck.hung_test,
        )

    self.call_count += 1
    try:
        self._test_function(data)
        data.freeze()
    except StopTest as e:
        if e.testcounter != data.testcounter:
            self.save_buffer(data.buffer)
            raise
    except BaseException:
        self.save_buffer(data.buffer)
        raise
    finally:
        data.freeze()
        self.note_details(data)

    self.target_selector.add(data)
    self.debug_data(data)

    if data.status == Status.VALID:
        self.valid_examples += 1

    # Record the test result in the tree, to avoid unnecessary work in
    # the future.

    # The tree has two main uses:
    #
    # 1. It is mildly useful in some cases during generation where there is
    #    a high probability of duplication but it is possible to generate
    #    many examples. e.g. if we had input of the form none() | text()
    #    then we would generate duplicates 50% of the time, and would
    #    like to avoid that and spend more time exploring the text() half
    #    of the search space. The tree allows us to predict in advance if
    #    the test would lead to a duplicate and avoid that.
    # 2. When shrinking it is *extremely* useful to be able to anticipate
    #    duplication, because we try many similar and smaller test cases,
    #    and these will tend to have a very high duplication rate. This is
    #    where the tree usage really shines.
    #
    # Unfortunately, as well as being the less useful type of tree usage,
    # the first type is also the most expensive! Once we've entered shrink
    # mode our time remaining is essentially bounded - we're just here
    # until we've found the minimal example. In exploration mode, we might
    # be early on in a very long-running process, and keeping everything
    # we've ever seen lying around ends up bloating our memory usage
    # substantially by causing us to use O(max_examples) memory.
    #
    # As a compromise, what we do is reset the cache every so often. This
    # keeps our memory usage bounded. It has a few unfortunate failure
    # modes in that it means that we can't always detect when we should
    # have stopped - if we are exploring a language which has only slightly
    # more than cache reset frequency number of members, we will end up
    # exploring indefinitely when we could have stopped. However, this is
    # a fairly unusual case - thanks to exponential blow-ups in language
    # size, most languages are either very large (possibly infinite) or
    # very small. Nevertheless we want CACHE_RESET_FREQUENCY to be quite
    # high to avoid this case coming up in practice.
    if (self.call_count % CACHE_RESET_FREQUENCY == 0 and
            not self.interesting_examples):
        self.reset_tree_to_empty()

    self.tree.add(data)

    if data.status == Status.INTERESTING:
        key = data.interesting_origin
        changed = False
        try:
            existing = self.interesting_examples[key]
        except KeyError:
            changed = True
        else:
            if sort_key(data.buffer) < sort_key(existing.buffer):
                self.shrinks += 1
                self.downgrade_buffer(existing.buffer)
                changed = True

        if changed:
            self.save_buffer(data.buffer)
            self.interesting_examples[key] = data
            self.shrunk_examples.discard(key)

        if self.shrinks >= MAX_SHRINKS:
            self.exit_with(ExitReason.max_shrinks)

    if not self.interesting_examples:
        if self.valid_examples >= self.settings.max_examples:
            self.exit_with(ExitReason.max_examples)
        if self.call_count >= max(
            self.settings.max_examples * 10,
            # We have a high-ish default max iterations, so that tests
            # don't become flaky when max_examples is too low.
            1000,
        ):
            self.exit_with(ExitReason.max_iterations)

    if self.__tree_is_exhausted():
        self.exit_with(ExitReason.finished)

    self.record_for_health_check(data)
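# The exit conditions above are driven by settings.max_examples, plus an
# iteration cap of max(10 * max_examples, 1000) calls. A sketch of tuning
# these limits through settings profiles (profile names are arbitrary):
from hypothesis import settings

settings.register_profile("thorough", max_examples=1_000)
settings.register_profile("quick", max_examples=10)
settings.load_profile("quick")  # e.g. selected in conftest.py or from a CI env var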
def wrapped_test(*arguments, **kwargs):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True

    if getattr(test, 'is_hypothesis_test', False):
        note_deprecation(
            'You have applied @given to a test more than once. In '
            'future this will be an error. Applying @given twice '
            'wraps the test twice, which can be extremely slow. A '
            'similar effect can be gained by combining the arguments '
            'of the two calls to given. For example, instead of '
            '@given(booleans()) @given(integers()), you could write '
            '@given(booleans(), integers())')

    settings = wrapped_test._hypothesis_internal_use_settings

    random = get_random_for_wrapped_test(test, wrapped_test)

    if infer in generator_kwargs.values():
        hints = get_type_hints(test)
        for name in [
            name for name, value in generator_kwargs.items()
            if value is infer
        ]:
            if name not in hints:
                raise InvalidArgument(
                    'passed %s=infer for %s, but %s has no type annotation'
                    % (name, test.__name__, name))
            generator_kwargs[name] = st.from_type(hints[name])

    processed_args = process_arguments_to_given(
        wrapped_test, arguments, kwargs, generator_arguments,
        generator_kwargs, argspec, test, settings)
    arguments, kwargs, test_runner, search_strategy = processed_args

    runner = getattr(search_strategy, 'runner', None)
    if isinstance(runner, TestCase) and test.__name__ in dir(TestCase):
        msg = ('You have applied @given to the method %s, which is '
               'used by the unittest runner but is not itself a test.'
               ' This is not useful in any way.' % test.__name__)
        fail_health_check(settings, msg, HealthCheck.not_a_test_method)
    if bad_django_TestCase(runner):  # pragma: no cover
        # Covered by the Django tests, but not the pytest coverage task
        raise InvalidArgument(
            'You have applied @given to a method on %s, but this '
            'class does not inherit from the supported versions in '
            '`hypothesis.extra.django`. Use the Hypothesis variants '
            'to ensure that each example is run in a separate '
            'database transaction.' % qualname(type(runner)))

    state = StateForActualGivenExecution(
        test_runner, search_strategy, test, settings, random,
        had_seed=wrapped_test._hypothesis_internal_use_seed)

    reproduce_failure = \
        wrapped_test._hypothesis_internal_use_reproduce_failure

    if reproduce_failure is not None:
        expected_version, failure = reproduce_failure
        if expected_version != __version__:
            raise InvalidArgument(
                ('Attempting to reproduce a failure from a different '
                 'version of Hypothesis. This failure is from %s, but '
                 'you are currently running %r. Please change your '
                 'Hypothesis version to a matching one.')
                % (expected_version, __version__))
        try:
            state.execute(
                ConjectureData.for_buffer(decode_failure(failure)),
                print_example=True, is_final=True,
            )
            raise DidNotReproduce(
                'Expected the test to raise an error, but it '
                'completed successfully.')
        except StopTest:
            raise DidNotReproduce(
                'The shape of the test data has changed in some way '
                'from where this blob was defined. Are you sure '
                "you're running the same test?")
        except UnsatisfiedAssumption:
            raise DidNotReproduce(
                'The test data failed to satisfy an assumption in the '
                'test. Have you added it since this blob was '
                'generated?')

    execute_explicit_examples(
        test_runner, test, wrapped_test, settings, arguments, kwargs)

    if settings.max_examples <= 0:
        return

    if not (Phase.reuse in settings.phases or
            Phase.generate in settings.phases):
        return

    try:
        if isinstance(runner, TestCase) and hasattr(runner, 'subTest'):
            subTest = runner.subTest
            try:
                setattr(runner, 'subTest', fake_subTest)
                state.run()
            finally:
                setattr(runner, 'subTest', subTest)
        else:
            state.run()
    except BaseException:
        generated_seed = \
            wrapped_test._hypothesis_internal_use_generated_seed
        if generated_seed is not None and not state.failed_normally:
            if running_under_pytest:
                report(
                    ('You can add @seed(%(seed)d) to this test or run '
                     'pytest with --hypothesis-seed=%(seed)d to '
                     'reproduce this failure.') % {'seed': generated_seed},
                )
            else:
                report(
                    ('You can add @seed(%d) to this test to reproduce '
                     'this failure.') % (generated_seed, ))
        raise
def generate_new_examples(self):
    if Phase.generate not in self.settings.phases:
        return

    if self.interesting_examples:
        # The example database has failing examples from a previous run,
        # so we'd rather report that they're still failing ASAP than take
        # the time to look for additional failures.
        return

    zero_data = self.cached_test_function(hbytes(self.settings.buffer_size))
    if zero_data.status > Status.OVERRUN:
        self.__data_cache.pin(zero_data.buffer)

    if zero_data.status == Status.OVERRUN or (
        zero_data.status == Status.VALID
        and len(zero_data.buffer) * 2 > self.settings.buffer_size
    ):
        fail_health_check(
            self.settings,
            "The smallest natural example for your test is extremely "
            "large. This makes it difficult for Hypothesis to generate "
            "good examples, especially when trying to reduce failing ones "
            "at the end. Consider reducing the size of your data if it is "
            "of a fixed size. You could also fix this by improving how "
            "your data shrinks (see https://hypothesis.readthedocs.io/en/"
            "latest/data.html#shrinking for details), or by introducing "
            "default values inside your strategy. e.g. could you replace "
            "some arguments with their defaults by using "
            "one_of(none(), some_complex_strategy)?",
            HealthCheck.large_base_example,
        )

    if zero_data is not Overrun:
        # If the language starts with writes of length >= cap then there is
        # only one string in it: Everything after cap is forced to be zero (or
        # to be whatever value is written there). That means that once we've
        # tried the zero value, there's nothing left for us to do, so we
        # exit early here.
        has_non_forced = False

        # It's impossible to fall out of this loop normally because if we
        # did then that would mean that all blocks are writes, so we would
        # already have triggered the exhaustedness check on the tree and
        # finished running.
        for b in zero_data.blocks:  # pragma: no branch
            if b.start >= self.cap:
                break
            if not b.forced:
                has_non_forced = True
                break
        if not has_non_forced:
            self.exit_with(ExitReason.finished)

    self.health_check_state = HealthCheckState()

    def should_generate_more():
        # If we haven't found a bug, keep looking. We check this before
        # doing anything else as it's by far the most common case.
        if not self.interesting_examples:
            return True
        # If we've found a bug and won't report more than one, stop looking.
        elif not self.settings.report_multiple_bugs:
            return False
        assert self.first_bug_found_at <= self.last_bug_found_at <= self.call_count
        # End the generation phase where we would have ended it if no bugs had
        # been found. This reproduces the exit logic in `self.test_function`,
        # but with the important distinction that this clause will move on to
        # the shrinking phase having found one or more bugs, while the other
        # will exit having found zero bugs.
        if (
            self.valid_examples >= self.settings.max_examples
            or self.call_count >= max(self.settings.max_examples * 10, 1000)
        ):  # pragma: no cover
            return False
        # Otherwise, keep searching for between ten and 'a heuristic' calls.
        # We cap 'calls after first bug' so errors are reported reasonably
        # soon even for tests that are allowed to run for a very long time,
        # or sooner if the latest half of our test effort has been fruitless.
        return self.call_count < MIN_TEST_CALLS or self.call_count < min(
            self.first_bug_found_at + 1000, self.last_bug_found_at * 2
        )

    count = 0
    while should_generate_more() and (
        count < 10
        or self.health_check_state is not None
        # If we have not found a valid prefix yet, the target selector will
        # be empty and the mutation stage will fail with a very rare internal
        # error. We therefore continue this initial random generation step
        # until we have found at least one prefix to mutate.
        or len(self.target_selector) == 0
    ):
        prefix = self.generate_novel_prefix()

        def draw_bytes(data, n):
            if data.index < len(prefix):
                result = prefix[data.index : data.index + n]
                # We always draw prefixes as a whole number of blocks
                assert len(result) == n
            else:
                result = uniform(self.random, n)
            return self.__zero_bound(data, result)

        last_data = self.new_conjecture_data(draw_bytes)
        self.test_function(last_data)
        last_data.freeze()

        count += 1

    mutations = 0
    mutator = self._new_mutator()

    zero_bound_queue = []

    while should_generate_more():
        if zero_bound_queue:
            # Whenever we generated an example and it hits a bound
            # which forces zero blocks into it, this creates a weird
            # distortion effect by making certain parts of the data
            # stream (especially ones to the right) much more likely
            # to be zero. We fix this by redistributing the generated
            # data by shuffling it randomly. This results in the
            # zero data being spread evenly throughout the buffer.
            # Hopefully the shrinking this causes will cause us to
            # naturally fail to hit the bound.
            # If it doesn't then we will queue the new version up again
            # (now with more zeros) and try again.
            overdrawn = zero_bound_queue.pop()
            buffer = bytearray(overdrawn.buffer)

            # These will have values written to them that are different
            # from what's in them anyway, so the value there doesn't
            # really "count" for distributional purposes, and if we
            # leave them in then they can cause the fraction of non
            # zero bytes to increase on redraw instead of decrease.
            for i in overdrawn.forced_indices:
                buffer[i] = 0

            self.random.shuffle(buffer)
            buffer = hbytes(buffer)

            def draw_bytes(data, n):
                result = buffer[data.index : data.index + n]
                if len(result) < n:
                    result += hbytes(n - len(result))
                return self.__zero_bound(data, result)

            data = self.new_conjecture_data(draw_bytes=draw_bytes)
            self.test_function(data)
            data.freeze()
        else:
            origin = self.target_selector.select()
            mutations += 1
            data = self.new_conjecture_data(draw_bytes=mutator(origin))
            self.test_function(data)
            data.freeze()
            if data.status > origin.status:
                mutations = 0
            elif data.status < origin.status or mutations >= 10:
                # Cap the variations of a single example and move on to
                # an entirely fresh start. Ten is an entirely arbitrary
                # constant, but it's been working well for years.
                mutations = 0
                mutator = self._new_mutator()
        if getattr(data, "hit_zero_bound", False):
            zero_bound_queue.append(data)

        mutations += 1
def run_state_machine(factory, data):
    cd = data.conjecture_data
    machine = factory()
    check_type(RuleBasedStateMachine, machine, "state_machine_factory()")
    cd.hypothesis_runner = machine

    print_steps = (
        current_build_context().is_final or current_verbosity() >= Verbosity.debug
    )
    try:
        if print_steps:
            report(f"state = {machine.__class__.__name__}()")
        machine.check_invariants(settings)
        max_steps = settings.stateful_step_count
        steps_run = 0

        while True:
            # We basically always want to run the maximum number of steps,
            # but need to leave a small probability of terminating early
            # in order to allow for reducing the number of steps once we
            # find a failing test case, so we stop with probability of
            # 2 ** -16 during normal operation but force a stop when we've
            # generated enough steps.
            cd.start_example(STATE_MACHINE_RUN_LABEL)
            if steps_run == 0:
                cd.draw_bits(16, forced=1)
            elif steps_run >= max_steps:
                cd.draw_bits(16, forced=0)
                break
            else:
                # All we really care about is whether this value is zero
                # or non-zero, so if it's > 1 we discard it and insert a
                # replacement value after
                cd.start_example(SHOULD_CONTINUE_LABEL)
                should_continue_value = cd.draw_bits(16)
                if should_continue_value > 1:
                    cd.stop_example(discard=True)
                    cd.draw_bits(16, forced=int(bool(should_continue_value)))
                else:
                    cd.stop_example()
                if should_continue_value == 0:
                    break
            steps_run += 1

            # Choose a rule to run, preferring an initialize rule if there are
            # any which have not been run yet.
            if machine._initialize_rules_to_run:
                init_rules = [
                    st.tuples(st.just(rule), st.fixed_dictionaries(rule.arguments))
                    for rule in machine._initialize_rules_to_run
                ]
                rule, data = cd.draw(st.one_of(init_rules))
                machine._initialize_rules_to_run.remove(rule)
            else:
                rule, data = cd.draw(machine._rules_strategy)

            # Pretty-print the values this rule was called with *before* calling
            # _add_result_to_targets, to avoid printing arguments which are also
            # a return value using the variable name they are assigned to.
            # See https://github.com/HypothesisWorks/hypothesis/issues/2341
            if print_steps:
                data_to_print = {
                    k: machine._pretty_print(v) for k, v in data.items()
                }

            # Assign 'result' here in case executing the rule fails below
            result = multiple()
            try:
                data = dict(data)
                for k, v in list(data.items()):
                    if isinstance(v, VarReference):
                        data[k] = machine.names_to_values[v.name]

                result = rule.function(machine, **data)
                if rule.targets:
                    if isinstance(result, MultipleResults):
                        for single_result in result.values:
                            machine._add_result_to_targets(
                                rule.targets, single_result)
                    else:
                        machine._add_result_to_targets(rule.targets, result)
                elif result is not None:
                    fail_health_check(
                        settings,
                        "Rules should return None if they have no target bundle, "
                        f"but {rule.function.__qualname__} returned {result!r}",
                        HealthCheck.return_value,
                    )
            finally:
                if print_steps:
                    # 'result' is only used if the step has target bundles.
                    # If it does, and the result is a 'MultipleResult',
                    # then 'print_step' prints a multi-variable assignment.
                    machine._print_step(rule, data_to_print, result)
            machine.check_invariants(settings)
            cd.stop_example()
    finally:
        if print_steps:
            report("state.teardown()")
        machine.teardown()
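# run_state_machine only accepts a non-None rule return value when the rule
# feeds a target Bundle; otherwise HealthCheck.return_value fires. A sketch of
# both shapes, using a hypothetical machine:
import hypothesis.strategies as st
from hypothesis.stateful import Bundle, RuleBasedStateMachine, rule

class Queues(RuleBasedStateMachine):
    queues = Bundle("queues")

    @rule(target=queues, maxsize=st.integers(min_value=1, max_value=8))
    def new_queue(self, maxsize):
        # Returning here is fine: the value is stored in the 'queues' bundle
        # and later rules can draw it back out.
        return []

    @rule(q=queues, item=st.integers())
    def push(self, q, item):
        # No target bundle, so this rule must return None.
        q.append(item)

TestQueues = Queues.TestCase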
def generate_new_examples(self):
    if Phase.generate not in self.settings.phases:
        return

    if self.interesting_examples:
        # The example database has failing examples from a previous run,
        # so we'd rather report that they're still failing ASAP than take
        # the time to look for additional failures.
        return

    zero_data = self.cached_test_function(hbytes(BUFFER_SIZE))
    if zero_data.status > Status.OVERRUN:
        self.__data_cache.pin(zero_data.buffer)
        self.optimise_all(zero_data)

    if zero_data.status == Status.OVERRUN or (
        zero_data.status == Status.VALID and
        len(zero_data.buffer) * 2 > BUFFER_SIZE
    ):
        fail_health_check(
            self.settings,
            "The smallest natural example for your test is extremely "
            "large. This makes it difficult for Hypothesis to generate "
            "good examples, especially when trying to reduce failing ones "
            "at the end. Consider reducing the size of your data if it is "
            "of a fixed size. You could also fix this by improving how "
            "your data shrinks (see https://hypothesis.readthedocs.io/en/"
            "latest/data.html#shrinking for details), or by introducing "
            "default values inside your strategy. e.g. could you replace "
            "some arguments with their defaults by using "
            "one_of(none(), some_complex_strategy)?",
            HealthCheck.large_base_example,
        )

    self.health_check_state = HealthCheckState()

    def should_generate_more():
        # If we haven't found a bug, keep looking. We check this before
        # doing anything else as it's by far the most common case.
        if not self.interesting_examples:
            return True
        # If we've found a bug and won't report more than one, stop looking.
        elif not self.settings.report_multiple_bugs:
            return False
        assert self.first_bug_found_at <= self.last_bug_found_at <= self.call_count
        # End the generation phase where we would have ended it if no bugs had
        # been found. This reproduces the exit logic in `self.test_function`,
        # but with the important distinction that this clause will move on to
        # the shrinking phase having found one or more bugs, while the other
        # will exit having found zero bugs.
        if (self.valid_examples >= self.settings.max_examples or
                self.call_count >= max(
                    self.settings.max_examples * 10, 1000)):  # pragma: no cover
            return False
        # Otherwise, keep searching for between ten and 'a heuristic' calls.
        # We cap 'calls after first bug' so errors are reported reasonably
        # soon even for tests that are allowed to run for a very long time,
        # or sooner if the latest half of our test effort has been fruitless.
        return self.call_count < MIN_TEST_CALLS or self.call_count < min(
            self.first_bug_found_at + 1000, self.last_bug_found_at * 2)

    # GenerationParameters are a set of decisions we make that are global
    # to the whole test case, used to bias the data generation in various
    # ways. This is an approach very very loosely inspired by the paper
    # "Swarm testing." by Groce et al. in that it induces deliberate
    # correlation between otherwise independent decisions made during the
    # generation process.
    #
    # More importantly the generation is designed to make certain scenarios
    # more likely (e.g. small examples, duplicated values), which can help
    # or hurt in terms of finding interesting things. Whenever the result
    # of our generation is a bad test case, for whatever definition of
    # "bad" we like (currently, invalid or too large), we ditch the
    # parameter early. This allows us to potentially generate good test
    # cases significantly more often than we otherwise would, by selecting
    # for parameters that make them more likely.
    parameter = GenerationParameters(self.random)
    count = 0

    # We attempt to use the size of the minimal generated test case starting
    # from a given novel prefix as a guideline to generate smaller test
    # cases for an initial period, by restricting ourselves to test cases
    # that are not much larger than it.
    #
    # Calculating the actual minimal generated test case is hard, so we
    # take a best guess that zero extending a prefix produces the minimal
    # test case starting with that prefix (this is true for our built in
    # strategies). This is only a reasonable thing to do if the resulting
    # test case is valid. If we regularly run into situations where it is
    # not valid then this strategy is a waste of time, so we want to
    # abandon it early. In order to do this we track how many times in a
    # row it has failed to work, and abort small test case generation when
    # it has failed too many times in a row.
    consecutive_zero_extend_is_invalid = 0

    while should_generate_more():
        prefix = self.generate_novel_prefix()
        assert len(prefix) <= BUFFER_SIZE

        # We control growth during initial example generation, for two
        # reasons:
        #
        # * It gives us an opportunity to find small examples early, which
        #   gives us a fast path for easy to find bugs.
        # * It avoids low probability events where we might end up
        #   generating very large examples during health checks, which
        #   on slower machines can trigger HealthCheck.too_slow.
        #
        # The heuristic we use is that we attempt to estimate the smallest
        # extension of this prefix, and limit the size to no more than
        # an order of magnitude larger than that. If we fail to estimate
        # the size accurately, we skip over this prefix and try again.
        #
        # We need to tune the example size based on the initial prefix,
        # because any fixed size might be too small, and any size based
        # on the strategy in general can fall afoul of strategies that
        # have very different sizes for different prefixes.
        small_example_cap = clamp(10, self.settings.max_examples // 10, 50)
        if (self.valid_examples <= small_example_cap and
                self.call_count <= 5 * small_example_cap and
                not self.interesting_examples and
                consecutive_zero_extend_is_invalid < 5):
            minimal_example = self.cached_test_function(
                prefix + hbytes(BUFFER_SIZE - len(prefix)))

            if minimal_example.status < Status.VALID:
                consecutive_zero_extend_is_invalid += 1
                continue

            consecutive_zero_extend_is_invalid = 0

            minimal_extension = len(minimal_example.buffer) - len(prefix)

            max_length = min(
                len(prefix) + minimal_extension * 10, BUFFER_SIZE)

            # We could end up in a situation where even though the prefix was
            # novel when we generated it, because we've now tried zero extending
            # it not all possible continuations of it will be novel. In order to
            # avoid making redundant test calls, we rerun it in simulation mode
            # first. If this has a predictable result, then we don't bother
            # running the test function for real here. If however we encounter
            # some novel behaviour, we try again with the real test function,
            # starting from the new novel prefix that it has discovered.
            try:
                trial_data = self.new_conjecture_data(
                    prefix=prefix, parameter=parameter, max_length=max_length)
                self.tree.simulate_test_function(trial_data)
                continue
            except PreviouslyUnseenBehaviour:
                pass

            # If the simulation entered part of the tree that has been killed,
            # we don't want to run this.
            if trial_data.observer.killed:
                continue

            # We might have hit the cap on number of examples we should
            # run when calculating the minimal example.
            if not should_generate_more():
                break

            prefix = trial_data.buffer
        else:
            max_length = BUFFER_SIZE

        data = self.new_conjecture_data(
            prefix=prefix, parameter=parameter, max_length=max_length)

        self.test_function(data)
        self.optimise_all(data)

        count += 1

        if (data.status < Status.VALID or
                len(data.buffer) * 2 >= BUFFER_SIZE or
                count > 5):
            count = 0
            parameter = GenerationParameters(self.random)
def wrapped_test(*arguments, **kwargs):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True

    test = wrapped_test.hypothesis.inner_test

    if getattr(test, "is_hypothesis_test", False):
        raise InvalidArgument((
            "You have applied @given to the test %s more than once, which "
            "wraps the test several times and is extremely slow. A "
            "similar effect can be gained by combining the arguments "
            "of the two calls to given. For example, instead of "
            "@given(booleans()) @given(integers()), you could write "
            "@given(booleans(), integers())") % (test.__name__, ))

    settings = wrapped_test._hypothesis_internal_use_settings

    random = get_random_for_wrapped_test(test, wrapped_test)

    # Use type information to convert "infer" arguments into appropriate
    # strategies.
    if infer in given_kwargs.values():
        hints = get_type_hints(test)
        for name in [
            name for name, value in given_kwargs.items() if value is infer
        ]:
            if name not in hints:
                raise InvalidArgument(
                    "passed %s=infer for %s, but %s has no type annotation"
                    % (name, test.__name__, name))
            given_kwargs[name] = st.from_type(hints[name])

    processed_args = process_arguments_to_given(
        wrapped_test, arguments, kwargs, given_kwargs, argspec, test, settings,
    )
    arguments, kwargs, test_runner, search_strategy = processed_args

    runner = getattr(search_strategy, "runner", None)
    if isinstance(runner, TestCase) and test.__name__ in dir(TestCase):
        msg = ("You have applied @given to the method %s, which is "
               "used by the unittest runner but is not itself a test."
               " This is not useful in any way." % test.__name__)
        fail_health_check(settings, msg, HealthCheck.not_a_test_method)
    if bad_django_TestCase(runner):  # pragma: no cover
        # Covered by the Django tests, but not the pytest coverage task
        raise InvalidArgument(
            "You have applied @given to a method on %s, but this "
            "class does not inherit from the supported versions in "
            "`hypothesis.extra.django`. Use the Hypothesis variants "
            "to ensure that each example is run in a separate "
            "database transaction." % qualname(type(runner)))

    state = StateForActualGivenExecution(
        test_runner, search_strategy, test, settings, random, wrapped_test,
    )

    reproduce_failure = wrapped_test._hypothesis_internal_use_reproduce_failure

    # If there was a @reproduce_failure decorator, use it to reproduce
    # the error (or complain that we couldn't). Either way, this will
    # always raise some kind of error.
    if reproduce_failure is not None:
        expected_version, failure = reproduce_failure
        if expected_version != __version__:
            raise InvalidArgument(
                ("Attempting to reproduce a failure from a different "
                 "version of Hypothesis. This failure is from %s, but "
                 "you are currently running %r. Please change your "
                 "Hypothesis version to a matching one.")
                % (expected_version, __version__))
        try:
            state.execute_once(
                ConjectureData.for_buffer(decode_failure(failure)),
                print_example=True,
                is_final=True,
            )
            raise DidNotReproduce(
                "Expected the test to raise an error, but it "
                "completed successfully.")
        except StopTest:
            raise DidNotReproduce(
                "The shape of the test data has changed in some way "
                "from where this blob was defined. Are you sure "
                "you're running the same test?")
        except UnsatisfiedAssumption:
            raise DidNotReproduce(
                "The test data failed to satisfy an assumption in the "
                "test. Have you added it since this blob was "
                "generated?")

    # There was no @reproduce_failure, so start by running any explicit
    # examples from @example decorators.
    execute_explicit_examples(state, wrapped_test, arguments, kwargs)

    # If there were any explicit examples, they all ran successfully.
    # The next step is to use the Conjecture engine to run the test on
    # many different inputs.
    if settings.max_examples <= 0:
        return

    if not (Phase.reuse in settings.phases or Phase.generate in settings.phases):
        return

    try:
        if isinstance(runner, TestCase) and hasattr(runner, "subTest"):
            subTest = runner.subTest
            try:
                runner.subTest = fake_subTest
                state.run_engine()
            finally:
                runner.subTest = subTest
        else:
            state.run_engine()
    except BaseException as e:
        # The exception caught here should either be an actual test
        # failure (or MultipleFailures), or some kind of fatal error
        # that caused the engine to stop.
        generated_seed = wrapped_test._hypothesis_internal_use_generated_seed
        with local_settings(settings):
            if not (state.failed_normally or generated_seed is None):
                if running_under_pytest:
                    report(
                        "You can add @seed(%(seed)d) to this test or "
                        "run pytest with --hypothesis-seed=%(seed)d "
                        "to reproduce this failure." % {"seed": generated_seed})
                else:
                    report("You can add @seed(%d) to this test to "
                           "reproduce this failure." % (generated_seed, ))
            # The dance here is to avoid showing users long tracebacks
            # full of Hypothesis internals they don't care about.
            # We have to do this inline, to avoid adding another
            # internal stack frame just when we've removed the rest.
            if PY2:
                # Python 2 doesn't have Exception.with_traceback(...);
                # instead it has a three-argument form of the `raise`
                # statement. Unfortunately this is a SyntaxError on
                # Python 3, and before Python 2.7.9 it was *also* a
                # SyntaxError to use it in a nested function so we
                # can't `exec` or `eval` our way out (BPO-21591).
                # So unless we break some versions of Python 2, none
                # of them get traceback elision.
                raise
            # On Python 3, we swap out the real traceback for our
            # trimmed version. Using a variable ensures that the line
            # which will actually appear in tracebacks is as clear as
            # possible - "raise the_error_hypothesis_found".
            the_error_hypothesis_found = e.with_traceback(
                get_trimmed_traceback())
            raise the_error_hypothesis_found
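# The reproduce_failure branch above re-runs a failure blob that Hypothesis
# printed for a previous failure. Usage is roughly as follows; the version
# string, blob bytes, and test are placeholders standing in for values copied
# from a real failure report:
from hypothesis import given, reproduce_failure, strategies as st

@reproduce_failure("5.8.0", b"AXicY2BgAAAABAAB")  # placeholder version and blob
@given(st.integers())
def test_previously_failing(x):
    assert x < 100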
def generate_new_examples(self):
    if Phase.generate not in self.settings.phases:
        return
    if self.interesting_examples:
        # The example database has failing examples from a previous run,
        # so we'd rather report that they're still failing ASAP than take
        # the time to look for additional failures.
        return

    self.debug("Generating new examples")

    assert self.should_generate_more()
    zero_data = self.cached_test_function(bytes(BUFFER_SIZE))
    if zero_data.status > Status.OVERRUN:
        self.__data_cache.pin(zero_data.buffer)

    if zero_data.status == Status.OVERRUN or (
        zero_data.status == Status.VALID and len(zero_data.buffer) * 2 > BUFFER_SIZE
    ):
        fail_health_check(
            self.settings,
            "The smallest natural example for your test is extremely "
            "large. This makes it difficult for Hypothesis to generate "
            "good examples, especially when trying to reduce failing ones "
            "at the end. Consider reducing the size of your data if it is "
            "of a fixed size. You could also fix this by improving how "
            "your data shrinks (see https://hypothesis.readthedocs.io/en/"
            "latest/data.html#shrinking for details), or by introducing "
            "default values inside your strategy. e.g. could you replace "
            "some arguments with their defaults by using "
            "one_of(none(), some_complex_strategy)?",
            HealthCheck.large_base_example,
        )

    self.health_check_state = HealthCheckState()

    # We attempt to use the size of the minimal generated test case starting
    # from a given novel prefix as a guideline to generate smaller test
    # cases for an initial period, by restricting ourselves to test cases
    # that are not much larger than it.
    #
    # Calculating the actual minimal generated test case is hard, so we
    # take a best guess that zero extending a prefix produces the minimal
    # test case starting with that prefix (this is true for our built in
    # strategies). This is only a reasonable thing to do if the resulting
    # test case is valid. If we regularly run into situations where it is
    # not valid then this strategy is a waste of time, so we want to
    # abandon it early. In order to do this we track how many times in a
    # row it has failed to work, and abort small test case generation when
    # it has failed too many times in a row.
    consecutive_zero_extend_is_invalid = 0

    # We control growth during initial example generation, for two
    # reasons:
    #
    # * It gives us an opportunity to find small examples early, which
    #   gives us a fast path for easy to find bugs.
    # * It avoids low probability events where we might end up
    #   generating very large examples during health checks, which
    #   on slower machines can trigger HealthCheck.too_slow.
    #
    # The heuristic we use is that we attempt to estimate the smallest
    # extension of this prefix, and limit the size to no more than
    # an order of magnitude larger than that. If we fail to estimate
    # the size accurately, we skip over this prefix and try again.
    #
    # We need to tune the example size based on the initial prefix,
    # because any fixed size might be too small, and any size based
    # on the strategy in general can fall afoul of strategies that
    # have very different sizes for different prefixes.
    small_example_cap = clamp(10, self.settings.max_examples // 10, 50)

    optimise_at = max(self.settings.max_examples // 2, small_example_cap + 1)
    ran_optimisations = False

    while self.should_generate_more():
        prefix = self.generate_novel_prefix()
        assert len(prefix) <= BUFFER_SIZE
        if (
            self.valid_examples <= small_example_cap
            and self.call_count <= 5 * small_example_cap
            and not self.interesting_examples
            and consecutive_zero_extend_is_invalid < 5
        ):
            minimal_example = self.cached_test_function(
                prefix + bytes(BUFFER_SIZE - len(prefix))
            )

            if minimal_example.status < Status.VALID:
                consecutive_zero_extend_is_invalid += 1
                continue

            consecutive_zero_extend_is_invalid = 0

            minimal_extension = len(minimal_example.buffer) - len(prefix)

            max_length = min(len(prefix) + minimal_extension * 10, BUFFER_SIZE)

            # We could end up in a situation where even though the prefix was
            # novel when we generated it, because we've now tried zero extending
            # it not all possible continuations of it will be novel. In order to
            # avoid making redundant test calls, we rerun it in simulation mode
            # first. If this has a predictable result, then we don't bother
            # running the test function for real here. If however we encounter
            # some novel behaviour, we try again with the real test function,
            # starting from the new novel prefix that it has discovered.
            try:
                trial_data = self.new_conjecture_data(
                    prefix=prefix, max_length=max_length
                )
                self.tree.simulate_test_function(trial_data)
                continue
            except PreviouslyUnseenBehaviour:
                pass

            # If the simulation entered part of the tree that has been killed,
            # we don't want to run this.
            if trial_data.observer.killed:
                continue

            # We might have hit the cap on number of examples we should
            # run when calculating the minimal example.
            if not self.should_generate_more():
                break

            prefix = trial_data.buffer
        else:
            max_length = BUFFER_SIZE

        data = self.new_conjecture_data(prefix=prefix, max_length=max_length)

        self.test_function(data)

        self.generate_mutations_from(data)

        # Although the optimisations are logically a distinct phase, we
        # actually normally run them as part of example generation. The
        # reason for this is that we cannot guarantee that optimisation
        # actually exhausts our budget: It might finish running and we
        # discover that actually we still could run a bunch more test cases
        # if we want.
        if (
            self.valid_examples >= max(small_example_cap, optimise_at)
            and not ran_optimisations
        ):
            ran_optimisations = True
            self.optimise_targets()
def generate_new_examples(self): if Phase.generate not in self.settings.phases: return if self.interesting_examples: # The example database has failing examples from a previous run, # so we'd rather report that they're still failing ASAP than take # the time to look for additional failures. return zero_data = self.cached_test_function(hbytes(BUFFER_SIZE)) if zero_data.status > Status.OVERRUN: self.__data_cache.pin(zero_data.buffer) self.optimise_all(zero_data) if zero_data.status == Status.OVERRUN or ( zero_data.status == Status.VALID and len(zero_data.buffer) * 2 > BUFFER_SIZE): fail_health_check( self.settings, "The smallest natural example for your test is extremely " "large. This makes it difficult for Hypothesis to generate " "good examples, especially when trying to reduce failing ones " "at the end. Consider reducing the size of your data if it is " "of a fixed size. You could also fix this by improving how " "your data shrinks (see https://hypothesis.readthedocs.io/en/" "latest/data.html#shrinking for details), or by introducing " "default values inside your strategy. e.g. could you replace " "some arguments with their defaults by using " "one_of(none(), some_complex_strategy)?", HealthCheck.large_base_example, ) self.health_check_state = HealthCheckState() def should_generate_more(): # If we haven't found a bug, keep looking. We check this before # doing anything else as it's by far the most common case. if not self.interesting_examples: return True # If we've found a bug and won't report more than one, stop looking. elif not self.settings.report_multiple_bugs: return False assert self.first_bug_found_at <= self.last_bug_found_at <= self.call_count # End the generation phase where we would have ended it if no bugs had # been found. This reproduces the exit logic in `self.test_function`, # but with the important distinction that this clause will move on to # the shrinking phase having found one or more bugs, while the other # will exit having found zero bugs. if (self.valid_examples >= self.settings.max_examples or self.call_count >= max(self.settings.max_examples * 10, 1000)): # pragma: no cover return False # Otherwise, keep searching for between ten and 'a heuristic' calls. # We cap 'calls after first bug' so errors are reported reasonably # soon even for tests that are allowed to run for a very long time, # or sooner if the latest half of our test effort has been fruitless. return self.call_count < MIN_TEST_CALLS or self.call_count < min( self.first_bug_found_at + 1000, self.last_bug_found_at * 2) # GenerationParameters are a set of decisions we make that are global # to the whole test case, used to bias the data generation in various # ways. This is an approach very very loosely inspired by the paper # "Swarm testing." by Groce et al. in that it induces deliberate # correlation between otherwise independent decisions made during the # generation process. # # More importantly the generation is designed to make certain scenarios # more likely (e.g. small examples, duplicated values), which can help # or hurt in terms of finding interesting things. Whenever the result # of our generation is a bad test case, for whatever definition of # "bad" we like (currently, invalid or too large), we ditch the # parameter early. This allows us to potentially generate good test # cases significantly more often than we otherwise would, by selecting # for parameters that make them more likely. 
parameter = GenerationParameters(self.random) count = 0 while should_generate_more(): prefix = self.generate_novel_prefix() data = self.new_conjecture_data(draw_bytes_with(prefix, parameter)) self.test_function(data) self.optimise_all(data) count += 1 if (data.status < Status.VALID or len(data.buffer) * 2 >= BUFFER_SIZE or count > 5): count = 0 parameter = GenerationParameters(self.random)
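For readers who have not met the swarm-testing idea before, the loop above is easier to follow with a toy model. The sketch below is not the real GenerationParameters: it stands in a single whole-test-case decision (a probability of drawing non-zero bytes) and a fake test runner, and shows only the control flow described above, namely "keep a parameter while it produces good test cases, ditch it early on a bad result or after a handful of uses".

# Toy sketch under stated assumptions; `Parameter` and `run_one` are
# hypothetical stand-ins for the real generation machinery.
import random

BUFFER_SIZE = 8 * 1024
MAX_RUNS = 20


class Parameter:
    """Whole-test-case generation decisions, made once and then reused."""

    def __init__(self, rnd):
        self.nonzero_probability = rnd.random()

    def draw_buffer(self, rnd, size=64):
        return bytes(
            rnd.randrange(256) if rnd.random() < self.nonzero_probability else 0
            for _ in range(size)
        )


def run_one(buffer):
    """Stand-in for running the test; True means the run was a 'good' test case."""
    # Pretend that all-zero buffers are invalid and huge ones would overrun.
    return sum(1 for b in buffer if b) > 0 and len(buffer) * 2 < BUFFER_SIZE


rnd = random.Random(0)
parameter = Parameter(rnd)
count = 0
for _ in range(MAX_RUNS):
    buffer = parameter.draw_buffer(rnd)
    good = run_one(buffer)
    count += 1
    # Ditch the parameter early on a bad result, or after five uses, so that
    # productive parameters are reused and unproductive ones are abandoned.
    if not good or count > 5:
        count = 0
        parameter = Parameter(rnd)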
def generate_new_examples(self): if Phase.generate not in self.settings.phases: return if self.interesting_examples: # The example database has failing examples from a previous run, # so we'd rather report that they're still failing ASAP than take # the time to look for additional failures. return zero_data = self.cached_test_function(hbytes(BUFFER_SIZE)) if zero_data.status > Status.OVERRUN: self.__data_cache.pin(zero_data.buffer) if zero_data.status == Status.OVERRUN or ( zero_data.status == Status.VALID and len(zero_data.buffer) * 2 > BUFFER_SIZE): fail_health_check( self.settings, "The smallest natural example for your test is extremely " "large. This makes it difficult for Hypothesis to generate " "good examples, especially when trying to reduce failing ones " "at the end. Consider reducing the size of your data if it is " "of a fixed size. You could also fix this by improving how " "your data shrinks (see https://hypothesis.readthedocs.io/en/" "latest/data.html#shrinking for details), or by introducing " "default values inside your strategy. e.g. could you replace " "some arguments with their defaults by using " "one_of(none(), some_complex_strategy)?", HealthCheck.large_base_example, ) if zero_data is not Overrun: # If the language starts with writes of length >= cap then there is # only one string in it: Everything after cap is forced to be zero (or # to be whatever value is written there). That means that once we've # tried the zero value, there's nothing left for us to do, so we # exit early here. has_non_forced = False # It's impossible to fall out of this loop normally because if we # did then that would mean that all blocks are writes, so we would # already have triggered the exhaustedness check on the tree and # finished running. for b in zero_data.blocks: # pragma: no branch if b.start >= self.cap: break if not b.forced: has_non_forced = True break if not has_non_forced: self.exit_with(ExitReason.finished) self.health_check_state = HealthCheckState() def should_generate_more(): # If we haven't found a bug, keep looking. We check this before # doing anything else as it's by far the most common case. if not self.interesting_examples: return True # If we've found a bug and won't report more than one, stop looking. elif not self.settings.report_multiple_bugs: return False assert self.first_bug_found_at <= self.last_bug_found_at <= self.call_count # End the generation phase where we would have ended it if no bugs had # been found. This reproduces the exit logic in `self.test_function`, # but with the important distinction that this clause will move on to # the shrinking phase having found one or more bugs, while the other # will exit having found zero bugs. if (self.valid_examples >= self.settings.max_examples or self.call_count >= max(self.settings.max_examples * 10, 1000)): # pragma: no cover return False # Otherwise, keep searching for between ten and 'a heuristic' calls. # We cap 'calls after first bug' so errors are reported reasonably # soon even for tests that are allowed to run for a very long time, # or sooner if the latest half of our test effort has been fruitless. 
return self.call_count < MIN_TEST_CALLS or self.call_count < min( self.first_bug_found_at + 1000, self.last_bug_found_at * 2) count = 0 mutations = 0 mutator = self._new_mutator() zero_bound_queue = [] while should_generate_more(): if (count < 10 or self.health_check_state is not None # If we have not found a valid prefix yet, the target selector will # be empty and the mutation stage will fail with a very rare internal # error. We therefore continue this initial random generation step # until we have found at least one prefix to mutate. or len(self.target_selector) == 0 # For long-running tests, if we are not currently dealing with an # overrun we want a small chance to generate an entirely novel buffer. or not (zero_bound_queue or self.random.randrange(20))): prefix = self.generate_novel_prefix() def draw_bytes(data, n): if data.index < len(prefix): result = prefix[data.index:data.index + n] # We always draw prefixes as a whole number of blocks assert len(result) == n else: result = uniform(self.random, n) return self.__zero_bound(data, result) data = self.new_conjecture_data(draw_bytes) self.test_function(data) data.freeze() count += 1 elif zero_bound_queue: # Whenever we generated an example and it hits a bound # which forces zero blocks into it, this creates a weird # distortion effect by making certain parts of the data # stream (especially ones to the right) much more likely # to be zero. We fix this by redistributing the generated # data by shuffling it randomly. This results in the # zero data being spread evenly throughout the buffer. # Hopefully the shrinking this causes will cause us to # naturally fail to hit the bound. # If it doesn't then we will queue the new version up again # (now with more zeros) and try again. overdrawn = zero_bound_queue.pop() buffer = bytearray(overdrawn.buffer) # These will have values written to them that are different # from what's in them anyway, so the value there doesn't # really "count" for distributional purposes, and if we # leave them in then they can cause the fraction of non # zero bytes to increase on redraw instead of decrease. for i in overdrawn.forced_indices: buffer[i] = 0 self.random.shuffle(buffer) buffer = hbytes(buffer) def draw_bytes(data, n): result = buffer[data.index:data.index + n] if len(result) < n: result += hbytes(n - len(result)) return self.__zero_bound(data, result) data = self.new_conjecture_data(draw_bytes=draw_bytes) self.test_function(data) data.freeze() else: origin = self.target_selector.select() mutations += 1 data = self.new_conjecture_data(draw_bytes=mutator(origin)) self.test_function(data) data.freeze() if data.status > origin.status: mutations = 0 elif data.status < origin.status or mutations >= 10: # Cap the variations of a single example and move on to # an entirely fresh start. Ten is an entirely arbitrary # constant, but it's been working well for years. mutations = 0 mutator = self._new_mutator() if getattr(data, "hit_zero_bound", False): zero_bound_queue.append(data) mutations += 1
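The zero-bound redistribution step is the subtle part of the loop above, so here is a minimal sketch of it in isolation. The function name and the plain set of forced indices are assumptions for illustration; the two operations are the ones the comments describe: zero out the forced positions (their values did not really "count"), then shuffle so the zeros are spread evenly through the buffer before redrawing.

# Minimal sketch, assuming forced positions are given as a set of indices.
import random


def redistribute_overdrawn(buffer, forced_indices, rnd):
    scratch = bytearray(buffer)
    for i in forced_indices:
        # These positions were overwritten by the bound anyway, so keep them
        # from inflating the non-zero fraction on the redraw.
        scratch[i] = 0
    rnd.shuffle(scratch)
    return bytes(scratch)


rnd = random.Random(0)
original = bytes([250, 251, 252, 0, 0, 0, 253, 254])
print(redistribute_overdrawn(original, {1, 6}, rnd))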
def wrapped_test(*arguments, **kwargs): # Tell pytest to omit the body of this function from tracebacks __tracebackhide__ = True test = wrapped_test.hypothesis.inner_test if getattr(test, 'is_hypothesis_test', False): note_deprecation( ('You have applied @given to test: %s more than once. In ' 'future this will be an error. Applying @given twice ' 'wraps the test twice, which can be extremely slow. A ' 'similar effect can be gained by combining the arguments ' 'of the two calls to given. For example, instead of ' '@given(booleans()) @given(integers()), you could write ' '@given(booleans(), integers())') % (test.__name__, )) settings = wrapped_test._hypothesis_internal_use_settings random = get_random_for_wrapped_test(test, wrapped_test) if infer in generator_kwargs.values(): hints = get_type_hints(test) for name in [ name for name, value in generator_kwargs.items() if value is infer ]: if name not in hints: raise InvalidArgument( 'passed %s=infer for %s, but %s has no type annotation' % (name, test.__name__, name)) generator_kwargs[name] = st.from_type(hints[name]) processed_args = process_arguments_to_given( wrapped_test, arguments, kwargs, generator_arguments, generator_kwargs, argspec, test, settings) arguments, kwargs, test_runner, search_strategy = processed_args runner = getattr(search_strategy, 'runner', None) if isinstance(runner, TestCase) and test.__name__ in dir(TestCase): msg = ('You have applied @given to the method %s, which is ' 'used by the unittest runner but is not itself a test.' ' This is not useful in any way.' % test.__name__) fail_health_check(settings, msg, HealthCheck.not_a_test_method) if bad_django_TestCase(runner): # pragma: no cover # Covered by the Django tests, but not the pytest coverage task raise InvalidArgument( 'You have applied @given to a method on %s, but this ' 'class does not inherit from the supported versions in ' '`hypothesis.extra.django`. Use the Hypothesis variants ' 'to ensure that each example is run in a separate ' 'database transaction.' % qualname(type(runner))) state = StateForActualGivenExecution( test_runner, search_strategy, test, settings, random, had_seed=wrapped_test._hypothesis_internal_use_seed) reproduce_failure = \ wrapped_test._hypothesis_internal_use_reproduce_failure if reproduce_failure is not None: expected_version, failure = reproduce_failure if expected_version != __version__: raise InvalidArgument( ('Attempting to reproduce a failure from a different ' 'version of Hypothesis. This failure is from %s, but ' 'you are currently running %r. Please change your ' 'Hypothesis version to a matching one.') % (expected_version, __version__)) try: state.execute( ConjectureData.for_buffer(decode_failure(failure)), print_example=True, is_final=True, ) raise DidNotReproduce( 'Expected the test to raise an error, but it ' 'completed successfully.') except StopTest: raise DidNotReproduce( 'The shape of the test data has changed in some way ' 'from where this blob was defined. Are you sure ' "you're running the same test?") except UnsatisfiedAssumption: raise DidNotReproduce( 'The test data failed to satisfy an assumption in the ' 'test. 
Have you added it since this blob was ' 'generated?') execute_explicit_examples(test_runner, test, wrapped_test, settings, arguments, kwargs) if settings.max_examples <= 0: return if not (Phase.reuse in settings.phases or Phase.generate in settings.phases): return try: if isinstance(runner, TestCase) and hasattr(runner, 'subTest'): subTest = runner.subTest try: setattr(runner, 'subTest', fake_subTest) state.run() finally: setattr(runner, 'subTest', subTest) else: state.run() except BaseException as e: generated_seed = \ wrapped_test._hypothesis_internal_use_generated_seed with local_settings(settings): if not (state.failed_normally or generated_seed is None): if running_under_pytest: report( 'You can add @seed(%(seed)d) to this test or ' 'run pytest with --hypothesis-seed=%(seed)d ' 'to reproduce this failure.' % {'seed': generated_seed}) else: report('You can add @seed(%d) to this test to ' 'reproduce this failure.' % (generated_seed, )) # The dance here is to avoid showing users long tracebacks # full of Hypothesis internals they don't care about. # We have to do this inline, to avoid adding another # internal stack frame just when we've removed the rest. if PY2: # Python 2 doesn't have Exception.with_traceback(...); # instead it has a three-argument form of the `raise` # statement. Unfortunately this is a SyntaxError on # Python 3, and before Python 2.7.9 it was *also* a # SyntaxError to use it in a nested function so we # can't `exec` or `eval` our way out (BPO-21591). # So unless we break some versions of Python 2, none # of them get traceback elision. raise # On Python 3, we swap out the real traceback for our # trimmed version. Using a variable ensures that the line # which will actually appear in tracebacks is as clear as # possible - "raise the_error_hypothesis_found". the_error_hypothesis_found = \ e.with_traceback(get_trimmed_traceback()) raise the_error_hypothesis_found
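The traceback-swapping dance at the end of that wrapper is worth seeing without the Python 2 compatibility noise. The sketch below assumes a hypothetical INTERNAL_FILES / is_internal_frame classification (the real code delegates this to get_trimmed_traceback), but the mechanism is the same: walk tb_next past frames the user does not care about, then re-raise the original exception with the trimmed traceback attached so the reported failure starts in user code.

# Sketch under stated assumptions; the frame-classification helpers are
# hypothetical stand-ins for get_trimmed_traceback.
import sys

INTERNAL_FILES = ("_internal_runner.py",)  # assumed list of internal modules


def is_internal_frame(tb):
    return tb.tb_frame.f_code.co_filename.endswith(INTERNAL_FILES)


def trimmed_traceback(tb):
    # Skip leading internal frames so the user's own code appears at the top
    # of the traceback that is actually reported.
    while tb is not None and is_internal_frame(tb):
        tb = tb.tb_next
    return tb


def run_user_test(test):
    try:
        test()
    except Exception as e:
        _, _, tb = sys.exc_info()
        # A clearly named variable keeps the re-raise line readable in the
        # traceback users actually see.
        the_error_found = e.with_traceback(trimmed_traceback(tb))
        raise the_error_found from None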
def generate_new_examples(self): if Phase.generate not in self.settings.phases: return if self.interesting_examples: # The example database has failing examples from a previous run, # so we'd rather report that they're still failing ASAP than take # the time to look for additional failures. return self.debug("Generating new examples") zero_data = self.cached_test_function(hbytes(BUFFER_SIZE)) if zero_data.status > Status.OVERRUN: self.__data_cache.pin(zero_data.buffer) if zero_data.status == Status.OVERRUN or ( zero_data.status == Status.VALID and len(zero_data.buffer) * 2 > BUFFER_SIZE ): fail_health_check( self.settings, "The smallest natural example for your test is extremely " "large. This makes it difficult for Hypothesis to generate " "good examples, especially when trying to reduce failing ones " "at the end. Consider reducing the size of your data if it is " "of a fixed size. You could also fix this by improving how " "your data shrinks (see https://hypothesis.readthedocs.io/en/" "latest/data.html#shrinking for details), or by introducing " "default values inside your strategy. e.g. could you replace " "some arguments with their defaults by using " "one_of(none(), some_complex_strategy)?", HealthCheck.large_base_example, ) self.health_check_state = HealthCheckState() def should_generate_more(): # End the generation phase where we would have ended it if no bugs had # been found. This reproduces the exit logic in `self.test_function`, # but with the important distinction that this clause will move on to # the shrinking phase having found one or more bugs, while the other # will exit having found zero bugs. if ( self.valid_examples >= self.settings.max_examples or self.call_count >= max(self.settings.max_examples * 10, 1000) or ( self.best_examples_of_observed_targets and self.valid_examples * 2 >= self.settings.max_examples and self.should_optimise ) ): # pragma: no cover return False # If we haven't found a bug, keep looking - if we hit any limits on # the number of tests to run that will raise an exception and stop # the run. if not self.interesting_examples: return True # If we've found a bug and won't report more than one, stop looking. elif not self.settings.report_multiple_bugs: return False assert self.first_bug_found_at <= self.last_bug_found_at <= self.call_count # Otherwise, keep searching for between ten and 'a heuristic' calls. # We cap 'calls after first bug' so errors are reported reasonably # soon even for tests that are allowed to run for a very long time, # or sooner if the latest half of our test effort has been fruitless. return self.call_count < MIN_TEST_CALLS or self.call_count < min( self.first_bug_found_at + 1000, self.last_bug_found_at * 2 ) # We attempt to use the size of the minimal generated test case starting # from a given novel prefix as a guideline to generate smaller test # cases for an initial period, by restricting ourselves to test cases # that are not much larger than it. # # Calculating the actual minimal generated test case is hard, so we # take a best guess that zero extending a prefix produces the minimal # test case starting with that prefix (this is true for our built in # strategies). This is only a reasonable thing to do if the resulting # test case is valid. If we regularly run into situations where it is # not valid then this strategy is a waste of time, so we want to # abandon it early.
In order to do this we track how many times in a # row it has failed to work, and abort small test case generation when # it has failed too many times in a row. consecutive_zero_extend_is_invalid = 0 while should_generate_more(): prefix = self.generate_novel_prefix() assert len(prefix) <= BUFFER_SIZE # We control growth during initial example generation, for two # reasons: # # * It gives us an opportunity to find small examples early, which # gives us a fast path for easy to find bugs. # * It avoids low probability events where we might end up # generating very large examples during health checks, which # on slower machines can trigger HealthCheck.too_slow. # # The heuristic we use is that we attempt to estimate the smallest # extension of this prefix, and limit the size to no more than # an order of magnitude larger than that. If we fail to estimate # the size accurately, we skip over this prefix and try again. # # We need to tune the example size based on the initial prefix, # because any fixed size might be too small, and any size based # on the strategy in general can fall afoul of strategies that # have very different sizes for different prefixes. small_example_cap = clamp(10, self.settings.max_examples // 10, 50) if ( self.valid_examples <= small_example_cap and self.call_count <= 5 * small_example_cap and not self.interesting_examples and consecutive_zero_extend_is_invalid < 5 ): minimal_example = self.cached_test_function( prefix + hbytes(BUFFER_SIZE - len(prefix)) ) if minimal_example.status < Status.VALID: consecutive_zero_extend_is_invalid += 1 continue consecutive_zero_extend_is_invalid = 0 minimal_extension = len(minimal_example.buffer) - len(prefix) max_length = min(len(prefix) + minimal_extension * 10, BUFFER_SIZE) # We could end up in a situation where even though the prefix was # novel when we generated it, because we've now tried zero extending # it not all possible continuations of it will be novel. In order to # avoid making redundant test calls, we rerun it in simulation mode # first. If this has a predictable result, then we don't bother # running the test function for real here. If however we encounter # some novel behaviour, we try again with the real test function, # starting from the new novel prefix that it has discovered. try: trial_data = self.new_conjecture_data( prefix=prefix, max_length=max_length ) self.tree.simulate_test_function(trial_data) continue except PreviouslyUnseenBehaviour: pass # If the simulation entered part of the tree that has been killed, # we don't want to run this. if trial_data.observer.killed: continue # We might have hit the cap on number of examples we should # run when calculating the minimal example. if not should_generate_more(): break prefix = trial_data.buffer else: max_length = BUFFER_SIZE data = self.new_conjecture_data(prefix=prefix, max_length=max_length) self.test_function(data) # A thing that is often useful but rarely happens by accident is # to generate the same value at multiple different points in the # test case. # # Rather than make this the responsibility of individual strategies # we implement a small mutator that just takes parts of the test # case with the same label and tries replacing one of them with a # copy of the other and tries running it. If we've made a good # guess about what to put where, this will run a similar generated # test case with more duplication. if ( # An OVERRUN doesn't have enough information about the test # case to mutate, so we just skip those.
data.status >= Status.INVALID # This has a tendency to trigger some weird edge cases during # generation so we don't let it run until we're done with the # health checks. and self.health_check_state is None ): initial_calls = self.call_count failed_mutations = 0 while ( should_generate_more() # We implement fairly conservative checks for how long # we should run mutation for, as it's generally not obvious # how helpful it is for any given test case. and self.call_count <= initial_calls + 5 and failed_mutations <= 5 ): groups = defaultdict(list) for ex in data.examples: groups[ex.label, ex.depth].append(ex) groups = [v for v in groups.values() if len(v) > 1] if not groups: break group = self.random.choice(groups) ex1, ex2 = sorted( self.random.sample(group, 2), key=lambda i: i.index ) assert ex1.end <= ex2.start replacements = [data.buffer[e.start : e.end] for e in [ex1, ex2]] replacement = self.random.choice(replacements) try: # We attempt to replace both of the examples with # whichever choice we made. Note that this might end # up messing up and getting the example boundaries # wrong - matching labels are only a best guess as to # whether the two are equivalent - but it doesn't # really matter. It may not achieve the desired result # but it's still a perfectly acceptable choice sequence # to try. new_data = self.cached_test_function( data.buffer[: ex1.start] + replacement + data.buffer[ex1.end : ex2.start] + replacement + data.buffer[ex2.end :], # We set error_on_discard so that we don't end up # entering parts of the tree we consider redundant # and not worth exploring. error_on_discard=True, extend=BUFFER_SIZE, ) except ContainsDiscard: failed_mutations += 1 continue if ( new_data.status >= data.status and data.buffer != new_data.buffer and all( k in new_data.target_observations and new_data.target_observations[k] >= v for k, v in data.target_observations.items() ) ): data = new_data failed_mutations = 0 else: failed_mutations += 1
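The buffer splice inside that mutation loop is the easiest part to get wrong, so here it is as a small pure function. The (start, end) tuples are an illustrative stand-in for the engine's Example objects; only the byte arithmetic is taken from the code above: pick one of the two same-label spans at random and write it into both positions.

# Sketch under stated assumptions; spans are plain (start, end) tuples.
import random


def duplicate_spans(buffer, span_a, span_b, rnd):
    (a_start, a_end), (b_start, b_end) = sorted([span_a, span_b])
    assert a_end <= b_start, "spans must not overlap"
    replacement = rnd.choice([buffer[a_start:a_end], buffer[b_start:b_end]])
    # Splice the chosen bytes into both positions; even if the guess about
    # equivalent structure is wrong, the result is still a valid buffer to try.
    return (
        buffer[:a_start]
        + replacement
        + buffer[a_end:b_start]
        + replacement
        + buffer[b_end:]
    )


rnd = random.Random(0)
buf = bytes(range(16))
print(duplicate_spans(buf, (2, 4), (10, 12), rnd).hex())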
def record_for_health_check(self, data): # Once we've actually found a bug, there's no point in trying to run # health checks - they'll just mask the actually important information. if data.status == Status.INTERESTING: self.health_check_state = None state = self.health_check_state if state is None: return state.draw_times.extend(data.draw_times) if data.status == Status.VALID: state.valid_examples += 1 elif data.status == Status.INVALID: state.invalid_examples += 1 else: assert data.status == Status.OVERRUN state.overrun_examples += 1 max_valid_draws = 10 max_invalid_draws = 50 max_overrun_draws = 20 assert state.valid_examples <= max_valid_draws if state.valid_examples == max_valid_draws: self.health_check_state = None return if state.overrun_examples == max_overrun_draws: fail_health_check( self.settings, ( "Examples routinely exceeded the max allowable size. " "(%d examples overran while generating %d valid ones)" ". Generating examples this large will usually lead to" " bad results. You could try setting max_size parameters " "on your collections and turning " "max_leaves down on recursive() calls." ) % (state.overrun_examples, state.valid_examples), HealthCheck.data_too_large, ) if state.invalid_examples == max_invalid_draws: fail_health_check( self.settings, ( "It looks like your strategy is filtering out a lot " "of data. Health check found %d filtered examples but " "only %d good ones. This will make your tests much " "slower, and also will probably distort the data " "generation quite a lot. You should adapt your " "strategy to filter less. This can also be caused by " "a low max_leaves parameter in recursive() calls" ) % (state.invalid_examples, state.valid_examples), HealthCheck.filter_too_much, ) draw_time = sum(state.draw_times) if draw_time > 1.0: fail_health_check( self.settings, ( "Data generation is extremely slow: Only produced " "%d valid examples in %.2f seconds (%d invalid ones " "and %d exceeded maximum size). Try decreasing " "size of the data you're generating (with e.g." "max_size or max_leaves parameters)." ) % ( state.valid_examples, draw_time, state.invalid_examples, state.overrun_examples, ), HealthCheck.too_slow, )
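The thresholds in this method are easier to see when condensed into a small table of counters, so the sketch below restates them as an illustrative dataclass. The class name, the string statuses, and the return values are assumptions; the numeric limits (10 valid examples to pass, 20 overruns, 50 invalid examples, one second of cumulative drawing) mirror the constants in the snippet above.

# Condensed sketch, not the real HealthCheckState API.
from dataclasses import dataclass, field


@dataclass
class HealthCheckCounters:
    valid: int = 0
    invalid: int = 0
    overrun: int = 0
    draw_times: list = field(default_factory=list)

    def record(self, status, draw_times=()):
        self.draw_times.extend(draw_times)
        if status == "valid":
            self.valid += 1
        elif status == "invalid":
            self.invalid += 1
        else:
            self.overrun += 1

    def verdict(self):
        if self.valid >= 10:
            return "ok"              # enough good examples; stop checking
        if self.overrun >= 20:
            return "data_too_large"
        if self.invalid >= 50:
            return "filter_too_much"
        if sum(self.draw_times) > 1.0:
            return "too_slow"
        return None                  # keep collecting


counters = HealthCheckCounters()
for _ in range(50):
    counters.record("invalid", draw_times=[0.001])
print(counters.verdict())  # -> "filter_too_much"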
def pytest_runtest_call(item): if not (hasattr(item, "obj") and "hypothesis" in sys.modules): yield return from hypothesis import core from hypothesis.internal.detection import is_hypothesis_test core.running_under_pytest = True if not is_hypothesis_test(item.obj): # If @given was not applied, check whether other hypothesis # decorators were applied, and raise an error if they were. if getattr(item.obj, "is_hypothesis_strategy_function", False): from hypothesis.errors import InvalidArgument raise InvalidArgument( f"{item.nodeid} is a function that returns a Hypothesis strategy, " "but pytest has collected it as a test function. This is useless " "as the function body will never be executed. To define a test " "function, use @given instead of @composite.") message = "Using `@%s` on a test without `@given` is completely pointless." for name, attribute in [ ("example", "hypothesis_explicit_examples"), ("seed", "_hypothesis_internal_use_seed"), ("settings", "_hypothesis_internal_settings_applied"), ("reproduce_example", "_hypothesis_internal_use_reproduce_failure"), ]: if hasattr(item.obj, attribute): from hypothesis.errors import InvalidArgument raise InvalidArgument(message % (name, )) yield else: from hypothesis import HealthCheck, settings from hypothesis.internal.escalation import current_pytest_item from hypothesis.internal.healthcheck import fail_health_check from hypothesis.reporting import with_reporter from hypothesis.statistics import collector, describe_statistics # Retrieve the settings for this test from the test object, which # is normally a Hypothesis wrapped_test wrapper. If this doesn't # work, the test object is probably something weird # (e.g a stateful test wrapper), so we skip the function-scoped # fixture check. settings = getattr(item.obj, "_hypothesis_internal_use_settings", None) # Check for suspicious use of function-scoped fixtures, but only # if the corresponding health check is not suppressed. if (settings is not None and HealthCheck.function_scoped_fixture not in settings.suppress_health_check): # Warn about function-scoped fixtures, excluding autouse fixtures because # the advice is probably not actionable and the status quo seems OK... # See https://github.com/HypothesisWorks/hypothesis/issues/377 for detail. argnames = None for fx_defs in item._request._fixturemanager.getfixtureinfo( node=item, func=item.function, cls=None).name2fixturedefs.values(): if argnames is None: argnames = frozenset( signature(item.function).parameters) for fx in fx_defs: if fx.argname in argnames: active_fx = item._request._get_active_fixturedef( fx.argname) if active_fx.scope == "function": fail_health_check( settings, _FIXTURE_MSG.format( fx.argname, item.nodeid), HealthCheck.function_scoped_fixture, ) if item.get_closest_marker("parametrize") is not None: # Give every parametrized test invocation a unique database key key = item.nodeid.encode() item.obj.hypothesis.inner_test._hypothesis_internal_add_digest = key store = StoringReporter(item.config) def note_statistics(stats): stats["nodeid"] = item.nodeid item.hypothesis_statistics = base64.b64encode( describe_statistics(stats).encode()).decode() with collector.with_value(note_statistics): with with_reporter(store): with current_pytest_item.with_value(item): yield if store.results: item.hypothesis_report_information = list(store.results)
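Both versions of this hook rely on pytest's hookwrapper protocol: the function yields exactly once, and everything before and after the yield runs around the actual test call. The sketch below is a stripped-down, conftest.py-style wrapper showing just that shape; looks_like_hypothesis_test and the attribute stashed on the item are hypothetical helpers, not the real plugin's machinery.

# Sketch of the hookwrapper pattern only; helpers are illustrative.
import pytest


def looks_like_hypothesis_test(obj):
    return getattr(obj, "is_hypothesis_test", False)


@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_call(item):
    if not hasattr(item, "obj") or not looks_like_hypothesis_test(item.obj):
        # Not a Hypothesis test: let pytest run it unmodified.
        yield
        return
    # For Hypothesis tests, extra reporting machinery would be installed
    # here, the test runs at the yield, and results are read afterwards.
    collected = []
    item._collected_hypothesis_notes = collected  # illustrative attribute
    yield
    if collected:
        item.user_properties.append(("hypothesis", collected))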
def pytest_runtest_call(item): if not hasattr(item, "obj"): yield elif not is_hypothesis_test(item.obj): # If @given was not applied, check whether other hypothesis # decorators were applied, and raise an error if they were. if getattr(item.obj, "is_hypothesis_strategy_function", False): raise InvalidArgument( "%s is a function that returns a Hypothesis strategy, but pytest " "has collected it as a test function. This is useless as the " "function body will never be executed. To define a test " "function, use @given instead of @composite." % (item.nodeid, )) message = "Using `@%s` on a test without `@given` is completely pointless." for name, attribute in [ ("example", "hypothesis_explicit_examples"), ("seed", "_hypothesis_internal_use_seed"), ("settings", "_hypothesis_internal_settings_applied"), ("reproduce_example", "_hypothesis_internal_use_reproduce_failure"), ]: if hasattr(item.obj, attribute): raise InvalidArgument(message % (name, )) yield else: # Retrieve the settings for this test from the test object, which # is normally a Hypothesis wrapped_test wrapper. If this doesn't # work, the test object is probably something weird # (e.g a stateful test wrapper), so we skip the function-scoped # fixture check. settings = getattr(item.obj, "_hypothesis_internal_use_settings", None) # Check for suspicious use of function-scoped fixtures, but only # if the corresponding health check is not suppressed. if (settings is not None and HealthCheck.function_scoped_fixture not in settings.suppress_health_check): # Warn about function-scoped fixtures, excluding autouse fixtures because # the advice is probably not actionable and the status quo seems OK... # See https://github.com/HypothesisWorks/hypothesis/issues/377 for detail. msg = ( "%s uses the %r fixture, which is reset between function calls but not " "between test cases generated by `@given(...)`. You can change it to " "a module- or session-scoped fixture if it is safe to reuse; if not " "we recommend using a context manager inside your test function. See " "https://docs.pytest.org/en/latest/fixture.html#sharing-test-data " "for details on fixture scope.") argnames = None for fx_defs in item._request._fixturemanager.getfixtureinfo( node=item, func=item.function, cls=None).name2fixturedefs.values(): if argnames is None: argnames = frozenset( signature(item.function).parameters) for fx in fx_defs: if fx.argname in argnames: active_fx = item._request._get_active_fixturedef( fx.argname) if active_fx.scope == "function": fail_health_check( settings, msg % (item.nodeid, fx.argname), HealthCheck.function_scoped_fixture, ) if item.get_closest_marker("parametrize") is not None: # Give every parametrized test invocation a unique database key key = item.nodeid.encode("utf-8") item.obj.hypothesis.inner_test._hypothesis_internal_add_digest = key store = StoringReporter(item.config) def note_statistics(stats): stats["nodeid"] = item.nodeid item.hypothesis_statistics = base64.b64encode( describe_statistics(stats).encode()).decode() with collector.with_value(note_statistics): with with_reporter(store): yield if store.results: item.hypothesis_report_information = list(store.results)
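One small detail shared by both hook versions is the statistics hand-off: the human-readable statistics text is base64-encoded before being stashed on the item, so it survives as plain ASCII wherever the report is stored, and is decoded again at the point where it is finally printed. A tiny self-contained sketch, with describe_statistics replaced by a stand-in:

# Sketch under stated assumptions; `describe_statistics` is a stand-in
# returning a fixed string rather than the real statistics formatter.
import base64


def describe_statistics(stats):
    return "%(nodeid)s:\n  - %(calls)d passing examples" % stats


stats = {"nodeid": "tests/test_demo.py::test_ints", "calls": 100}
encoded = base64.b64encode(describe_statistics(stats).encode()).decode()

# ...later, e.g. in a terminal-summary hook:
print(base64.b64decode(encoded).decode())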