def test(*args, **kwargs):
    self.__test_runtime = None
    initial_draws = len(data.draw_times)
    start = benchmark_time()
    result = self.test(*args, **kwargs)
    finish = benchmark_time()
    internal_draw_time = sum(data.draw_times[initial_draws:])
    runtime = (finish - start - internal_draw_time) * 1000
    self.__test_runtime = runtime
    if self.settings.deadline is not_set:
        if (
            not self.__warned_deadline and
            runtime >= 200
        ):
            self.__warned_deadline = True
            note_deprecation((
                'Test: %s took %.2fms to run. In future the '
                'default deadline setting will be 200ms, which '
                'will make this an error. You can set deadline to '
                'an explicit value of e.g. %d to turn tests '
                'slower than this into an error, or you can set '
                'it to None to disable this check entirely.') % (
                    self.test.__name__, runtime,
                    ceil(runtime / 100) * 100,
            ))
    else:
        current_deadline = self.settings.deadline
        if not is_final:
            current_deadline *= 1.25
        if runtime >= current_deadline:
            raise DeadlineExceeded(runtime, self.settings.deadline)
    return result
def test(*args, **kwargs):
    self.__test_runtime = None
    initial_draws = len(data.draw_times)
    start = benchmark_time()
    result = self.test(*args, **kwargs)
    finish = benchmark_time()
    internal_draw_time = sum(data.draw_times[initial_draws:])
    runtime = (finish - start - internal_draw_time) * 1000
    self.__test_runtime = runtime
    if self.settings.deadline is not_set:
        if not self.__warned_deadline and runtime >= 200:
            self.__warned_deadline = True
            note_deprecation(
                ('Test took %.2fms to run. In future the default '
                 'deadline setting will be 200ms, which will '
                 'make this an error. You can set deadline to '
                 'an explicit value of e.g. %d to turn tests '
                 'slower than this into an error, or you can set '
                 'it to None to disable this check entirely.') % (
                    runtime, ceil(runtime / 100) * 100,
                ))
    else:
        current_deadline = self.settings.deadline
        if not is_final:
            current_deadline *= 1.25
        if runtime >= current_deadline:
            raise DeadlineExceeded(runtime, self.settings.deadline)
    return result
def test(*args, **kwargs):
    self.__test_runtime = None
    initial_draws = len(data.draw_times)
    start = benchmark_time()
    result = self.test(*args, **kwargs)
    finish = benchmark_time()
    internal_draw_time = sum(data.draw_times[initial_draws:])
    runtime = (finish - start - internal_draw_time) * 1000
    self.__test_runtime = runtime
    current_deadline = self.settings.deadline
    if not is_final:
        current_deadline *= 1.25
    if runtime >= current_deadline:
        raise DeadlineExceeded(runtime, self.settings.deadline)
    return result
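
# Hedged illustration (not part of the snippets above): the wrapper measures
# runtime in milliseconds, excludes time spent inside data draws, and on
# non-final runs grants a 25% grace margin so borderline-slow tests only
# fail deterministically on the final replay. A minimal standalone sketch
# of that check, with hypothetical names:
def check_deadline(runtime_ms, deadline_ms, is_final):
    # During exploration/shrinking the effective deadline is 1.25x the
    # configured one; the final replay enforces the exact deadline.
    current = deadline_ms if is_final else deadline_ms * 1.25
    return runtime_ms < current


assert check_deadline(240.0, 200, is_final=False)      # within the grace margin
assert not check_deadline(240.0, 200, is_final=True)   # fails on the final replay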
def freeze(self):
    if self.frozen:
        assert isinstance(self.buffer, hbytes)
        return
    self.finish_time = benchmark_time()

    while self.example_stack:
        self.stop_example()

    self.frozen = True

    if self.status >= Status.VALID:
        discards = []
        for ex in self.examples:
            if ex.length == 0:
                continue
            if discards:
                u, v = discards[-1]
                if u <= ex.start <= ex.end <= v:
                    continue
            if ex.discarded:
                discards.append((ex.start, ex.end))
                continue

    self.buffer = hbytes(self.buffer)
    self.events = frozenset(self.events)
    del self._draw_bytes
def __init__(self, max_length, draw_bytes):
    self.max_length = max_length
    self.is_find = False
    self._draw_bytes = draw_bytes
    self.overdraw = 0
    self.block_starts = {}
    self.blocks = []
    self.buffer = bytearray()
    self.output = u''
    self.status = Status.VALID
    self.frozen = False
    global global_test_counter
    self.testcounter = global_test_counter
    global_test_counter += 1
    self.start_time = benchmark_time()
    self.events = set()
    self.forced_indices = set()
    self.masked_indices = {}
    self.interesting_origin = None
    self.draw_times = []
    self.max_depth = 0
    self.examples = []
    self.example_stack = []
    self.has_discards = False
    top = self.start_example(TOP_LABEL)
    assert top.depth == 0
def __init__(self, test_function, settings=None, random=None,
             database_key=None):
    self._test_function = test_function
    self.settings = settings or Settings()
    self.shrinks = 0
    self.call_count = 0
    self.event_call_counts = Counter()
    self.valid_examples = 0
    self.start_time = benchmark_time()
    self.random = random or Random(getrandbits(128))
    self.database_key = database_key
    self.status_runtimes = {}

    self.all_drawtimes = []
    self.all_runtimes = []

    self.events_to_strings = WeakKeyDictionary()

    self.target_selector = TargetSelector(self.random)

    self.interesting_examples = {}
    self.covering_examples = {}

    self.shrunk_examples = set()

    self.health_check_state = None

    self.used_examples_from_database = False
    self.reset_tree_to_empty()
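
# Hedged usage sketch (illustration only): driving a ConjectureRunner
# directly with a test function that reads from a ConjectureData object.
# ``draw_bytes`` and ``mark_interesting`` are assumed from Hypothesis's
# conjecture internals; the exact import paths and behaviour are
# assumptions, not shown by the snippets above.
from random import Random


def byte_is_large(data):
    # Mark the case "interesting" when the drawn byte is >= 100, so the
    # runner has a failure to find and then shrink (towards 100).
    if data.draw_bytes(1)[0] >= 100:
        data.mark_interesting()


runner = ConjectureRunner(
    byte_is_large,
    settings=Settings(max_examples=100, database=None),
    random=Random(0),
)
runner.run()
if runner.interesting_examples:
    print([d.buffer for d in runner.interesting_examples.values()])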
def freeze(self):
    if self.frozen:
        assert isinstance(self.buffer, hbytes)
        return
    self.finish_time = benchmark_time()

    while self.example_stack:
        self.stop_example()

    self.frozen = True

    if self.status >= Status.VALID:
        discards = []
        for ex in self.examples:
            if ex.length == 0:
                continue
            if discards:
                u, v = discards[-1]
                if u <= ex.start <= ex.end <= v:
                    continue
            if ex.discarded:
                discards.append((ex.start, ex.end))
                continue
            self.tags.add(structural_tag(ex.label))

    self.buffer = hbytes(self.buffer)
    self.events = frozenset(self.events)
    del self._draw_bytes
def __init__(self, max_length, draw_bytes):
    self.max_length = max_length
    self.is_find = False
    self._draw_bytes = draw_bytes
    self.overdraw = 0
    self.level = 0
    self.block_starts = {}
    self.blocks = []
    self.buffer = bytearray()
    self.output = u''
    self.status = Status.VALID
    self.frozen = False
    self.intervals_by_level = []
    self.interval_stack = []
    global global_test_counter
    self.testcounter = global_test_counter
    global_test_counter += 1
    self.start_time = benchmark_time()
    self.events = set()
    self.forced_indices = set()
    self.capped_indices = {}
    self.interesting_origin = None
    self.tags = set()
    self.draw_times = []
    self.__intervals = None
    self.shrinking_blocks = set()
    self.discarded = []
def __init__(self, max_length, draw_bytes):
    self.max_length = max_length
    self.is_find = False
    self._draw_bytes = draw_bytes
    self.overdraw = 0
    self.block_starts = {}
    self.blocks = []
    self.buffer = bytearray()
    self.output = u''
    self.status = Status.VALID
    self.frozen = False
    global global_test_counter
    self.testcounter = global_test_counter
    global_test_counter += 1
    self.start_time = benchmark_time()
    self.events = set()
    self.forced_indices = set()
    self.forced_blocks = set()
    self.masked_indices = {}
    self.interesting_origin = None
    self.tags = set()
    self.draw_times = []
    self.__intervals = None
    self.max_depth = 0
    self.examples = []
    self.example_stack = []
    self.has_discards = False
    self.start_example(TOP_LABEL)
def test(*args, **kwargs):
    self.__test_runtime = None
    initial_draws = len(data.draw_times)
    start = benchmark_time()
    result = self.test(*args, **kwargs)
    finish = benchmark_time()
    internal_draw_time = sum(data.draw_times[initial_draws:])
    runtime = datetime.timedelta(
        seconds=finish - start - internal_draw_time
    )
    self.__test_runtime = runtime
    current_deadline = self.settings.deadline
    if not is_final:
        current_deadline = (current_deadline // 4) * 5
    if runtime >= current_deadline:
        raise DeadlineExceeded(runtime, self.settings.deadline)
    return result
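
# Hedged aside (illustration only): once the deadline became a
# datetime.timedelta, the 25% grace margin is computed as
# ``(deadline // 4) * 5`` rather than ``deadline * 1.25``, which keeps the
# arithmetic exact in integer microseconds. For a deadline divisible by
# four the two forms agree:
import datetime

deadline = datetime.timedelta(milliseconds=200)
grace = (deadline // 4) * 5
assert grace == datetime.timedelta(milliseconds=250)  # i.e. 1.25x the deadline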
def _run(self):
    self.start_time = benchmark_time()
    self.reuse_existing_examples()
    self.generate_new_examples()
    self.shrink_interesting_examples()
    self.exit_with(ExitReason.finished)
def __init__(self, max_length, prefix, random, observer=None):
    if observer is None:
        observer = DataObserver()
    assert isinstance(observer, DataObserver)
    self.__bytes_drawn = 0
    self.observer = observer
    self.max_length = max_length
    self.is_find = False
    self.overdraw = 0
    self.__block_starts = defaultdict(list)
    self.__block_starts_calculated_to = 0
    self.__prefix = prefix
    self.__random = random

    assert random is not None or max_length <= len(prefix)

    self.blocks = Blocks(self)
    self.buffer = bytearray()
    self.index = 0
    self.output = u""
    self.status = Status.VALID
    self.frozen = False
    global global_test_counter
    self.testcounter = global_test_counter
    global_test_counter += 1
    self.start_time = benchmark_time()
    self.events = set()
    self.forced_indices = set()
    self.interesting_origin = None
    self.draw_times = []
    self.max_depth = 0
    self.has_discards = False
    self.consecutive_discard_counts = []

    self.__result = None

    # Observations used for targeted search. They'll be aggregated in
    # ConjectureRunner.generate_new_examples and fed to TargetSelector.
    self.target_observations = {}

    # Tags which indicate something about which part of the search space
    # this example is in. These are used to guide generation.
    self.tags = set()
    self.labels_for_structure_stack = []

    # Normally unpopulated but we need this in the niche case
    # that self.as_result() is Overrun but we still want the
    # examples for reporting purposes.
    self.__examples = None

    # We want the top level example to have depth 0, so we start
    # at -1.
    self.depth = -1
    self.__example_record = ExampleRecord()
    self.extra_information = ExtraInformation()

    self.start_example(TOP_LABEL)
def __draw(self, strategy):
    at_top_level = self.depth == 0
    self.start_example()
    try:
        if not at_top_level:
            return strategy.do_draw(self)
        else:
            start_time = benchmark_time()
            try:
                return strategy.do_draw(self)
            except BaseException as e:
                mark_for_escalation(e)
                raise
            finally:
                self.draw_times.append(benchmark_time() - start_time)
    finally:
        if not self.frozen:
            self.stop_example()
def draw(self, strategy, label=None):
    if self.is_find and not strategy.supports_find:
        raise InvalidArgument(
            (
                "Cannot use strategy %r within a call to find (presumably "
                "because it would be invalid after the call had ended)."
            )
            % (strategy,)
        )

    at_top_level = self.depth == 0
    if at_top_level:
        # We start this timer early, because accessing attributes on a LazyStrategy
        # can be almost arbitrarily slow. In cases like characters() and text()
        # where we cache something expensive, this led to Flaky deadline errors!
        # See https://github.com/HypothesisWorks/hypothesis/issues/2108
        start_time = benchmark_time()

    strategy.validate()

    if strategy.is_empty:
        self.mark_invalid()

    if self.depth >= MAX_DEPTH:
        self.mark_invalid()

    if label is None:
        label = strategy.label
    self.start_example(label=label)
    try:
        if not at_top_level:
            return strategy.do_draw(self)
        else:
            try:
                strategy.validate()
                try:
                    return strategy.do_draw(self)
                finally:
                    self.draw_times.append(benchmark_time() - start_time)
            except BaseException as e:
                mark_for_escalation(e)
                raise
    finally:
        self.stop_example()
def freeze(self):
    if self.frozen:
        assert isinstance(self.buffer, hbytes)
        return
    self.frozen = True
    self.finish_time = benchmark_time()
    self.buffer = hbytes(self.buffer)
    self.events = frozenset(self.events)
    del self._draw_bytes
def __draw(self, strategy, label):
    at_top_level = self.depth == 0
    if label is None:
        label = strategy.label
    self.start_example(label=label)
    try:
        if not at_top_level:
            return strategy.do_draw(self)
        else:
            start_time = benchmark_time()
            try:
                return strategy.do_draw(self)
            except BaseException as e:
                mark_for_escalation(e)
                raise
            finally:
                self.draw_times.append(benchmark_time() - start_time)
    finally:
        self.stop_example()
def __draw(self, strategy, label):
    at_top_level = self.depth == 0
    if label is None:
        label = strategy.label
    self.start_example(label=label)
    try:
        if not at_top_level:
            return strategy.do_draw(self)
        else:
            try:
                strategy.validate()
                start_time = benchmark_time()
                try:
                    return strategy.do_draw(self)
                finally:
                    self.draw_times.append(benchmark_time() - start_time)
            except BaseException as e:
                mark_for_escalation(e)
                raise
    finally:
        self.stop_example()
def freeze(self):
    if self.frozen:
        assert isinstance(self.buffer, hbytes)
        return
    self.frozen = True
    self.finish_time = benchmark_time()

    # Intervals are sorted as longest first, then by interval start.
    for l in self.intervals_by_level:
        for i in hrange(len(l) - 1):
            if l[i][1] == l[i + 1][0]:
                self.intervals.append((l[i][0], l[i + 1][1]))
    self.intervals = sorted(
        set(self.intervals), key=lambda se: (se[0] - se[1], se[0]))
    self.buffer = hbytes(self.buffer)
    self.events = frozenset(self.events)
    del self._draw_bytes
def freeze(self):
    if self.frozen:
        assert isinstance(self.buffer, hbytes)
        return
    self.finish_time = benchmark_time()
    assert len(self.buffer) == self.index

    # Always finish by closing all remaining examples so that we have a
    # valid tree.
    while self.depth >= 0:
        self.stop_example()

    self.frozen = True

    self.buffer = hbytes(self.buffer)
    self.events = frozenset(self.events)
    del self._draw_bytes
def __init__(self, max_length, draw_bytes, observer=None): if observer is None: observer = DataObserver() assert isinstance(observer, DataObserver) self.observer = observer self.max_length = max_length self.is_find = False self._draw_bytes = draw_bytes self.overdraw = 0 self.__block_starts = defaultdict(list) self.__block_starts_calculated_to = 0 self.blocks = Blocks(self) self.buffer = bytearray() self.index = 0 self.output = u"" self.status = Status.VALID self.frozen = False global global_test_counter self.testcounter = global_test_counter global_test_counter += 1 self.start_time = benchmark_time() self.events = set() self.forced_indices = set() self.interesting_origin = None self.draw_times = [] self.max_depth = 0 self.has_discards = False self.consecutive_discard_counts = [] self.__result = None # Normally unpopulated but we need this in the niche case # that self.as_result() is Overrun but we still want the # examples for reporting purposes. self.__examples = None # We want the top level example to have depth 0, so we start # at -1. self.depth = -1 self.__example_record = ExampleRecord() self.extra_information = ExtraInformation() self.start_example(TOP_LABEL)
def freeze(self):
    if self.frozen:
        assert isinstance(self.buffer, hbytes)
        return
    self.frozen = True
    self.finish_time = benchmark_time()

    # Intervals are sorted as longest first, then by interval start.
    for l in self.intervals_by_level:
        for i in hrange(len(l) - 1):
            if l[i][1] == l[i + 1][0]:
                self.intervals.append((l[i][0], l[i + 1][1]))
    self.intervals = sorted(
        set(self.intervals), key=lambda se: (se[0] - se[1], se[0])
    )
    self.buffer = hbytes(self.buffer)
    self.events = frozenset(self.events)
    del self._draw_bytes
def __init__(self, max_length, draw_bytes, observer=None):
    if observer is None:
        observer = DataObserver()
    assert isinstance(observer, DataObserver)
    self.observer = observer
    self.max_length = max_length
    self.is_find = False
    self._draw_bytes = draw_bytes
    self.overdraw = 0
    self.__block_starts = defaultdict(list)
    self.__block_starts_calculated_to = 0
    self.blocks = Blocks(self)
    self.buffer = bytearray()
    self.index = 0
    self.output = u""
    self.status = Status.VALID
    self.frozen = False
    global global_test_counter
    self.testcounter = global_test_counter
    global_test_counter += 1
    self.start_time = benchmark_time()
    self.events = set()
    self.forced_indices = set()
    self.interesting_origin = None
    self.draw_times = []
    self.max_depth = 0
    self.has_discards = False

    self.__result = None

    # Normally unpopulated but we need this in the niche case
    # that self.as_result() is Overrun but we still want the
    # examples for reporting purposes.
    self.__examples = None

    # We want the top level example to have depth 0, so we start
    # at -1.
    self.depth = -1
    self.__example_record = ExampleRecord()
    self.extra_information = ExtraInformation()

    self.start_example(TOP_LABEL)
def freeze(self):
    if self.frozen:
        assert isinstance(self.buffer, hbytes)
        return
    self.finish_time = benchmark_time()
    assert len(self.buffer) == self.index

    # Always finish by closing all remaining examples so that we have a
    # valid tree.
    while self.depth >= 0:
        self.stop_example()

    self.__example_record.freeze()

    self.frozen = True

    self.buffer = hbytes(self.buffer)
    self.events = frozenset(self.events)
    self.observer.conclude_test(self.status, self.interesting_origin)
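
# Hedged sketch of the observer hook used above: ``freeze`` finishes by
# calling ``observer.conclude_test(status, interesting_origin)``, so a
# custom observer only needs to override the callbacks it cares about.
# The method name and signature come from the snippets; the class body
# below is an assumption, not taken from the source.
class CountingObserver(DataObserver):
    """Counts how many test cases concluded with each status."""

    def __init__(self):
        self.status_counts = {}

    def conclude_test(self, status, interesting_origin):
        self.status_counts[status] = self.status_counts.get(status, 0) + 1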
def __init__(self, max_length, draw_bytes):
    self.max_length = max_length
    self.is_find = False
    self._draw_bytes = draw_bytes
    self.overdraw = 0
    self.level = 0
    self.block_starts = {}
    self.blocks = []
    self.buffer = bytearray()
    self.output = u''
    self.status = Status.VALID
    self.frozen = False
    self.intervals_by_level = []
    self.intervals = []
    self.interval_stack = []
    global global_test_counter
    self.testcounter = global_test_counter
    global_test_counter += 1
    self.start_time = benchmark_time()
    self.events = set()
def freeze(self):
    if self.frozen:
        assert isinstance(self.buffer, hbytes)
        return
    self.finish_time = benchmark_time()
    assert len(self.buffer) == self.index

    # Always finish by closing all remaining examples so that we have a
    # valid tree.
    while self.depth >= 0:
        self.stop_example()

    self.__example_record.freeze()

    self.frozen = True

    self.buffer = hbytes(self.buffer)
    self.events = frozenset(self.events)
    del self._draw_bytes
    self.observer.conclude_test(self.status, self.interesting_origin)
def __init__(self, max_length, draw_bytes):
    self.max_length = max_length
    self.is_find = False
    self._draw_bytes = draw_bytes
    self.overdraw = 0
    self.block_starts = {}
    self.blocks = []
    self.buffer = bytearray()
    self.index = 0
    self.output = u""
    self.status = Status.VALID
    self.frozen = False
    global global_test_counter
    self.testcounter = global_test_counter
    global_test_counter += 1
    self.start_time = benchmark_time()
    self.events = set()
    self.forced_indices = set()
    self.masked_indices = {}
    self.interesting_origin = None
    self.draw_times = []
    self.max_depth = 0
    self.example_boundaries = []

    self.__result = None

    # Normally unpopulated but we need this in the niche case
    # that self.as_result() is Overrun but we still want the
    # examples for reporting purposes.
    self.__examples = None

    # We want the top level example to have depth 0, so we start
    # at -1.
    self.depth = -1
    self.start_example(TOP_LABEL)
    self.extra_information = ExtraInformation()
def find( specifier, # type: SearchStrategy condition, # type: Callable[[Any], bool] settings=None, # type: Settings random=None, # type: Any database_key=None, # type: bytes ): # type: (...) -> Any """Returns the minimal example from the given strategy ``specifier`` that matches the predicate function ``condition``.""" if settings is None: settings = Settings(max_examples=2000) settings = Settings(settings, suppress_health_check=HealthCheck.all()) if database_key is None and settings.database is not None: database_key = function_digest(condition) if not isinstance(specifier, SearchStrategy): raise InvalidArgument('Expected SearchStrategy but got %r of type %s' % (specifier, type(specifier).__name__)) specifier.validate() search = specifier random = random or new_random() successful_examples = [0] last_data = [None] last_repr = [None] def template_condition(data): with BuildContext(data): try: data.is_find = True with deterministic_PRNG(): result = data.draw(search) data.note(result) success = condition(result) except UnsatisfiedAssumption: data.mark_invalid() if success: successful_examples[0] += 1 if settings.verbosity >= Verbosity.verbose: if not successful_examples[0]: report(u'Tried non-satisfying example %s' % (nicerepr(result), )) elif success: if successful_examples[0] == 1: last_repr[0] = nicerepr(result) report(u'Found satisfying example %s' % (last_repr[0], )) last_data[0] = data elif (sort_key(hbytes(data.buffer)) < sort_key( last_data[0].buffer) ) and nicerepr(result) != last_repr[0]: last_repr[0] = nicerepr(result) report(u'Shrunk example to %s' % (last_repr[0], )) last_data[0] = data if success and not data.frozen: data.mark_interesting() start = benchmark_time() runner = ConjectureRunner( template_condition, settings=settings, random=random, database_key=database_key, ) runner.run() note_engine_for_statistics(runner) run_time = benchmark_time() - start if runner.interesting_examples: data = ConjectureData.for_buffer( list(runner.interesting_examples.values())[0].buffer) with BuildContext(data): with deterministic_PRNG(): return data.draw(search) if runner.valid_examples == 0 and (runner.exit_reason != ExitReason.finished): if settings.timeout > 0 and run_time > settings.timeout: raise Timeout(( # pragma: no cover 'Ran out of time before finding enough valid examples for ' '%s. Only %d valid examples found in %.2f seconds.') % (get_pretty_function_description(condition), runner.valid_examples, run_time)) else: raise Unsatisfiable('Unable to satisfy assumptions of %s.' % (get_pretty_function_description(condition), )) raise NoSuchExample(get_pretty_function_description(condition))
def find( specifier, # type: SearchStrategy condition, # type: Callable[[Any], bool] settings=None, # type: Settings random=None, # type: Any database_key=None, # type: bytes ): # type: (...) -> Any """Returns the minimal example from the given strategy ``specifier`` that matches the predicate function ``condition``.""" if settings is None: settings = Settings(max_examples=2000) settings = Settings(settings, suppress_health_check=HealthCheck.all()) if database_key is None and settings.database is not None: database_key = function_digest(condition) if not isinstance(specifier, SearchStrategy): raise InvalidArgument( 'Expected SearchStrategy but got %r of type %s' % ( specifier, type(specifier).__name__ )) specifier.validate() search = specifier random = random or new_random() successful_examples = [0] last_data = [None] last_repr = [None] def template_condition(data): with BuildContext(data): try: data.is_find = True with deterministic_PRNG(): result = data.draw(search) data.note(result) success = condition(result) except UnsatisfiedAssumption: data.mark_invalid() if success: successful_examples[0] += 1 if settings.verbosity >= Verbosity.verbose: if not successful_examples[0]: report( u'Tried non-satisfying example %s' % (nicerepr(result),)) elif success: if successful_examples[0] == 1: last_repr[0] = nicerepr(result) report(u'Found satisfying example %s' % (last_repr[0],)) last_data[0] = data elif ( sort_key(hbytes(data.buffer)) < sort_key(last_data[0].buffer) ) and nicerepr(result) != last_repr[0]: last_repr[0] = nicerepr(result) report(u'Shrunk example to %s' % (last_repr[0],)) last_data[0] = data if success and not data.frozen: data.mark_interesting() start = benchmark_time() runner = ConjectureRunner( template_condition, settings=settings, random=random, database_key=database_key, ) runner.run() note_engine_for_statistics(runner) run_time = benchmark_time() - start if runner.interesting_examples: data = ConjectureData.for_buffer( list(runner.interesting_examples.values())[0].buffer) with BuildContext(data): with deterministic_PRNG(): return data.draw(search) if runner.valid_examples == 0 and ( runner.exit_reason != ExitReason.finished ): if settings.timeout > 0 and run_time > settings.timeout: raise Timeout(( # pragma: no cover 'Ran out of time before finding enough valid examples for ' '%s. Only %d valid examples found in %.2f seconds.' ) % ( get_pretty_function_description(condition), runner.valid_examples, run_time)) else: raise Unsatisfiable( 'Unable to satisfy assumptions of %s.' % (get_pretty_function_description(condition),) ) raise NoSuchExample(get_pretty_function_description(condition))
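
# Hedged usage sketch of the public ``find`` entry point defined above.
# The strategy names assume Hypothesis's public API of the same era, and
# the printed value is what shrinking is expected to produce, not a
# guarantee derived from the snippets:
from hypothesis import find, strategies as st

smallest = find(st.integers(), lambda x: x > 100)
print(smallest)  # expected: 101, the minimal integer satisfying the predicate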
def run(self):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True
    if global_force_seed is None:
        database_key = str_to_bytes(fully_qualified_name(self.test))
    else:
        database_key = None
    self.start_time = benchmark_time()
    runner = ConjectureRunner(
        self.evaluate_test_data,
        settings=self.settings, random=self.random,
        database_key=database_key,
    )
    try:
        runner.run()
    finally:
        self.used_examples_from_database = \
            runner.used_examples_from_database
    note_engine_for_statistics(runner)
    run_time = benchmark_time() - self.start_time
    self.used_examples_from_database = runner.used_examples_from_database

    if runner.used_examples_from_database:
        if self.settings.derandomize:
            note_deprecation((
                'In future derandomize will imply database=None, but your '
                'test: %s is currently using examples from the database. '
                'To get the future behaviour, update your settings to '
                'include database=None.') % (self.test.__name__,)
            )
        if self.__had_seed:
            note_deprecation((
                'In future use of @seed will imply database=None in your '
                'settings, but your test: %s is currently using examples '
                'from the database. To get the future behaviour, update '
                'your settings for this test to include database=None.') % (
                    self.test.__name__,)
            )

    timed_out = runner.exit_reason == ExitReason.timeout
    if runner.call_count == 0:
        return
    if runner.interesting_examples:
        self.falsifying_examples = sorted(
            [d for d in runner.interesting_examples.values()],
            key=lambda d: sort_key(d.buffer), reverse=True
        )
    else:
        if runner.valid_examples == 0:
            if timed_out:
                raise Timeout((
                    'Ran out of time before finding a satisfying '
                    'example for %s. Only found %d examples in %.2fs.'
                ) % (
                    get_pretty_function_description(self.test),
                    runner.valid_examples, run_time
                ))
            else:
                raise Unsatisfiable(
                    'Unable to satisfy assumptions of hypothesis %s.' %
                    (get_pretty_function_description(self.test),)
                )

    if not self.falsifying_examples:
        return

    self.failed_normally = True

    flaky = 0

    for falsifying_example in self.falsifying_examples:
        ran_example = ConjectureData.for_buffer(falsifying_example.buffer)
        self.__was_flaky = False
        assert falsifying_example.__expected_exception is not None
        try:
            self.execute(
                ran_example,
                print_example=True, is_final=True,
                expected_failure=(
                    falsifying_example.__expected_exception,
                    falsifying_example.__expected_traceback,
                )
            )
        except (UnsatisfiedAssumption, StopTest):
            report(traceback.format_exc())
            self.__flaky(
                'Unreliable assumption: An example which satisfied '
                'assumptions on the first run now fails it.'
            )
        except BaseException:
            if len(self.falsifying_examples) <= 1:
                raise
            report(traceback.format_exc())
        finally:  # pragma: no cover
            # This section is in fact entirely covered by the tests in
            # test_reproduce_failure, but it seems to trigger a lovely set
            # of coverage bugs: The branches show up as uncovered (despite
            # definitely being covered - you can add an assert False else
            # branch to verify this and see it fail - and additionally the
            # second branch still complains about lack of coverage even if
            # you add a pragma: no cover to it!
            # See https://bitbucket.org/ned/coveragepy/issues/623/
            if self.settings.print_blob is not PrintSettings.NEVER:
                failure_blob = encode_failure(falsifying_example.buffer)
                # Have to use the example we actually ran, not the original
                # falsifying example! Otherwise we won't catch problems
                # where the repr of the generated example doesn't parse.
                can_use_repr = ran_example.can_reproduce_example_from_repr
                if (
                    self.settings.print_blob is PrintSettings.ALWAYS or (
                        self.settings.print_blob is PrintSettings.INFER and
                        self.settings.verbosity >= Verbosity.normal and
                        not can_use_repr and len(failure_blob) < 200
                    )
                ):
                    report((
                        '\n'
                        'You can reproduce this example by temporarily '
                        'adding @reproduce_failure(%r, %r) as a decorator '
                        'on your test case') % (
                            __version__, failure_blob,))
        if self.__was_flaky:
            flaky += 1

    # If we only have one example then we should have raised an error or
    # flaky prior to this point.
    assert len(self.falsifying_examples) > 1

    if flaky > 0:
        raise Flaky((
            'Hypothesis found %d distinct failures, but %d of them '
            'exhibited some sort of flaky behaviour.') % (
                len(self.falsifying_examples), flaky))
    else:
        raise MultipleFailures((
            'Hypothesis found %d distinct failures.') % (
                len(self.falsifying_examples),))
def run(self):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True
    if global_force_seed is None:
        database_key = str_to_bytes(fully_qualified_name(self.test))
    else:
        database_key = None
    self.start_time = benchmark_time()
    global in_given
    runner = ConjectureRunner(
        self.evaluate_test_data,
        settings=self.settings, random=self.random,
        database_key=database_key,
    )

    if in_given or self.collector is None:
        runner.run()
    else:  # pragma: no cover
        in_given = True
        original_trace = sys.gettrace()
        try:
            sys.settrace(None)
            runner.run()
        finally:
            in_given = False
            sys.settrace(original_trace)

    self.used_examples_from_database = \
        runner.used_examples_from_database
    note_engine_for_statistics(runner)
    run_time = benchmark_time() - self.start_time
    self.used_examples_from_database = runner.used_examples_from_database

    if runner.used_examples_from_database:
        if self.settings.derandomize:
            note_deprecation(
                'In future derandomize will imply database=None, but your '
                'test is currently using examples from the database. To '
                'get the future behaviour, update your settings to '
                'include database=None.')
        if self.__had_seed:
            note_deprecation(
                'In future use of @seed will imply database=None in your '
                'settings, but your test is currently using examples from '
                'the database. To get the future behaviour, update your '
                'settings for this test to include database=None.')

    timed_out = runner.exit_reason == ExitReason.timeout
    if runner.call_count == 0:
        return
    if runner.interesting_examples:
        self.falsifying_examples = sorted(
            [d for d in runner.interesting_examples.values()],
            key=lambda d: sort_key(d.buffer), reverse=True)
    else:
        if runner.valid_examples == 0:
            if timed_out:
                raise Timeout(
                    ('Ran out of time before finding a satisfying '
                     'example for %s. Only found %d examples in %.2fs.') %
                    (get_pretty_function_description(self.test),
                     runner.valid_examples, run_time))
            else:
                raise Unsatisfiable(
                    'Unable to satisfy assumptions of hypothesis %s.' %
                    (get_pretty_function_description(self.test),))

    if not self.falsifying_examples:
        return

    self.failed_normally = True

    flaky = 0

    for falsifying_example in self.falsifying_examples:
        ran_example = ConjectureData.for_buffer(falsifying_example.buffer)
        self.__was_flaky = False
        assert falsifying_example.__expected_exception is not None
        try:
            self.execute(ran_example, print_example=True, is_final=True,
                         expected_failure=(
                             falsifying_example.__expected_exception,
                             falsifying_example.__expected_traceback,
                         ))
        except (UnsatisfiedAssumption, StopTest):
            report(traceback.format_exc())
            self.__flaky(
                'Unreliable assumption: An example which satisfied '
                'assumptions on the first run now fails it.')
        except BaseException:
            if len(self.falsifying_examples) <= 1:
                raise
            report(traceback.format_exc())
        finally:  # pragma: no cover
            # This section is in fact entirely covered by the tests in
            # test_reproduce_failure, but it seems to trigger a lovely set
            # of coverage bugs: The branches show up as uncovered (despite
            # definitely being covered - you can add an assert False else
            # branch to verify this and see it fail - and additionally the
            # second branch still complains about lack of coverage even if
            # you add a pragma: no cover to it!
            # See https://bitbucket.org/ned/coveragepy/issues/623/
            if self.settings.print_blob is not PrintSettings.NEVER:
                failure_blob = encode_failure(falsifying_example.buffer)
                # Have to use the example we actually ran, not the original
                # falsifying example! Otherwise we won't catch problems
                # where the repr of the generated example doesn't parse.
                can_use_repr = ran_example.can_reproduce_example_from_repr
                if (self.settings.print_blob is PrintSettings.ALWAYS or (
                    self.settings.print_blob is PrintSettings.INFER and
                    not can_use_repr and len(failure_blob) < 200
                )):
                    report((
                        '\n'
                        'You can reproduce this example by temporarily '
                        'adding @reproduce_failure(%r, %r) as a decorator '
                        'on your test case') % (
                            __version__, failure_blob,))
        if self.__was_flaky:
            flaky += 1

    # If we only have one example then we should have raised an error or
    # flaky prior to this point.
    assert len(self.falsifying_examples) > 1

    if flaky > 0:
        raise Flaky(
            ('Hypothesis found %d distinct failures, but %d of them '
             'exhibited some sort of flaky behaviour.') %
            (len(self.falsifying_examples), flaky))
    else:
        raise MultipleFailures(
            'Hypothesis found %d distinct failures.' %
            (len(self.falsifying_examples),))
def test_function(self, data):
    if benchmark_time() - self.start_time >= HUNG_TEST_TIME_LIMIT:
        fail_health_check(
            self.settings,
            ('Your test has been running for at least five minutes. This '
             'is probably not what you intended, so by default Hypothesis '
             'turns it into an error.'),
            HealthCheck.hung_test,
        )
    self.call_count += 1
    try:
        self._test_function(data)
        data.freeze()
    except StopTest as e:
        if e.testcounter != data.testcounter:
            self.save_buffer(data.buffer)
            raise
    except BaseException:
        self.save_buffer(data.buffer)
        raise
    finally:
        data.freeze()
        self.note_details(data)

    self.target_selector.add(data)

    self.debug_data(data)

    if data.status == Status.VALID:
        self.valid_examples += 1

    # Record the test result in the tree, to avoid unnecessary work in
    # the future.

    # The tree has two main uses:
    # 1. It is mildly useful in some cases during generation where there is
    #    a high probability of duplication but it is possible to generate
    #    many examples. e.g. if we had input of the form none() | text()
    #    then we would generate duplicates 50% of the time, and would
    #    like to avoid that and spend more time exploring the text() half
    #    of the search space. The tree allows us to predict in advance if
    #    the test would lead to a duplicate and avoid that.
    # 2. When shrinking it is *extremely* useful to be able to anticipate
    #    duplication, because we try many similar and smaller test cases,
    #    and these will tend to have a very high duplication rate. This is
    #    where the tree usage really shines.
    #
    # Unfortunately, as well as being the less useful type of tree usage,
    # the first type is also the most expensive! Once we've entered shrink
    # mode our time remaining is essentially bounded - we're just here
    # until we've found the minimal example. In exploration mode, we might
    # be early on in a very long-running process, and keeping everything
    # we've ever seen lying around ends up bloating our memory usage
    # substantially by causing us to use O(max_examples) memory.
    #
    # As a compromise, what we do is reset the cache every so often. This
    # keeps our memory usage bounded. It has a few unfortunate failure
    # modes in that it means that we can't always detect when we should
    # have stopped - if we are exploring a language which has only slightly
    # more than cache reset frequency number of members, we will end up
    # exploring indefinitely when we could have stopped. However, this is
    # a fairly unusual case - thanks to exponential blow-ups in language
    # size, most languages are either very large (possibly infinite) or
    # very small. Nevertheless we want CACHE_RESET_FREQUENCY to be quite
    # high to avoid this case coming up in practice.
    if (
        self.call_count % CACHE_RESET_FREQUENCY == 0 and
        not self.interesting_examples
    ):
        self.reset_tree_to_empty()

    self.tree.add(data)

    if data.status == Status.INTERESTING:
        key = data.interesting_origin
        changed = False
        try:
            existing = self.interesting_examples[key]
        except KeyError:
            changed = True
        else:
            if sort_key(data.buffer) < sort_key(existing.buffer):
                self.shrinks += 1
                self.downgrade_buffer(existing.buffer)
                changed = True

        if changed:
            self.save_buffer(data.buffer)
            self.interesting_examples[key] = data
            self.shrunk_examples.discard(key)

        if self.shrinks >= MAX_SHRINKS:
            self.exit_with(ExitReason.max_shrinks)

    if not self.interesting_examples:
        if self.valid_examples >= self.settings.max_examples:
            self.exit_with(ExitReason.max_examples)
        if self.call_count >= max(
            self.settings.max_examples * 10,
            # We have a high-ish default max iterations, so that tests
            # don't become flaky when max_examples is too low.
            1000,
        ):
            self.exit_with(ExitReason.max_iterations)

    if self.__tree_is_exhausted():
        self.exit_with(ExitReason.finished)

    self.record_for_health_check(data)
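
# Hedged aside: ``sort_key`` above orders buffers shortlex (shorter buffers
# first, ties broken lexicographically), which is what makes "smaller"
# well-defined when deciding whether a new interesting example improves on
# the existing one. A minimal sketch of that ordering; the real helper
# lives in Hypothesis's conjecture internals, so this reimplementation is
# an assumption:
def sort_key(buffer):
    return (len(buffer), bytes(buffer))


assert sort_key(b'\x05') < sort_key(b'\x00\x00')       # shorter always wins
assert sort_key(b'\x00\x01') < sort_key(b'\x00\x02')   # then byte-wise order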