# NOTE: the stdlib imports below are implied by this file; the remaining
# names used here (Settings, Status, StopTest, Phase, ExitReason,
# ConjectureData, TargetSelector, RunIsComplete, sort_key, hbytes, hrange,
# uniform, minimize, MAX_DEPTH, ...) come from Hypothesis-internal modules
# not shown in this excerpt.
import time
from collections import Counter
from math import ceil
from random import Random, getrandbits
from weakref import WeakKeyDictionary


class ConjectureRunner(object):

    def __init__(
        self, test_function,
        settings=None, random=None,
        database_key=None,
    ):
        self._test_function = test_function
        self.settings = settings or Settings()
        self.last_data = None
        self.shrinks = 0
        self.call_count = 0
        self.event_call_counts = Counter()
        self.valid_examples = 0
        self.start_time = time.time()
        self.random = random or Random(getrandbits(128))
        self.database_key = database_key
        self.status_runtimes = {}

        self.events_to_strings = WeakKeyDictionary()

        self.target_selector = TargetSelector(self.random)

        # Tree nodes are stored in an array to prevent heavy nesting of data
        # structures. Branches are dicts mapping bytes to child nodes (which
        # will in general only be partially populated). Leaves are
        # ConjectureData objects that have been previously seen as the result
        # of following that path.
        self.tree = [{}]

        # A node is dead if there is nothing left to explore past that point.
        # Recursively, a node is dead if either it is a leaf or every byte
        # leads to a dead node when starting from here.
        self.dead = set()
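        # For illustration: after a single run whose buffer was b'\x00\x01',
        # the tree would be
        #
        #     self.tree == [{0: 1}, {1: 2}, <ConjectureData leaf>]
        #
        # i.e. node 0 routes byte 0x00 to node 1, node 1 routes byte 0x01 to
        # the leaf at index 2, and index 2 is in self.dead because a leaf has
        # nothing left to explore.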
        # We rewrite the byte stream at various points during parsing, to one
        # that will produce an equivalent result but is in some sense more
        # canonical. We keep track of these so that when walking the tree we
        # can identify nodes where the exact byte value doesn't matter and
        # treat all bytes there as equivalent. This significantly reduces the
        # size of the search space and removes a lot of redundant examples.

        # Maps tree indices to the unique byte that is valid at that point.
        # Corresponds to data.write() calls.
        self.forced = {}

        # Maps tree indices to the maximum byte that is valid at that point.
        # Currently this is only used inside draw_bits, but it potentially
        # could get used elsewhere.
        self.capped = {}

        # Where a tree node consists of the beginning of a block we track the
        # size of said block. This allows us to tell when an example is too
        # short even if it goes off the unexplored region of the tree - if it
        # is at the beginning of a block of size 4 but only has 3 bytes left,
        # it's going to overrun the end of the buffer regardless of the
        # buffer contents.
        self.block_sizes = {}

        self.interesting_examples = {}
        self.covering_examples = {}

        self.shrunk_examples = set()

        self.tag_intern_table = {}

    def __tree_is_exhausted(self):
        return 0 in self.dead

    def test_function(self, data):
        self.call_count += 1
        try:
            self._test_function(data)
            data.freeze()
        except StopTest as e:
            if e.testcounter != data.testcounter:
                self.save_buffer(data.buffer)
                raise e
        except:
            self.save_buffer(data.buffer)
            raise
        finally:
            data.freeze()
            self.note_details(data)

        self.target_selector.add(data)

        self.debug_data(data)

        tags = frozenset(data.tags)
        data.tags = self.tag_intern_table.setdefault(tags, tags)

        if data.status == Status.VALID:
            self.valid_examples += 1
            for t in data.tags:
                existing = self.covering_examples.get(t)
                if (
                    existing is None or
                    sort_key(data.buffer) < sort_key(existing.buffer)
                ):
                    self.covering_examples[t] = data
                    if self.database is not None:
                        self.database.save(self.covering_key, data.buffer)
                        if existing is not None:
                            self.database.delete(
                                self.covering_key, existing.buffer)

        tree_node = self.tree[0]
        indices = []
        node_index = 0
        for i, b in enumerate(data.buffer):
            indices.append(node_index)
            if i in data.forced_indices:
                self.forced[node_index] = b
            try:
                self.capped[node_index] = data.capped_indices[i]
            except KeyError:
                pass
            try:
                node_index = tree_node[b]
            except KeyError:
                node_index = len(self.tree)
                self.tree.append({})
                tree_node[b] = node_index
            tree_node = self.tree[node_index]
            if node_index in self.dead:
                break

        for u, v in data.blocks:
            # This can happen if we hit a dead node when walking the buffer.
            # In that case we already have this section of the tree mapped.
            if u >= len(indices):
                break
            self.block_sizes[indices[u]] = v - u

        if data.status != Status.OVERRUN and node_index not in self.dead:
            self.dead.add(node_index)
            self.tree[node_index] = data

            for j in reversed(indices):
                # A node stays live while some byte value allowed at that
                # point (at most self.capped[j], default 255) has not yet
                # been explored, unless the byte there was forced.
                if (
                    len(self.tree[j]) < self.capped.get(j, 255) + 1 and
                    j not in self.forced
                ):
                    break
                if set(self.tree[j].values()).issubset(self.dead):
                    self.dead.add(j)
                else:
                    break

        last_data_is_interesting = (
            self.last_data is not None and
            self.last_data.status == Status.INTERESTING
        )

        if data.status == Status.INTERESTING:
            first_call = len(self.interesting_examples) == 0

            key = data.interesting_origin
            changed = False
            try:
                existing = self.interesting_examples[key]
            except KeyError:
                changed = True
            else:
                if sort_key(data.buffer) < sort_key(existing.buffer):
                    self.downgrade_buffer(existing.buffer)
                    changed = True

            if changed:
                self.interesting_examples[key] = data
                self.shrunk_examples.discard(key)
                if last_data_is_interesting and not first_call:
                    self.shrinks += 1

            if not last_data_is_interesting or (
                sort_key(data.buffer) < sort_key(self.last_data.buffer) and
                data.interesting_origin == self.last_data.interesting_origin
            ):
                self.last_data = data

            if self.shrinks >= self.settings.max_shrinks:
                self.exit_with(ExitReason.max_shrinks)
        elif (
            self.last_data is None or
            self.last_data.status < Status.INTERESTING
        ):
            self.last_data = data

        if (
            self.settings.timeout > 0 and
            time.time() >= self.start_time + self.settings.timeout
        ):
            self.exit_with(ExitReason.timeout)

        if not self.interesting_examples:
            if self.valid_examples >= self.settings.max_examples:
                self.exit_with(ExitReason.max_examples)
            if self.call_count >= max(
                self.settings.max_iterations, self.settings.max_examples
            ):
                self.exit_with(ExitReason.max_iterations)

        if self.__tree_is_exhausted():
            self.exit_with(ExitReason.finished)
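    # For illustration: the dead-node bookkeeping in test_function is what
    # drives __tree_is_exhausted. If, say, the root byte is capped to zero
    # (self.capped[0] == 0), a single run beginning with b'\x00' creates the
    # only child the root can ever have; as soon as that child is dead, the
    # reversed-indices loop above marks node 0 dead as well, and the next
    # call exits with ExitReason.finished.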
    def save_buffer(self, buffer, key=None):
        if self.settings.database is not None:
            if key is None:
                key = self.database_key
            if key is None:
                return
            self.settings.database.save(key, hbytes(buffer))

    def downgrade_buffer(self, buffer):
        if self.settings.database is not None:
            self.settings.database.move(
                self.database_key, self.secondary_key, buffer)

    @property
    def secondary_key(self):
        return b'.'.join((self.database_key, b'secondary'))

    @property
    def covering_key(self):
        return b'.'.join((self.database_key, b'coverage'))

    def note_details(self, data):
        if data.status == Status.INTERESTING:
            if (
                self.last_data is None or
                self.last_data.status != Status.INTERESTING or
                self.last_data.interesting_origin == data.interesting_origin
            ):
                self.save_buffer(data.buffer)
            else:
                self.save_buffer(data.buffer, self.secondary_key)
        runtime = max(data.finish_time - data.start_time, 0.0)
        self.status_runtimes.setdefault(data.status, []).append(runtime)
        for event in set(map(self.event_to_string, data.events)):
            self.event_call_counts[event] += 1

    def debug(self, message):
        with self.settings:
            debug_report(message)

    def debug_data(self, data):
        buffer_parts = [u'[']
        for i, (u, v) in enumerate(data.blocks):
            if i > 0:
                buffer_parts.append(u' || ')
            buffer_parts.append(
                u', '.join(int_to_text(int(b)) for b in data.buffer[u:v]))
        buffer_parts.append(u']')

        status = unicode_safe_repr(data.status)

        if data.status == Status.INTERESTING:
            status = u'%s (%s)' % (
                status, unicode_safe_repr(data.interesting_origin,))

        self.debug(u'%d bytes %s -> %s, %s' % (
            data.index, u''.join(buffer_parts), status, data.output,))
    def prescreen_buffer(self, buffer):
        """Attempt to rule out buffer as a possible interesting candidate.

        Returns False if we know for sure that running this buffer will not
        produce an interesting result. Returns True if it might (because it
        explores territory we have not previously tried).

        This is purely an optimisation to try to reduce the number of tests
        we run. "return True" would be a valid but inefficient
        implementation.
        """
        node_index = 0
        n = len(buffer)
        for k, b in enumerate(buffer):
            if node_index in self.dead:
                return False
            try:
                # The block size at that point provides a lower bound on how
                # many more bytes are required. If the buffer does not have
                # enough bytes to fulfill that block size then we can rule
                # out this buffer.
                if k + self.block_sizes[node_index] > n:
                    return False
            except KeyError:
                pass
            try:
                b = self.forced[node_index]
            except KeyError:
                pass
            try:
                b = min(b, self.capped[node_index])
            except KeyError:
                pass
            try:
                node_index = self.tree[node_index][b]
            except KeyError:
                return True
        else:
            return False

    def incorporate_new_buffer(self, buffer):
        assert self.last_data.status == Status.INTERESTING
        start = self.last_data.interesting_origin

        buffer = hbytes(buffer[:self.last_data.index])
        assert sort_key(buffer) < sort_key(self.last_data.buffer)

        if not self.prescreen_buffer(buffer):
            return False

        assert sort_key(buffer) <= sort_key(self.last_data.buffer)
        data = ConjectureData.for_buffer(buffer)
        self.test_function(data)
        assert self.last_data.interesting_origin == start
        return data is self.last_data

    def run(self):
        with self.settings:
            try:
                self._run()
            except RunIsComplete:
                pass
            if self.interesting_examples:
                self.last_data = max(
                    self.interesting_examples.values(),
                    key=lambda d: sort_key(d.buffer))
            if self.last_data is not None:
                self.debug_data(self.last_data)
            self.debug(
                u'Run complete after %d examples (%d valid) and %d shrinks'
                % (self.call_count, self.valid_examples, self.shrinks,))

    def _new_mutator(self):
        def draw_new(data, n):
            return uniform(self.random, n)

        def draw_existing(data, n):
            return self.last_data.buffer[data.index:data.index + n]

        def draw_smaller(data, n):
            existing = self.last_data.buffer[data.index:data.index + n]
            r = uniform(self.random, n)
            if r <= existing:
                return r
            return _draw_predecessor(self.random, existing)

        def draw_larger(data, n):
            existing = self.last_data.buffer[data.index:data.index + n]
            r = uniform(self.random, n)
            if r >= existing:
                return r
            return _draw_successor(self.random, existing)

        def reuse_existing(data, n):
            choices = data.block_starts.get(n, []) or \
                self.last_data.block_starts.get(n, [])
            if choices:
                i = self.random.choice(choices)
                return self.last_data.buffer[i:i + n]
            else:
                result = uniform(self.random, n)
                assert isinstance(result, hbytes)
                return result

        def flip_bit(data, n):
            buf = bytearray(
                self.last_data.buffer[data.index:data.index + n])
            i = self.random.randint(0, n - 1)
            k = self.random.randint(0, 7)
            buf[i] ^= (1 << k)
            return hbytes(buf)

        def draw_zero(data, n):
            return hbytes(b'\0' * n)

        def draw_max(data, n):
            return hbytes([255]) * n

        def draw_constant(data, n):
            return bytes_from_list([
                self.random.randint(0, 255)] * n)

        def redraw_last(data, n):
            u = self.last_data.blocks[-1][0]
            if data.index + n <= u:
                return self.last_data.buffer[data.index:data.index + n]
            else:
                return uniform(self.random, n)

        options = [
            draw_new,
            redraw_last, redraw_last,
            reuse_existing, reuse_existing,
            draw_existing, draw_smaller, draw_larger,
            flip_bit,
            draw_zero, draw_max, draw_zero, draw_max,
            draw_constant,
        ]

        bits = [
            self.random.choice(options) for _ in hrange(3)
        ]

        def draw_mutated(data, n):
            if data.index + n > len(self.last_data.buffer):
                result = uniform(self.random, n)
            else:
                result = self.random.choice(bits)(data, n)

            return self.__rewrite_for_novelty(
                data, self.__zero_bound(data, result))

        return draw_mutated

    def __rewrite(self, data, result):
        return self.__rewrite_for_novelty(
            data, self.__zero_bound(data, result))

    def __zero_bound(self, data, result):
        """This tries to get the size of the generated data under control by
        replacing the result with zero if we are too deep or have already
        generated too much data.

        This causes us to enter "shrinking mode" there and thus reduce
        the size of the generated data.
        """
        if (
            data.depth * 2 >= MAX_DEPTH or
            (data.index + len(result)) * 2 >= self.settings.buffer_size
        ):
            if any(result):
                data.hit_zero_bound = True
            return hbytes(len(result))
        else:
            return result
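    # For illustration: with buffer_size = 8192, any draw that would bring
    # data.index + len(result) to 4096 or more (or that happens at depth
    # >= MAX_DEPTH / 2) comes back as all-zero bytes, and hit_zero_bound is
    # recorded if the draw actually contained a non-zero byte. Strategies
    # typically take their smallest branches when reading zeros, so
    # generation winds down rather than overrunning the buffer.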
""" if (data.depth * 2 >= MAX_DEPTH or (data.index + len(result)) * 2 >= self.settings.buffer_size): if any(result): data.hit_zero_bound = True return hbytes(len(result)) else: return result def __rewrite_for_novelty(self, data, result): """Take a block that is about to be added to data as the result of a draw_bytes call and rewrite it a small amount to ensure that the result will be novel: that is, not hit a part of the tree that we have fully explored. This is mostly useful for test functions which draw a small number of blocks. """ assert isinstance(result, hbytes) try: node_index = data.__current_node_index except AttributeError: node_index = 0 data.__current_node_index = node_index data.__hit_novelty = False data.__evaluated_to = 0 if data.__hit_novelty: return result node = self.tree[node_index] for i in hrange(data.__evaluated_to, len(data.buffer)): node = self.tree[node_index] try: node_index = node[data.buffer[i]] assert node_index not in self.dead node = self.tree[node_index] except KeyError: data.__hit_novelty = True return result for i, b in enumerate(result): assert isinstance(b, int) try: new_node_index = node[b] except KeyError: data.__hit_novelty = True return result new_node = self.tree[new_node_index] if new_node_index in self.dead: if isinstance(result, hbytes): result = bytearray(result) for c in range(256): if c not in node: assert c <= self.capped.get(node_index, c) result[i] = c data.__hit_novelty = True return hbytes(result) else: new_node_index = node[c] new_node = self.tree[new_node_index] if new_node_index not in self.dead: result[i] = c break else: # pragma: no cover assert False, ( 'Found a tree node which is live despite all its ' 'children being dead.') node_index = new_node_index node = new_node assert node_index not in self.dead data.__current_node_index = node_index data.__evaluated_to = data.index + len(result) return hbytes(result) @property def database(self): if self.database_key is None: return None return self.settings.database def has_existing_examples(self): return (self.database is not None and Phase.reuse in self.settings.phases) def reuse_existing_examples(self): """If appropriate (we have a database and have been told to use it), try to reload existing examples from the database. If there are a lot we don't try all of them. We always try the smallest example in the database (which is guaranteed to be the last failure) and the largest (which is usually the seed example which the last failure came from but we don't enforce that). We then take a random sampling of the remainder and try those. Any examples that are no longer interesting are cleared out. """ if self.has_existing_examples(): self.debug('Reusing examples from database') # We have to do some careful juggling here. We have two database # corpora: The primary and secondary. The primary corpus is a # small set of minimized examples each of which has at one point # demonstrated a distinct bug. We want to retry all of these. # We also have a secondary corpus of examples that have at some # point demonstrated interestingness (currently only ones that # were previously non-minimal examples of a bug, but this will # likely expand in future). These are a good source of potentially # interesting examples, but there are a lot of them, so we down # sample the secondary corpus to a more manageable size. 
    def reuse_existing_examples(self):
        """If appropriate (we have a database and have been told to use it),
        try to reload existing examples from the database.

        If there are a lot we don't try all of them. We always try the
        smallest example in the database (which is guaranteed to be the
        last failure) and the largest (which is usually the seed example
        which the last failure came from but we don't enforce that). We
        then take a random sampling of the remainder and try those. Any
        examples that are no longer interesting are cleared out.
        """
        if self.has_existing_examples():
            self.debug('Reusing examples from database')
            # We have to do some careful juggling here. We have two database
            # corpora: the primary and secondary. The primary corpus is a
            # small set of minimized examples each of which has at one point
            # demonstrated a distinct bug. We want to retry all of these.

            # We also have a secondary corpus of examples that have at some
            # point demonstrated interestingness (currently only ones that
            # were previously non-minimal examples of a bug, but this will
            # likely expand in future). These are a good source of
            # potentially interesting examples, but there are a lot of them,
            # so we downsample the secondary corpus to a more manageable
            # size.
            corpus = sorted(
                self.settings.database.fetch(self.database_key),
                key=sort_key)
            desired_size = max(2, ceil(0.1 * self.settings.max_examples))

            for extra_key in [self.secondary_key, self.covering_key]:
                if len(corpus) < desired_size:
                    extra_corpus = list(
                        self.settings.database.fetch(extra_key))

                    shortfall = desired_size - len(corpus)

                    if len(extra_corpus) <= shortfall:
                        extra = extra_corpus
                    else:
                        extra = self.random.sample(extra_corpus, shortfall)
                    extra.sort(key=sort_key)
                    corpus.extend(extra)

            for existing in corpus:
                self.last_data = ConjectureData.for_buffer(existing)
                try:
                    self.test_function(self.last_data)
                finally:
                    if self.last_data.status != Status.INTERESTING:
                        self.settings.database.delete(
                            self.database_key, existing)
                        self.settings.database.delete(
                            self.secondary_key, existing)

    def exit_with(self, reason):
        self.exit_reason = reason
        raise RunIsComplete()

    def generate_new_examples(self):
        if Phase.generate not in self.settings.phases:
            return

        zero_data = ConjectureData(
            max_length=self.settings.buffer_size,
            draw_bytes=lambda data, n: self.__rewrite_for_novelty(
                data, hbytes(n)))
        self.test_function(zero_data)

        count = 0
        while count < 10 and not self.interesting_examples:
            def draw_bytes(data, n):
                return self.__rewrite_for_novelty(
                    data, self.__zero_bound(data, uniform(self.random, n)))

            targets_found = len(self.covering_examples)

            self.last_data = ConjectureData(
                max_length=self.settings.buffer_size,
                draw_bytes=draw_bytes)
            self.test_function(self.last_data)
            self.last_data.freeze()

            if len(self.covering_examples) > targets_found:
                count = 0
            else:
                count += 1

        mutations = 0
        mutator = self._new_mutator()

        zero_bound_queue = []

        while not self.interesting_examples:
            if zero_bound_queue:
                # Whenever a generated example hits a bound that forces zero
                # blocks into it, this creates a weird distortion effect by
                # making certain parts of the data stream (especially ones
                # to the right) much more likely to be zero. We fix this by
                # redistributing the generated data by shuffling it
                # randomly. This results in the zero data being spread
                # evenly throughout the buffer. Hopefully the shrinking this
                # triggers will mean that we naturally stop hitting the
                # bound.
                # If it doesn't then we will queue the new version up again
                # (now with more zeros) and try again.
                overdrawn = zero_bound_queue.pop()
                buffer = bytearray(overdrawn.buffer)

                # These will have values written to them that are different
                # from what's in them anyway, so the value there doesn't
                # really "count" for distributional purposes, and if we
                # leave them in then they can cause the fraction of non-zero
                # bytes to increase on redraw instead of decrease.
                for i in overdrawn.forced_indices:
                    buffer[i] = 0

                self.random.shuffle(buffer)
                buffer = hbytes(buffer)

                def draw_bytes(data, n):
                    result = buffer[data.index:data.index + n]
                    if len(result) < n:
                        result += hbytes(n - len(result))
                    return self.__rewrite(data, result)

                data = ConjectureData(
                    draw_bytes=draw_bytes,
                    max_length=self.settings.buffer_size,
                )
                self.test_function(data)
                data.freeze()
            else:
                target, last_data = self.target_selector.select()
                mutations += 1
                targets_found = len(self.covering_examples)
                prev_data = self.last_data
                data = ConjectureData(
                    draw_bytes=mutator,
                    max_length=self.settings.buffer_size)
                self.test_function(data)
                data.freeze()
                if (
                    data.status > prev_data.status or
                    len(self.covering_examples) > targets_found
                ):
                    mutations = 0
                elif (
                    data.status < prev_data.status or
                    not self.target_selector.has_tag(target, data) or
                    mutations >= self.settings.max_mutations
                ):
                    mutations = 0
                    mutator = self._new_mutator()
                if getattr(data, 'hit_zero_bound', False):
                    zero_bound_queue.append(data)
                mutations += 1

    def _run(self):
        self.last_data = None
        self.start_time = time.time()

        self.reuse_existing_examples()
        self.generate_new_examples()

        if (
            Phase.shrink not in self.settings.phases or
            not self.interesting_examples
        ):
            self.exit_with(ExitReason.finished)

        for prev_data in sorted(
            self.interesting_examples.values(),
            key=lambda d: sort_key(d.buffer)
        ):
            assert prev_data.status == Status.INTERESTING
            data = ConjectureData.for_buffer(prev_data.buffer)
            self.test_function(data)
            if data.status != Status.INTERESTING:
                self.exit_with(ExitReason.flaky)

        while len(self.shrunk_examples) < len(self.interesting_examples):
            target, self.last_data = min(
                [
                    (k, v) for k, v in self.interesting_examples.items()
                    if k not in self.shrunk_examples
                ],
                key=lambda kv: (
                    sort_key(kv[1].buffer), sort_key(repr(kv[0]))),
            )
            self.debug('Shrinking %r' % (target,))
            assert self.last_data.interesting_origin == target
            self.shrink()
            self.shrunk_examples.add(target)
        self.exit_with(ExitReason.finished)
    def try_buffer_with_rewriting_from(self, initial_attempt, v):
        initial_data = None
        node_index = 0
        for c in initial_attempt:
            try:
                node_index = self.tree[node_index][c]
            except KeyError:
                break
            node = self.tree[node_index]
            if isinstance(node, ConjectureData):
                initial_data = node
                break

        if initial_data is None:
            initial_data = ConjectureData.for_buffer(initial_attempt)
            self.test_function(initial_data)

        if initial_data.status == Status.INTERESTING:
            return initial_data is self.last_data

        # If this produced something completely invalid we ditch it
        # here rather than trying to persevere.
        if initial_data.status < Status.VALID:
            return False

        if len(initial_data.buffer) < v:
            return False

        lost_data = len(self.last_data.buffer) - len(initial_data.buffer)

        # If this did not in fact cause the data size to shrink we
        # bail here because it's not worth trying to delete stuff from
        # the remainder.
        if lost_data <= 0:
            return False

        try_with_deleted = bytearray(initial_attempt)
        del try_with_deleted[v:v + lost_data]
        try_with_deleted.extend(hbytes(lost_data - 1))
        if self.incorporate_new_buffer(try_with_deleted):
            return True

        for r, s in self.last_data.intervals:
            if (
                r >= v and
                s - r <= lost_data and
                r < len(initial_data.buffer)
            ):
                try_with_deleted = bytearray(initial_attempt)
                del try_with_deleted[r:s]
                try_with_deleted.extend(hbytes(s - r - 1))
                if self.incorporate_new_buffer(try_with_deleted):
                    return True
        return False

    def delta_interval_deletion(self):
        """Attempt to delete every interval in the example."""

        self.debug('delta interval deletes')

        # We do a delta-debugging style thing here where we initially try to
        # delete many intervals at once and prune it down exponentially to
        # eventually only trying to delete one interval at a time.

        # I'm a little skeptical that this is helpful in general, but we've
        # got at least one benchmark where it does help.
        k = len(self.last_data.intervals) // 2
        while k > 0:
            i = 0
            while i + k <= len(self.last_data.intervals):
                bitmask = [True] * len(self.last_data.buffer)

                for u, v in self.last_data.intervals[i:i + k]:
                    for t in range(u, v):
                        bitmask[t] = False

                if not self.incorporate_new_buffer(hbytes(
                    b for b, keep in zip(self.last_data.buffer, bitmask)
                    if keep
                )):
                    i += k
            k //= 2
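    # For illustration: with eight intervals the pass above first attempts
    # deletions in runs of four (intervals 0-3, then 4-7), then in runs of
    # two, then one at a time, so a large deletable region costs O(log n)
    # successful shrinks rather than one test per interval.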
""" self.debug('Zeroing blocks') i = 0 while i < len(self.last_data.blocks): buf = self.last_data.buffer u, v = self.last_data.blocks[i] assert u < v block = buf[u:v] if any(block): self.incorporate_new_buffer(buf[:u] + hbytes(v - u) + buf[v:]) i += 1 def minimize_duplicated_blocks(self): """Find blocks that have been duplicated in multiple places and attempt to minimize all of the duplicates simultaneously.""" self.debug('Simultaneous shrinking of duplicated blocks') counts = Counter(self.last_data.buffer[u:v] for u, v in self.last_data.blocks) blocks = [k for k, count in counts.items() if count > 1] thresholds = {} for u, v in self.last_data.blocks: b = self.last_data.buffer[u:v] thresholds[b] = v blocks.sort(reverse=True) blocks.sort(key=lambda b: counts[b] * len(b), reverse=True) for block in blocks: parts = [ self.last_data.buffer[r:s] for r, s in self.last_data.blocks ] def replace(b): return hbytes( EMPTY_BYTES.join( hbytes(b if c == block else c) for c in parts)) threshold = thresholds[block] minimize(block, lambda b: self.try_buffer_with_rewriting_from( replace(b), threshold), random=self.random, full=False) def minimize_individual_blocks(self): self.debug('Shrinking of individual blocks') i = 0 while i < len(self.last_data.blocks): u, v = self.last_data.blocks[i] minimize( self.last_data.buffer[u:v], lambda b: self.try_buffer_with_rewriting_from( self.last_data.buffer[:u] + b + self.last_data.buffer[v:], v), random=self.random, full=False, ) i += 1 def reorder_blocks(self): self.debug('Reordering blocks') block_lengths = sorted(self.last_data.block_starts, reverse=True) for n in block_lengths: i = 1 while i < len(self.last_data.block_starts.get(n, ())): j = i while j > 0: buf = self.last_data.buffer blocks = self.last_data.block_starts[n] a_start = blocks[j - 1] b_start = blocks[j] a = buf[a_start:a_start + n] b = buf[b_start:b_start + n] if a <= b: break swapped = (buf[:a_start] + b + buf[a_start + n:b_start] + a + buf[b_start + n:]) assert len(swapped) == len(buf) assert swapped < buf if self.incorporate_new_buffer(swapped): j -= 1 else: break i += 1 def shrink(self): # We assume that if an all-zero block of bytes is an interesting # example then we're not going to do better than that. # This might not technically be true: e.g. for integers() | booleans() # the simplest example is actually [1, 0]. Missing this case is fairly # harmless and this allows us to make various simplifying assumptions # about the structure of the data (principally that we're never # operating on a block of all zero bytes so can use non-zeroness as a # signpost of complexity). if (not any(self.last_data.buffer) or self.incorporate_new_buffer( hbytes(len(self.last_data.buffer)))): return if self.has_existing_examples(): # If we have any smaller examples in the secondary corpus, now is # a good time to try them to see if they work as shrinks. They # probably won't, but it's worth a shot and gives us a good # opportunity to clear out the database. # It's not worth trying the primary corpus because we already # tried all of those in the initial phase. corpus = sorted(self.settings.database.fetch(self.secondary_key), key=sort_key) for c in corpus: if sort_key(c) >= sort_key(self.last_data.buffer): break elif self.incorporate_new_buffer(c): break else: self.settings.database.delete(self.secondary_key, c) # Coarse passes that are worth running once when the example is likely # to be "far from shrunk" but not worth repeating in a loop because # they are subsumed by more fine grained passes. 
    def shrink(self):
        # We assume that if an all-zero block of bytes is an interesting
        # example then we're not going to do better than that.
        # This might not technically be true: e.g. for integers() |
        # booleans() the simplest example is actually [1, 0]. Missing this
        # case is fairly harmless and this allows us to make various
        # simplifying assumptions about the structure of the data
        # (principally that we're never operating on a block of all zero
        # bytes so can use non-zeroness as a signpost of complexity).
        if (
            not any(self.last_data.buffer) or
            self.incorporate_new_buffer(hbytes(len(self.last_data.buffer)))
        ):
            return

        if self.has_existing_examples():
            # If we have any smaller examples in the secondary corpus, now
            # is a good time to try them to see if they work as shrinks.
            # They probably won't, but it's worth a shot and gives us a good
            # opportunity to clear out the database.

            # It's not worth trying the primary corpus because we already
            # tried all of those in the initial phase.
            corpus = sorted(
                self.settings.database.fetch(self.secondary_key),
                key=sort_key)
            for c in corpus:
                if sort_key(c) >= sort_key(self.last_data.buffer):
                    break
                elif self.incorporate_new_buffer(c):
                    break
                else:
                    self.settings.database.delete(self.secondary_key, c)

        # Coarse passes that are worth running once when the example is
        # likely to be "far from shrunk" but not worth repeating in a loop
        # because they are subsumed by more fine grained passes.
        self.delta_interval_deletion()
        self.coarse_block_replacement()

        change_counter = -1

        while self.shrinks > change_counter:
            change_counter = self.shrinks

            self.minimize_duplicated_blocks()
            self.minimize_individual_blocks()
            self.reorder_blocks()
            self.greedy_interval_deletion()

    def event_to_string(self, event):
        if isinstance(event, str):
            return event
        try:
            return self.events_to_strings[event]
        except KeyError:
            pass
        result = str(event)
        self.events_to_strings[event] = result
        return result
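# A sketch of how the runner is driven (illustrative only; the test function
# below is hypothetical, while ConjectureRunner, ConjectureData and Settings
# are the names used above):
#
#     def my_test(data):
#         # Draw two single-byte blocks and flag the example as interesting
#         # when they match and are non-zero; the runner then shrinks
#         # towards the lexicographically smallest such buffer.
#         x = data.draw_bytes(1)
#         y = data.draw_bytes(1)
#         if x == y and any(x):
#             data.mark_interesting()
#
#     runner = ConjectureRunner(my_test, settings=Settings())
#     runner.run()
#     # runner.interesting_examples now maps each distinct
#     # interesting_origin to the smallest ConjectureData seen for it, and
#     # runner.exit_reason records why the run stopped.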
class ConjectureRunner(object): def __init__( self, test_function, settings=None, random=None, database_key=None, ): self._test_function = test_function self.settings = settings or Settings() self.last_data = None self.changed = 0 self.shrinks = 0 self.call_count = 0 self.event_call_counts = Counter() self.valid_examples = 0 self.start_time = time.time() self.random = random or Random(getrandbits(128)) self.database_key = database_key self.status_runtimes = {} self.events_to_strings = WeakKeyDictionary() # Tree nodes are stored in an array to prevent heavy nesting of data # structures. Branches are dicts mapping bytes to child nodes (which # will in general only be partially populated). Leaves are # ConjectureData objects that have been previously seen as the result # of following that path. self.tree = [{}] # A node is dead if there is nothing left to explore past that point. # Recursively, a node is dead if either it is a leaf or every byte # leads to a dead node when starting from here. self.dead = set() self.forced = {} def __tree_is_exhausted(self): return 0 in self.dead def new_buffer(self): assert not self.__tree_is_exhausted() def draw_bytes(data, n, distribution): return self.__rewrite_for_novelty( data, self.__zero_bound(data, distribution(self.random, n)) ) self.last_data = ConjectureData( max_length=self.settings.buffer_size, draw_bytes=draw_bytes ) self.test_function(self.last_data) self.last_data.freeze() def test_function(self, data): self.call_count += 1 try: self._test_function(data) data.freeze() except StopTest as e: if e.testcounter != data.testcounter: self.save_buffer(data.buffer) raise e except: self.save_buffer(data.buffer) raise finally: data.freeze() self.note_details(data) self.debug_data(data) if data.status >= Status.VALID: self.valid_examples += 1 tree_node = self.tree[0] indices = [] node_index = 0 for i, b in enumerate(data.buffer): indices.append(node_index) if i in data.forced_indices: self.forced[node_index] = b try: node_index = tree_node[b] except KeyError: node_index = len(self.tree) self.tree.append({}) tree_node[b] = node_index tree_node = self.tree[node_index] if node_index in self.dead: break if data.status != Status.OVERRUN and node_index not in self.dead: self.dead.add(node_index) self.tree[node_index] = data for j in reversed(indices): if len(self.tree[j]) < 256 and j not in self.forced: break if set(self.tree[j].values()).issubset(self.dead): self.dead.add(j) else: break def consider_new_test_data(self, data): # Transition rules: # 1. Transition cannot decrease the status # 2. Any transition which increases the status is valid # 3. If the previous status was interesting, only shrinking # transitions are allowed. 
if data.buffer == self.last_data.buffer: return False if self.last_data.status < data.status: return True if self.last_data.status > data.status: return False if data.status == Status.INVALID: return data.index >= self.last_data.index if data.status == Status.OVERRUN: return data.overdraw <= self.last_data.overdraw if data.status == Status.INTERESTING: assert len(data.buffer) <= len(self.last_data.buffer) if len(data.buffer) == len(self.last_data.buffer): return data.buffer < self.last_data.buffer return True return True def save_buffer(self, buffer): if ( self.settings.database is not None and self.database_key is not None ): self.settings.database.save(self.database_key, hbytes(buffer)) def note_details(self, data): if data.status == Status.INTERESTING: self.save_buffer(data.buffer) runtime = max(data.finish_time - data.start_time, 0.0) self.status_runtimes.setdefault(data.status, []).append(runtime) for event in set(map(self.event_to_string, data.events)): self.event_call_counts[event] += 1 def debug(self, message): with self.settings: debug_report(message) def debug_data(self, data): self.debug(u'%d bytes %s -> %s, %s' % ( data.index, unicode_safe_repr(list(data.buffer[:data.index])), unicode_safe_repr(data.status), data.output, )) def prescreen_buffer(self, buffer): i = 0 for b in buffer: if i in self.dead: return False try: b = self.forced[i] except KeyError: pass try: i = self.tree[i][b] except KeyError: return True else: return False def incorporate_new_buffer(self, buffer): assert self.last_data.status == Status.INTERESTING if ( self.settings.timeout > 0 and time.time() >= self.start_time + self.settings.timeout ): self.exit_reason = ExitReason.timeout raise RunIsComplete() buffer = hbytes(buffer[:self.last_data.index]) if sort_key(buffer) >= sort_key(self.last_data.buffer): return False if not self.prescreen_buffer(buffer): return False assert sort_key(buffer) <= sort_key(self.last_data.buffer) data = ConjectureData.for_buffer(buffer) self.test_function(data) if self.consider_new_test_data(data): self.shrinks += 1 self.last_data = data if self.shrinks >= self.settings.max_shrinks: self.exit_reason = ExitReason.max_shrinks raise RunIsComplete() self.last_data = data self.changed += 1 return True return False def run(self): with self.settings: try: self._run() except RunIsComplete: pass self.debug( u'Run complete after %d examples (%d valid) and %d shrinks' % ( self.call_count, self.valid_examples, self.shrinks, )) def _new_mutator(self): def draw_new(data, n, distribution): return distribution(self.random, n) def draw_existing(data, n, distribution): return self.last_data.buffer[data.index:data.index + n] def draw_smaller(data, n, distribution): existing = self.last_data.buffer[data.index:data.index + n] r = distribution(self.random, n) if r <= existing: return r return _draw_predecessor(self.random, existing) def draw_larger(data, n, distribution): existing = self.last_data.buffer[data.index:data.index + n] r = distribution(self.random, n) if r >= existing: return r return _draw_successor(self.random, existing) def reuse_existing(data, n, distribution): choices = data.block_starts.get(n, []) or \ self.last_data.block_starts.get(n, []) if choices: i = self.random.choice(choices) return self.last_data.buffer[i:i + n] else: result = distribution(self.random, n) assert isinstance(result, hbytes) return result def flip_bit(data, n, distribution): buf = bytearray( self.last_data.buffer[data.index:data.index + n]) i = self.random.randint(0, n - 1) k = self.random.randint(0, 7) buf[i] ^= 
(1 << k) return hbytes(buf) def draw_zero(data, n, distribution): return hbytes(b'\0' * n) def draw_max(data, n, distribution): return hbytes([255]) * n def draw_constant(data, n, distribution): return bytes_from_list([ self.random.randint(0, 255) ] * n) options = [ draw_new, reuse_existing, reuse_existing, draw_existing, draw_smaller, draw_larger, flip_bit, draw_zero, draw_max, draw_zero, draw_max, draw_constant, ] bits = [ self.random.choice(options) for _ in hrange(3) ] def draw_mutated(data, n, distribution): if ( data.index + n > len(self.last_data.buffer) ): result = distribution(self.random, n) else: result = self.random.choice(bits)(data, n, distribution) return self.__rewrite_for_novelty( data, self.__zero_bound(data, result)) return draw_mutated def __rewrite(self, data, result): return self.__rewrite_for_novelty( data, self.__zero_bound(data, result) ) def __zero_bound(self, data, result): """This tries to get the size of the generated data under control by replacing the result with zero if we are too deep or have already generated too much data. This causes us to enter "shrinking mode" there and thus reduce the size of the generated data. """ if ( data.depth * 2 >= MAX_DEPTH or (data.index + len(result)) * 2 >= self.settings.buffer_size ): if any(result): data.hit_zero_bound = True return hbytes(len(result)) else: return result def __rewrite_for_novelty(self, data, result): """Take a block that is about to be added to data as the result of a draw_bytes call and rewrite it a small amount to ensure that the result will be novel: that is, not hit a part of the tree that we have fully explored. This is mostly useful for test functions which draw a small number of blocks. """ assert isinstance(result, hbytes) try: node_index = data.__current_node_index except AttributeError: node_index = 0 data.__current_node_index = node_index data.__hit_novelty = False data.__evaluated_to = 0 if data.__hit_novelty: return result node = self.tree[node_index] for i in hrange(data.__evaluated_to, len(data.buffer)): node = self.tree[node_index] try: node_index = node[data.buffer[i]] assert node_index not in self.dead node = self.tree[node_index] except KeyError: data.__hit_novelty = True return result for i, b in enumerate(result): assert isinstance(b, int) try: new_node_index = node[b] except KeyError: data.__hit_novelty = True return result new_node = self.tree[new_node_index] if new_node_index in self.dead: if isinstance(result, hbytes): result = bytearray(result) for c in range(256): if c not in node: result[i] = c data.__hit_novelty = True return hbytes(result) else: new_node_index = node[c] new_node = self.tree[new_node_index] if new_node_index not in self.dead: result[i] = c break else: # pragma: no cover assert False, ( 'Found a tree node which is live despite all its ' 'children being dead.') node_index = new_node_index node = new_node assert node_index not in self.dead data.__current_node_index = node_index data.__evaluated_to = data.index + len(result) return hbytes(result) def has_existing_examples(self): return ( self.settings.database is not None and self.database_key is not None and Phase.reuse in self.settings.phases ) def reuse_existing_examples(self): """If appropriate (we have a database and have been told to use it), try to reload existing examples from the database. If there are a lot we don't try all of them. 
We always try the smallest example in the database (which is guaranteed to be the last failure) and the largest (which is usually the seed example which the last failure came from but we don't enforce that). We then take a random sampling of the remainder and try those. Any examples that are no longer interesting are cleared out. """ if self.has_existing_examples(): corpus = sorted( self.settings.database.fetch(self.database_key), key=sort_key ) desired_size = max(2, ceil(0.1 * self.settings.max_examples)) if desired_size < len(corpus): new_corpus = [corpus[0], corpus[-1]] n_boost = max(desired_size - 2, 0) new_corpus.extend(self.random.sample(corpus[1:-1], n_boost)) corpus = new_corpus corpus.sort(key=sort_key) for existing in corpus: if self.valid_examples >= self.settings.max_examples: self.exit_with(ExitReason.max_examples) if self.call_count >= max( self.settings.max_iterations, self.settings.max_examples ): self.exit_with(ExitReason.max_iterations) data = ConjectureData.for_buffer(existing) self.test_function(data) data.freeze() self.last_data = data self.consider_new_test_data(data) if data.status == Status.INTERESTING: assert data.status == Status.INTERESTING self.last_data = data break else: self.settings.database.delete( self.database_key, existing) def exit_with(self, reason): self.exit_reason = reason raise RunIsComplete() def _run(self): self.last_data = None mutations = 0 start_time = time.time() self.reuse_existing_examples() if ( Phase.generate in self.settings.phases and not self.__tree_is_exhausted() ): if ( self.last_data is None or self.last_data.status < Status.INTERESTING ): self.new_buffer() mutator = self._new_mutator() zero_bound_queue = [] while ( self.last_data.status != Status.INTERESTING and not self.__tree_is_exhausted() ): if self.valid_examples >= self.settings.max_examples: self.exit_reason = ExitReason.max_examples return if self.call_count >= max( self.settings.max_iterations, self.settings.max_examples ): self.exit_reason = ExitReason.max_iterations return if ( self.settings.timeout > 0 and time.time() >= start_time + self.settings.timeout ): self.exit_reason = ExitReason.timeout return if zero_bound_queue: # Whenever we generated an example and it hits a bound # which forces zero blocks into it, this creates a weird # distortion effect by making certain parts of the data # stream (especially ones to the right) much more likely # to be zero. We fix this by redistributing the generated # data by shuffling it randomly. This results in the # zero data being spread evenly throughout the buffer. # Hopefully the shrinking this causes will cause us to # naturally fail to hit the bound. # If it doesn't then we will queue the new version up again # (now with more zeros) and try again. 
overdrawn = zero_bound_queue.pop() buffer = bytearray(overdrawn.buffer) self.random.shuffle(buffer) buffer = hbytes(buffer) if buffer == overdrawn.buffer: continue def draw_bytes(data, n, distribution): result = buffer[data.index:data.index + n] if len(result) < n: result += hbytes(n - len(result)) return self.__rewrite(data, result) data = ConjectureData( draw_bytes=draw_bytes, max_length=self.settings.buffer_size, ) self.test_function(data) data.freeze() elif mutations >= self.settings.max_mutations: mutations = 0 data = self.new_buffer() mutator = self._new_mutator() else: data = ConjectureData( draw_bytes=mutator, max_length=self.settings.buffer_size ) self.test_function(data) data.freeze() prev_data = self.last_data if self.consider_new_test_data(data): self.last_data = data if data.status > prev_data.status: mutations = 0 else: mutator = self._new_mutator() if getattr(data, 'hit_zero_bound', False): zero_bound_queue.append(data) mutations += 1 if self.__tree_is_exhausted(): self.exit_reason = ExitReason.finished return data = self.last_data if data is None: self.exit_reason = ExitReason.finished return assert isinstance(data.output, text_type) if self.settings.max_shrinks <= 0: self.exit_reason = ExitReason.max_shrinks return if Phase.shrink not in self.settings.phases: self.exit_reason = ExitReason.finished return data = ConjectureData.for_buffer(self.last_data.buffer) self.test_function(data) if data.status != Status.INTERESTING: self.exit_reason = ExitReason.flaky return self.shrink() def zero_blocks(self): """Try replacing blocks with zero blocks, starting from the right and proceeding leftwards. Normally we would proceed from left to right, in keeping with our policy of lexicographic minimization - making shrinks to the right seems like it should be "wasted work" which we might undo later. The motivation for doing it this way is that this can unlock shrinks that would become impossible otherwise: If we shrink entirely moving rightwards, then this ends up with a lot of the complexity of an example "trapped at the end", leaving a lot of dead space in the middle. An example of where this can happen is with lists or matrices defined by a length parameter, where only one or two of the values actually matter: If we start from the left then what we'll find is we replace all the early values with zero, leave the later values as the ones that matter, and then we can't shrink the length parameter. """ self.debug('Zeroing individual blocks') # We first do a binary search on the hope that a lot of blocks are # replacable. If not, we only pay a log(n) cost so it's no big deal. # We can replace all blocks >= hi with zero. We cannot replace # all blocks >= lo with zero. lo = 0 hi = len(self.last_data.blocks) while lo + 1 < hi: mid = (lo + hi) // 2 try: u = self.last_data.blocks[mid][0] except IndexError: # This shouldn't really happen, but may in the presence of a # bad test function whose block structure varies based on some # sort of external data. We could possibly detect this better # and signal an error, but it's hard to do so reliably so # instead we just try to be robust in the face of it. break if self.incorporate_new_buffer( self.last_data.buffer[:u] + hbytes(len(self.last_data.buffer) - u), ): hi = mid else: lo = mid for i in hrange(len(self.last_data.blocks) - 1, -1, -1): # The case where this is not true is hard to hit reliably, and only # exists for similar reasons to the above: It guards against # invalid data generation. 
if i < len(self.last_data.blocks): # pragma: no branch u, v = self.last_data.blocks[i] self.incorporate_new_buffer( self.last_data.buffer[:u] + hbytes(v - u) + self.last_data.buffer[v:], ) def shrink(self): # We assume that if an all-zero block of bytes is an interesting # example then we're not going to do better than that. # This might not technically be true: e.g. for integers() | booleans() # the simplest example is actually [1, 0]. Missing this case is fairly # harmless and this allows us to make various simplifying assumptions # about the structure of the data (principally that we're never # operating on a block of all zero bytes so can use non-zeroness as a # signpost of complexity). if ( not any(self.last_data.buffer) or self.incorporate_new_buffer(hbytes(len(self.last_data.buffer))) ): self.exit_reason = ExitReason.finished return if self.has_existing_examples(): corpus = sorted( self.settings.database.fetch(self.database_key), key=sort_key ) # We always have self.last_data.buffer in the database because # we save every interesting example. This means we will always # trigger the first break and thus never exit the loop normally. for c in corpus: # pragma: no branch if sort_key(c) >= sort_key(self.last_data.buffer): break elif self.incorporate_new_buffer(c): break else: self.settings.database.delete(self.database_key, c) change_counter = -1 while self.changed > change_counter: change_counter = self.changed self.debug('Structured interval deletes') k = len(self.last_data.intervals) // 2 while k > 0: i = 0 while i + k <= len(self.last_data.intervals): bitmask = [True] * len(self.last_data.buffer) for u, v in self.last_data.intervals[i:i + k]: for t in range(u, v): bitmask[t] = False u, v = self.last_data.intervals[i] if not self.incorporate_new_buffer(hbytes( b for b, v in zip(self.last_data.buffer, bitmask) if v )): i += k k //= 2 self.zero_blocks() minimize( self.last_data.buffer, self.incorporate_new_buffer, cautious=True, random=self.random, ) if change_counter != self.changed: self.debug('Restarting') continue self.debug('Bulk replacing blocks with simpler blocks') i = 0 while i < len(self.last_data.blocks): u, v = self.last_data.blocks[i] buf = self.last_data.buffer block = buf[u:v] n = v - u buffer = bytearray() for r, s in self.last_data.blocks: if s - r == n and self.last_data.buffer[r:s] > block: buffer.extend(block) else: buffer.extend(self.last_data.buffer[r:s]) self.incorporate_new_buffer(hbytes(buffer)) i += 1 self.debug('Simultaneous shrinking of duplicated blocks') block_counter = -1 while block_counter < self.changed: block_counter = self.changed blocks = [ k for k, count in Counter( self.last_data.buffer[u:v] for u, v in self.last_data.blocks).items() if count > 1 ] for block in blocks: parts = [ self.last_data.buffer[r:s] for r, s in self.last_data.blocks ] def replace(b): return hbytes(EMPTY_BYTES.join( hbytes(b if c == block else c) for c in parts )) minimize( block, lambda b: self.incorporate_new_buffer(replace(b)), random=self.random, ) if change_counter != self.changed: self.debug('Restarting') continue self.debug('Shrinking of individual blocks') i = 0 while i < len(self.last_data.blocks): u, v = self.last_data.blocks[i] minimize( self.last_data.buffer[u:v], lambda b: self.incorporate_new_buffer( self.last_data.buffer[:u] + b + self.last_data.buffer[v:], ), random=self.random, ) i += 1 if change_counter != self.changed: self.debug('Restarting') continue self.debug('Reordering blocks') block_lengths = sorted(self.last_data.block_starts, reverse=True) for n in 
block_lengths: i = 1 while i < len(self.last_data.block_starts.get(n, ())): j = i while j > 0: buf = self.last_data.buffer blocks = self.last_data.block_starts[n] a_start = blocks[j - 1] b_start = blocks[j] a = buf[a_start:a_start + n] b = buf[b_start:b_start + n] if a <= b: break swapped = ( buf[:a_start] + b + buf[a_start + n:b_start] + a + buf[b_start + n:]) assert len(swapped) == len(buf) assert swapped < buf if self.incorporate_new_buffer(swapped): j -= 1 else: break i += 1 self.debug('Shuffling suffixes while shrinking %r' % ( self.last_data.bind_points, )) b = 0 while b < len(self.last_data.bind_points): cutoff = sorted(self.last_data.bind_points)[b] def test_value(prefix): for t in hrange(5): alphabet = {} for i, j in self.last_data.blocks[b:]: alphabet.setdefault(j - i, []).append((i, j)) if t > 0: for v in alphabet.values(): self.random.shuffle(v) buf = bytearray(prefix) for i, j in self.last_data.blocks[b:]: u, v = alphabet[j - i].pop() buf.extend(self.last_data.buffer[u:v]) if self.incorporate_new_buffer(hbytes(buf)): return True return False minimize( self.last_data.buffer[:cutoff], test_value, cautious=True, random=self.random, ) b += 1 self.exit_reason = ExitReason.finished def event_to_string(self, event): if isinstance(event, str): return event try: return self.events_to_strings[event] except KeyError: pass result = str(event) self.events_to_strings[event] = result return result
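# A standalone sketch of the invariant the reordering pass above relies on:
# for two non-overlapping equal-length blocks with a > b, swapping them always
# produces a lexicographically smaller buffer, so the pass amounts to a bubble
# sort in which every accepted swap is also a valid shrink. The helper name
# here is illustrative, not part of the runner.
def swap_blocks(buf, a_start, b_start, n):
    """Swap the length-``n`` blocks at ``a_start`` and ``b_start``
    (requires a_start + n <= b_start)."""
    a = buf[a_start:a_start + n]
    b = buf[b_start:b_start + n]
    return (buf[:a_start] + b + buf[a_start + n:b_start] +
            a + buf[b_start + n:])

buf = bytes([9, 9, 1, 1])
swapped = swap_blocks(buf, 0, 2, 2)
assert swapped == bytes([1, 1, 9, 9]) and swapped < buf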
def _run(self): self.last_data = None mutations = 0 start_time = time.time() self.reuse_existing_examples() if ( Phase.generate in self.settings.phases and not self.__tree_is_exhausted() ): if ( self.last_data is None or self.last_data.status < Status.INTERESTING ): self.new_buffer() mutator = self._new_mutator() zero_bound_queue = [] while ( self.last_data.status != Status.INTERESTING and not self.__tree_is_exhausted() ): if self.valid_examples >= self.settings.max_examples: self.exit_reason = ExitReason.max_examples return if self.call_count >= max( self.settings.max_iterations, self.settings.max_examples ): self.exit_reason = ExitReason.max_iterations return if ( self.settings.timeout > 0 and time.time() >= start_time + self.settings.timeout ): self.exit_reason = ExitReason.timeout return if zero_bound_queue: # Whenever we generated an example and it hits a bound # which forces zero blocks into it, this creates a weird # distortion effect by making certain parts of the data # stream (especially ones to the right) much more likely # to be zero. We fix this by redistributing the generated # data by shuffling it randomly. This results in the # zero data being spread evenly throughout the buffer. # Hopefully the shrinking this causes will cause us to # naturally fail to hit the bound. # If it doesn't then we will queue the new version up again # (now with more zeros) and try again. overdrawn = zero_bound_queue.pop() buffer = bytearray(overdrawn.buffer) # These will have values written to them that are different # from what's in them anyway, so the value there doesn't # really "count" for distributional purposes, and if we # leave them in then they can cause the fraction of non # zero bytes to increase on redraw instead of decrease. for i in overdrawn.forced_indices: buffer[i] = 0 self.random.shuffle(buffer) buffer = hbytes(buffer) if buffer == overdrawn.buffer: continue def draw_bytes(data, n): result = buffer[data.index:data.index + n] if len(result) < n: result += hbytes(n - len(result)) return self.__rewrite(data, result) data = ConjectureData( draw_bytes=draw_bytes, max_length=self.settings.buffer_size, ) self.test_function(data) data.freeze() elif mutations >= self.settings.max_mutations: mutations = 0 data = self.new_buffer() mutator = self._new_mutator() else: data = ConjectureData( draw_bytes=mutator, max_length=self.settings.buffer_size ) self.test_function(data) data.freeze() prev_data = self.last_data if self.consider_new_test_data(data): self.last_data = data if data.status > prev_data.status: mutations = 0 else: mutator = self._new_mutator() if getattr(data, 'hit_zero_bound', False): zero_bound_queue.append(data) mutations += 1 if self.__tree_is_exhausted(): self.exit_reason = ExitReason.finished return data = self.last_data if data is None: self.exit_reason = ExitReason.finished return assert isinstance(data.output, text_type) if Phase.shrink not in self.settings.phases: self.exit_reason = ExitReason.finished return data = ConjectureData.for_buffer(self.last_data.buffer) self.test_function(data) if data.status != Status.INTERESTING: self.exit_reason = ExitReason.flaky return while len(self.shrunk_examples) < len(self.interesting_examples): target, d = min([ (k, v) for k, v in self.interesting_examples.items() if k not in self.shrunk_examples], key=lambda kv: (sort_key(kv[1].buffer), sort_key(repr(kv[0]))), ) self.debug('Shrinking %r' % (target,)) self.last_data = d assert self.last_data.interesting_origin == target self.shrink() self.shrunk_examples.add(target)
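# A minimal standalone sketch (names illustrative) of the zero-bound
# redistribution used in the loop above: shuffling preserves the multiset of
# bytes, so the fraction of zeros is unchanged while their positions become
# uniformly random instead of clustered at the point where the bound was hit.
from random import Random

def redistribute_zeros(buffer, rnd):
    redistributed = bytearray(buffer)
    rnd.shuffle(redistributed)  # same bytes, uniformly random positions
    return bytes(redistributed)

rnd = Random(0)
overdrawn = bytes([7, 255, 3, 9]) + bytes(4)  # tail forced to zero by the bound
assert sorted(redistribute_zeros(overdrawn, rnd)) == sorted(overdrawn)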
class ConjectureRunner(object): def __init__( self, test_function, settings=None, random=None, database_key=None, ): self._test_function = test_function self.settings = settings or Settings() self.last_data = None self.shrinks = 0 self.call_count = 0 self.event_call_counts = Counter() self.valid_examples = 0 self.start_time = time.time() self.random = random or Random(getrandbits(128)) self.database_key = database_key self.status_runtimes = {} self.events_to_strings = WeakKeyDictionary() self.target_selector = TargetSelector(self.random) # Tree nodes are stored in an array to prevent heavy nesting of data # structures. Branches are dicts mapping bytes to child nodes (which # will in general only be partially populated). Leaves are # ConjectureData objects that have been previously seen as the result # of following that path. self.tree = [{}] # A node is dead if there is nothing left to explore past that point. # Recursively, a node is dead if either it is a leaf or every byte # leads to a dead node when starting from here. self.dead = set() # We rewrite the byte stream at various points during parsing, to one # that will produce an equivalent result but is in some sense more # canonical. We keep track of these so that when walking the tree we # can identify nodes where the exact byte value doesn't matter and # treat all bytes there as equivalent. This significantly reduces the # size of the search space and removes a lot of redundant examples. # Maps tree indices where to the unique byte that is valid at that # point. Corresponds to data.write() calls. self.forced = {} # Maps tree indices to the maximum byte that is valid at that point. # Currently this is only used inside draw_bits, but it potentially # could get used elsewhere. self.capped = {} # Where a tree node consists of the beginning of a block we track the # size of said block. This allows us to tell when an example is too # short even if it goes off the unexplored region of the tree - if it # is at the beginning of a block of size 4 but only has 3 bytes left, # it's going to overrun the end of the buffer regardless of the # buffer contents. 
self.block_sizes = {} self.interesting_examples = {} self.covering_examples = {} self.shrunk_examples = set() self.tag_intern_table = {} def __tree_is_exhausted(self): return 0 in self.dead def test_function(self, data): self.call_count += 1 try: self._test_function(data) data.freeze() except StopTest as e: if e.testcounter != data.testcounter: self.save_buffer(data.buffer) raise e except: self.save_buffer(data.buffer) raise finally: data.freeze() self.note_details(data) self.target_selector.add(data) self.debug_data(data) tags = frozenset( self.tag_intern_table.setdefault(t, t) for t in data.tags ) data.tags = self.tag_intern_table.setdefault(tags, tags) if data.status == Status.VALID: self.valid_examples += 1 for t in data.tags: existing = self.covering_examples.get(t) if ( existing is None or sort_key(data.buffer) < sort_key(existing.buffer) ): self.covering_examples[t] = data if self.database is not None: self.database.save(self.covering_key, data.buffer) if existing is not None: self.database.delete( self.covering_key, existing.buffer) tree_node = self.tree[0] indices = [] node_index = 0 for i, b in enumerate(data.buffer): indices.append(node_index) if i in data.forced_indices: self.forced[node_index] = b try: self.capped[node_index] = data.capped_indices[i] except KeyError: pass try: node_index = tree_node[b] except KeyError: node_index = len(self.tree) self.tree.append({}) tree_node[b] = node_index tree_node = self.tree[node_index] if node_index in self.dead: break for u, v in data.blocks: # This can happen if we hit a dead node when walking the buffer. # In that case we already have this section of the tree mapped. if u >= len(indices): break self.block_sizes[indices[u]] = v - u if data.status != Status.OVERRUN and node_index not in self.dead: self.dead.add(node_index) self.tree[node_index] = data for j in reversed(indices): if ( len(self.tree[j]) < self.capped.get(j, 255) + 1 and j not in self.forced ): break if set(self.tree[j].values()).issubset(self.dead): self.dead.add(j) else: break last_data_is_interesting = ( self.last_data is not None and self.last_data.status == Status.INTERESTING ) if data.status == Status.INTERESTING: first_call = len(self.interesting_examples) == 0 key = data.interesting_origin changed = False try: existing = self.interesting_examples[key] except KeyError: changed = True else: if sort_key(data.buffer) < sort_key(existing.buffer): self.downgrade_buffer(existing.buffer) changed = True if changed: self.interesting_examples[key] = data self.shrunk_examples.discard(key) if last_data_is_interesting and not first_call: self.shrinks += 1 if not last_data_is_interesting or ( sort_key(data.buffer) < sort_key(self.last_data.buffer) and data.interesting_origin == self.last_data.interesting_origin ): self.last_data = data if self.shrinks >= self.settings.max_shrinks: self.exit_with(ExitReason.max_shrinks) elif ( self.last_data is None or self.last_data.status < Status.INTERESTING ): self.last_data = data if ( self.settings.timeout > 0 and time.time() >= self.start_time + self.settings.timeout ): self.exit_with(ExitReason.timeout) if not self.interesting_examples: if self.valid_examples >= self.settings.max_examples: self.exit_with(ExitReason.max_examples) if self.call_count >= max( self.settings.max_iterations, self.settings.max_examples ): self.exit_with(ExitReason.max_iterations) if self.__tree_is_exhausted(): self.exit_with(ExitReason.finished) def save_buffer(self, buffer, key=None): if self.settings.database is not None: if key is None: key = self.database_key
if key is None: return self.settings.database.save(key, hbytes(buffer)) def downgrade_buffer(self, buffer): if self.settings.database is not None: self.settings.database.move( self.database_key, self.secondary_key, buffer) @property def secondary_key(self): return b'.'.join((self.database_key, b"secondary")) @property def covering_key(self): return b'.'.join((self.database_key, b"coverage")) def note_details(self, data): if data.status == Status.INTERESTING: if ( self.last_data is None or self.last_data.status != Status.INTERESTING or self.last_data.interesting_origin == data.interesting_origin ): self.save_buffer(data.buffer) else: self.save_buffer(data.buffer, self.secondary_key) runtime = max(data.finish_time - data.start_time, 0.0) self.status_runtimes.setdefault(data.status, []).append(runtime) for event in set(map(self.event_to_string, data.events)): self.event_call_counts[event] += 1 def debug(self, message): with self.settings: debug_report(message) def debug_data(self, data): buffer_parts = [u"["] for i, (u, v) in enumerate(data.blocks): if i > 0: buffer_parts.append(u" || ") buffer_parts.append( u', '.join(int_to_text(int(i)) for i in data.buffer[u:v])) buffer_parts.append(u']') status = unicode_safe_repr(data.status) if data.status == Status.INTERESTING: status = u'%s (%s)' % ( status, unicode_safe_repr(data.interesting_origin,)) self.debug(u'%d bytes %s -> %s, %s' % ( data.index, u''.join(buffer_parts), status, data.output, )) def prescreen_buffer(self, buffer): """Attempt to rule out buffer as a possible interesting candidate. Returns False if we know for sure that running this buffer will not produce an interesting result. Returns True if it might (because it explores territory we have not previously tried). This is purely an optimisation to try to reduce the number of tests we run. "return True" would be a valid but inefficient implementation. """ node_index = 0 n = len(buffer) for k, b in enumerate(buffer): if node_index in self.dead: return False try: # The block size at that point provides a lower bound on how # many more bytes are required. If the buffer does not have # enough bytes to fulfill that block size then we can rule out # this buffer. 
if k + self.block_sizes[node_index] > n: return False except KeyError: pass try: b = self.forced[node_index] except KeyError: pass try: b = min(b, self.capped[node_index]) except KeyError: pass try: node_index = self.tree[node_index][b] except KeyError: return True else: return False def incorporate_new_buffer(self, buffer): assert self.last_data.status == Status.INTERESTING start = self.last_data.interesting_origin buffer = hbytes(buffer[:self.last_data.index]) assert sort_key(buffer) < sort_key(self.last_data.buffer) if not self.prescreen_buffer(buffer): return False assert sort_key(buffer) <= sort_key(self.last_data.buffer) data = ConjectureData.for_buffer(buffer) self.test_function(data) assert self.last_data.interesting_origin == start return data is self.last_data def run(self): with self.settings: try: self._run() except RunIsComplete: pass if self.interesting_examples: self.last_data = max( self.interesting_examples.values(), key=lambda d: sort_key(d.buffer)) if self.last_data is not None: self.debug_data(self.last_data) self.debug( u'Run complete after %d examples (%d valid) and %d shrinks' % ( self.call_count, self.valid_examples, self.shrinks, )) def _new_mutator(self): def draw_new(data, n): return uniform(self.random, n) def draw_existing(data, n): return self.last_data.buffer[data.index:data.index + n] def draw_smaller(data, n): existing = self.last_data.buffer[data.index:data.index + n] r = uniform(self.random, n) if r <= existing: return r return _draw_predecessor(self.random, existing) def draw_larger(data, n): existing = self.last_data.buffer[data.index:data.index + n] r = uniform(self.random, n) if r >= existing: return r return _draw_successor(self.random, existing) def reuse_existing(data, n): choices = data.block_starts.get(n, []) or \ self.last_data.block_starts.get(n, []) if choices: i = self.random.choice(choices) return self.last_data.buffer[i:i + n] else: result = uniform(self.random, n) assert isinstance(result, hbytes) return result def flip_bit(data, n): buf = bytearray( self.last_data.buffer[data.index:data.index + n]) i = self.random.randint(0, n - 1) k = self.random.randint(0, 7) buf[i] ^= (1 << k) return hbytes(buf) def draw_zero(data, n): return hbytes(b'\0' * n) def draw_max(data, n): return hbytes([255]) * n def draw_constant(data, n): return bytes_from_list([ self.random.randint(0, 255) ] * n) def redraw_last(data, n): u = self.last_data.blocks[-1][0] if data.index + n <= u: return self.last_data.buffer[data.index:data.index + n] else: return uniform(self.random, n) options = [ draw_new, redraw_last, redraw_last, reuse_existing, reuse_existing, draw_existing, draw_smaller, draw_larger, flip_bit, draw_zero, draw_max, draw_zero, draw_max, draw_constant, ] bits = [ self.random.choice(options) for _ in hrange(3) ] def draw_mutated(data, n): if ( data.index + n > len(self.last_data.buffer) ): result = uniform(self.random, n) else: result = self.random.choice(bits)(data, n) return self.__rewrite_for_novelty( data, self.__zero_bound(data, result)) return draw_mutated def __rewrite(self, data, result): return self.__rewrite_for_novelty( data, self.__zero_bound(data, result) ) def __zero_bound(self, data, result): """This tries to get the size of the generated data under control by replacing the result with zero if we are too deep or have already generated too much data. This causes us to enter "shrinking mode" there and thus reduce the size of the generated data. 
""" if ( data.depth * 2 >= MAX_DEPTH or (data.index + len(result)) * 2 >= self.settings.buffer_size ): if any(result): data.hit_zero_bound = True return hbytes(len(result)) else: return result def __rewrite_for_novelty(self, data, result): """Take a block that is about to be added to data as the result of a draw_bytes call and rewrite it a small amount to ensure that the result will be novel: that is, not hit a part of the tree that we have fully explored. This is mostly useful for test functions which draw a small number of blocks. """ assert isinstance(result, hbytes) try: node_index = data.__current_node_index except AttributeError: node_index = 0 data.__current_node_index = node_index data.__hit_novelty = False data.__evaluated_to = 0 if data.__hit_novelty: return result node = self.tree[node_index] for i in hrange(data.__evaluated_to, len(data.buffer)): node = self.tree[node_index] try: node_index = node[data.buffer[i]] assert node_index not in self.dead node = self.tree[node_index] except KeyError: data.__hit_novelty = True return result for i, b in enumerate(result): assert isinstance(b, int) try: new_node_index = node[b] except KeyError: data.__hit_novelty = True return result new_node = self.tree[new_node_index] if new_node_index in self.dead: if isinstance(result, hbytes): result = bytearray(result) for c in range(256): if c not in node: assert c <= self.capped.get(node_index, c) result[i] = c data.__hit_novelty = True return hbytes(result) else: new_node_index = node[c] new_node = self.tree[new_node_index] if new_node_index not in self.dead: result[i] = c break else: # pragma: no cover assert False, ( 'Found a tree node which is live despite all its ' 'children being dead.') node_index = new_node_index node = new_node assert node_index not in self.dead data.__current_node_index = node_index data.__evaluated_to = data.index + len(result) return hbytes(result) @property def database(self): if self.database_key is None: return None return self.settings.database def has_existing_examples(self): return ( self.database is not None and Phase.reuse in self.settings.phases ) def reuse_existing_examples(self): """If appropriate (we have a database and have been told to use it), try to reload existing examples from the database. If there are a lot we don't try all of them. We always try the smallest example in the database (which is guaranteed to be the last failure) and the largest (which is usually the seed example which the last failure came from but we don't enforce that). We then take a random sampling of the remainder and try those. Any examples that are no longer interesting are cleared out. """ if self.has_existing_examples(): self.debug('Reusing examples from database') # We have to do some careful juggling here. We have two database # corpora: The primary and secondary. The primary corpus is a # small set of minimized examples each of which has at one point # demonstrated a distinct bug. We want to retry all of these. # We also have a secondary corpus of examples that have at some # point demonstrated interestingness (currently only ones that # were previously non-minimal examples of a bug, but this will # likely expand in future). These are a good source of potentially # interesting examples, but there are a lot of them, so we down # sample the secondary corpus to a more manageable size. 
corpus = sorted( self.settings.database.fetch(self.database_key), key=sort_key ) desired_size = max(2, ceil(0.1 * self.settings.max_examples)) for extra_key in [self.secondary_key, self.covering_key]: if len(corpus) < desired_size: extra_corpus = list( self.settings.database.fetch(extra_key), ) shortfall = desired_size - len(corpus) if len(extra_corpus) <= shortfall: extra = extra_corpus else: extra = self.random.sample(extra_corpus, shortfall) extra.sort(key=sort_key) corpus.extend(extra) for existing in corpus: self.last_data = ConjectureData.for_buffer(existing) try: self.test_function(self.last_data) finally: if self.last_data.status != Status.INTERESTING: self.settings.database.delete( self.database_key, existing) self.settings.database.delete( self.secondary_key, existing) def exit_with(self, reason): self.exit_reason = reason raise RunIsComplete() def generate_new_examples(self): if Phase.generate not in self.settings.phases: return zero_data = ConjectureData( max_length=self.settings.buffer_size, draw_bytes=lambda data, n: self.__rewrite_for_novelty( data, hbytes(n))) self.test_function(zero_data) count = 0 while count < 10 and not self.interesting_examples: def draw_bytes(data, n): return self.__rewrite_for_novelty( data, self.__zero_bound(data, uniform(self.random, n)) ) targets_found = len(self.covering_examples) self.last_data = ConjectureData( max_length=self.settings.buffer_size, draw_bytes=draw_bytes ) self.test_function(self.last_data) self.last_data.freeze() if len(self.covering_examples) > targets_found: count = 0 else: count += 1 mutations = 0 mutator = self._new_mutator() zero_bound_queue = [] while not self.interesting_examples: if zero_bound_queue: # Whenever we generated an example and it hits a bound # which forces zero blocks into it, this creates a weird # distortion effect by making certain parts of the data # stream (especially ones to the right) much more likely # to be zero. We fix this by redistributing the generated # data by shuffling it randomly. This results in the # zero data being spread evenly throughout the buffer. # Hopefully the shrinking this causes will cause us to # naturally fail to hit the bound. # If it doesn't then we will queue the new version up again # (now with more zeros) and try again. overdrawn = zero_bound_queue.pop() buffer = bytearray(overdrawn.buffer) # These will have values written to them that are different # from what's in them anyway, so the value there doesn't # really "count" for distributional purposes, and if we # leave them in then they can cause the fraction of non # zero bytes to increase on redraw instead of decrease. 
for i in overdrawn.forced_indices: buffer[i] = 0 self.random.shuffle(buffer) buffer = hbytes(buffer) def draw_bytes(data, n): result = buffer[data.index:data.index + n] if len(result) < n: result += hbytes(n - len(result)) return self.__rewrite(data, result) data = ConjectureData( draw_bytes=draw_bytes, max_length=self.settings.buffer_size, ) self.test_function(data) data.freeze() else: target, last_data = self.target_selector.select() mutations += 1 targets_found = len(self.covering_examples) prev_data = self.last_data data = ConjectureData( draw_bytes=mutator, max_length=self.settings.buffer_size ) self.test_function(data) data.freeze() if ( data.status > prev_data.status or len(self.covering_examples) > targets_found ): mutations = 0 elif ( data.status < prev_data.status or not self.target_selector.has_tag(target, data) or mutations >= self.settings.max_mutations ): mutations = 0 mutator = self._new_mutator() if getattr(data, 'hit_zero_bound', False): zero_bound_queue.append(data) mutations += 1 def _run(self): self.last_data = None self.start_time = time.time() self.reuse_existing_examples() self.generate_new_examples() if ( Phase.shrink not in self.settings.phases or not self.interesting_examples ): self.exit_with(ExitReason.finished) for prev_data in sorted( self.interesting_examples.values(), key=lambda d: sort_key(d.buffer) ): assert prev_data.status == Status.INTERESTING data = ConjectureData.for_buffer(prev_data.buffer) self.test_function(data) if data.status != Status.INTERESTING: self.exit_with(ExitReason.flaky) while len(self.shrunk_examples) < len(self.interesting_examples): target, self.last_data = min([ (k, v) for k, v in self.interesting_examples.items() if k not in self.shrunk_examples], key=lambda kv: (sort_key(kv[1].buffer), sort_key(repr(kv[0]))), ) self.debug('Shrinking %r' % (target,)) assert self.last_data.interesting_origin == target self.shrink() self.shrunk_examples.add(target) self.exit_with(ExitReason.finished) def try_buffer_with_rewriting_from(self, initial_attempt, v): initial_data = None node_index = 0 for c in initial_attempt: try: node_index = self.tree[node_index][c] except KeyError: break node = self.tree[node_index] if isinstance(node, ConjectureData): initial_data = node break if initial_data is None: initial_data = ConjectureData.for_buffer(initial_attempt) self.test_function(initial_data) if initial_data.status == Status.INTERESTING: return initial_data is self.last_data # If this produced something completely invalid we ditch it # here rather than trying to persevere. if initial_data.status < Status.VALID: return False if len(initial_data.buffer) < v: return False lost_data = len(self.last_data.buffer) - \ len(initial_data.buffer) # If this did not in fact cause the data size to shrink we # bail here because it's not worth trying to delete stuff from # the remainder. 
if lost_data <= 0: return False try_with_deleted = bytearray(initial_attempt) del try_with_deleted[v:v + lost_data] try_with_deleted.extend(hbytes(lost_data - 1)) if self.incorporate_new_buffer(try_with_deleted): return True for r, s in self.last_data.intervals: if ( r >= v and s - r <= lost_data and r < len(initial_data.buffer) ): try_with_deleted = bytearray(initial_attempt) del try_with_deleted[r:s] try_with_deleted.extend(hbytes(s - r - 1)) if self.incorporate_new_buffer(try_with_deleted): return True return False def delta_interval_deletion(self): """Attempt to delete every interval in the example.""" self.debug('delta interval deletes') # We do a delta-debugging style thing here where we initially try to # delete many intervals at once and prune it down exponentially to # eventually only trying to delete one interval at a time. # I'm a little skeptical that this is helpful in general, but we've # got at least one benchmark where it does help. k = len(self.last_data.intervals) // 2 while k > 0: i = 0 while i + k <= len(self.last_data.intervals): bitmask = [True] * len(self.last_data.buffer) for u, v in self.last_data.intervals[i:i + k]: for t in range(u, v): bitmask[t] = False if not self.incorporate_new_buffer(hbytes( b for b, v in zip(self.last_data.buffer, bitmask) if v )): i += k k //= 2 def greedy_interval_deletion(self): """Attempt to delete every interval in the example.""" self.debug('greedy interval deletes') i = 0 while i < len(self.last_data.intervals): u, v = self.last_data.intervals[i] if not self.incorporate_new_buffer( self.last_data.buffer[:u] + self.last_data.buffer[v:] ): i += 1 def coarse_block_replacement(self): """Attempts to zero every block. This is a very coarse pass that we only run once to attempt to remove some irrelevant detail. The main purpose of it is that if we manage to zero a lot of data then many attempted deletes become duplicates of each other, so we run fewer tests. If more blocks become possible to zero later that will be handled by minimize_individual_blocks. The point of this is simply to provide a fairly fast initial pass. 
""" self.debug('Zeroing blocks') i = 0 while i < len(self.last_data.blocks): buf = self.last_data.buffer u, v = self.last_data.blocks[i] assert u < v block = buf[u:v] if any(block): self.incorporate_new_buffer( buf[:u] + hbytes(v - u) + buf[v:] ) i += 1 def minimize_duplicated_blocks(self): """Find blocks that have been duplicated in multiple places and attempt to minimize all of the duplicates simultaneously.""" self.debug('Simultaneous shrinking of duplicated blocks') counts = Counter( self.last_data.buffer[u:v] for u, v in self.last_data.blocks ) blocks = [ k for k, count in counts.items() if count > 1 ] thresholds = {} for u, v in self.last_data.blocks: b = self.last_data.buffer[u:v] thresholds[b] = v blocks.sort(reverse=True) blocks.sort(key=lambda b: counts[b] * len(b), reverse=True) for block in blocks: parts = [ self.last_data.buffer[r:s] for r, s in self.last_data.blocks ] def replace(b): return hbytes(EMPTY_BYTES.join( hbytes(b if c == block else c) for c in parts )) threshold = thresholds[block] minimize( block, lambda b: self.try_buffer_with_rewriting_from( replace(b), threshold), random=self.random, full=False ) def minimize_individual_blocks(self): self.debug('Shrinking of individual blocks') i = 0 while i < len(self.last_data.blocks): u, v = self.last_data.blocks[i] minimize( self.last_data.buffer[u:v], lambda b: self.try_buffer_with_rewriting_from( self.last_data.buffer[:u] + b + self.last_data.buffer[v:], v ), random=self.random, full=False, ) i += 1 def reorder_blocks(self): self.debug('Reordering blocks') block_lengths = sorted(self.last_data.block_starts, reverse=True) for n in block_lengths: i = 1 while i < len(self.last_data.block_starts.get(n, ())): j = i while j > 0: buf = self.last_data.buffer blocks = self.last_data.block_starts[n] a_start = blocks[j - 1] b_start = blocks[j] a = buf[a_start:a_start + n] b = buf[b_start:b_start + n] if a <= b: break swapped = ( buf[:a_start] + b + buf[a_start + n:b_start] + a + buf[b_start + n:]) assert len(swapped) == len(buf) assert swapped < buf if self.incorporate_new_buffer(swapped): j -= 1 else: break i += 1 def shrink(self): # We assume that if an all-zero block of bytes is an interesting # example then we're not going to do better than that. # This might not technically be true: e.g. for integers() | booleans() # the simplest example is actually [1, 0]. Missing this case is fairly # harmless and this allows us to make various simplifying assumptions # about the structure of the data (principally that we're never # operating on a block of all zero bytes so can use non-zeroness as a # signpost of complexity). if ( not any(self.last_data.buffer) or self.incorporate_new_buffer(hbytes(len(self.last_data.buffer))) ): return if self.has_existing_examples(): # If we have any smaller examples in the secondary corpus, now is # a good time to try them to see if they work as shrinks. They # probably won't, but it's worth a shot and gives us a good # opportunity to clear out the database. # It's not worth trying the primary corpus because we already # tried all of those in the initial phase. corpus = sorted( self.settings.database.fetch(self.secondary_key), key=sort_key ) for c in corpus: if sort_key(c) >= sort_key(self.last_data.buffer): break elif self.incorporate_new_buffer(c): break else: self.settings.database.delete(self.secondary_key, c) # Coarse passes that are worth running once when the example is likely # to be "far from shrunk" but not worth repeating in a loop because # they are subsumed by more fine grained passes. 
        self.delta_interval_deletion()
        self.coarse_block_replacement()

        change_counter = -1
        while self.shrinks > change_counter:
            change_counter = self.shrinks
            self.minimize_duplicated_blocks()
            self.minimize_individual_blocks()
            self.reorder_blocks()
            self.greedy_interval_deletion()

    def event_to_string(self, event):
        if isinstance(event, str):
            return event
        try:
            return self.events_to_strings[event]
        except KeyError:
            pass
        result = str(event)
        self.events_to_strings[event] = result
        return result
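# A minimal standalone sketch of the delta-debugging pattern that
# delta_interval_deletion uses above: attempt to delete k chunks at a time,
# halving k after each sweep, so large deletions are tried before single-chunk
# ones. ``chunks`` and ``interesting`` stand in for the runner's intervals and
# its incorporate_new_buffer check.
def ddmin_chunks(chunks, interesting):
    k = len(chunks) // 2
    while k > 0:
        i = 0
        while i + k <= len(chunks):
            attempt = chunks[:i] + chunks[i + k:]
            if interesting(attempt):
                chunks = attempt  # keep the smaller reproduction
            else:
                i += k  # skip past chunks that cannot be deleted
        k //= 2
    return chunks

# Example: anything containing both 3 and 7 stays "interesting".
assert ddmin_chunks([1, 3, 5, 7, 9], lambda c: 3 in c and 7 in c) == [3, 7]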
class ConjectureRunner(object): def __init__( self, test_function, settings=None, random=None, database_key=None, ): self._test_function = test_function self.settings = settings or Settings() self.last_data = None self.changed = 0 self.shrinks = 0 self.call_count = 0 self.event_call_counts = Counter() self.valid_examples = 0 self.start_time = time.time() self.random = random or Random(getrandbits(128)) self.database_key = database_key self.seen = set() self.duplicates = 0 self.status_runtimes = {} self.events_to_strings = WeakKeyDictionary() def new_buffer(self): self.last_data = ConjectureData( max_length=self.settings.buffer_size, draw_bytes=lambda data, n, distribution: distribution(self.random, n) ) self.test_function(self.last_data) self.last_data.freeze() def test_function(self, data): self.call_count += 1 try: self._test_function(data) data.freeze() except StopTest as e: if e.testcounter != data.testcounter: self.save_buffer(data.buffer) raise e except: self.save_buffer(data.buffer) raise finally: data.freeze() self.note_details(data) if ( data.status == Status.INTERESTING and ( self.last_data is None or data.buffer != self.last_data.buffer ) ): self.debug_data(data) if data.status >= Status.VALID: self.valid_examples += 1 def consider_new_test_data(self, data): # Transition rules: # 1. Transition cannot decrease the status # 2. Any transition which increases the status is valid # 3. If the previous status was interesting, only shrinking # transitions are allowed. key = hbytes(data.buffer) if key in self.seen: self.duplicates += 1 return False self.seen.add(key) if data.buffer == self.last_data.buffer: return False if self.last_data.status < data.status: return True if self.last_data.status > data.status: return False if data.status == Status.INVALID: return data.index >= self.last_data.index if data.status == Status.OVERRUN: return data.overdraw <= self.last_data.overdraw if data.status == Status.INTERESTING: assert len(data.buffer) <= len(self.last_data.buffer) if len(data.buffer) == len(self.last_data.buffer): assert data.buffer < self.last_data.buffer return True return True def save_buffer(self, buffer): if ( self.settings.database is not None and self.database_key is not None and Phase.reuse in self.settings.phases ): self.settings.database.save( self.database_key, hbytes(buffer) ) def note_details(self, data): if data.status == Status.INTERESTING: self.save_buffer(data.buffer) runtime = max(data.finish_time - data.start_time, 0.0) self.status_runtimes.setdefault(data.status, []).append(runtime) for event in set(map(self.event_to_string, data.events)): self.event_call_counts[event] += 1 def debug(self, message): with self.settings: debug_report(message) def debug_data(self, data): self.debug(u'%d bytes %s -> %s, %s' % ( data.index, unicode_safe_repr(list(data.buffer[:data.index])), unicode_safe_repr(data.status), data.output, )) def incorporate_new_buffer(self, buffer): if buffer in self.seen: return False assert self.last_data.status == Status.INTERESTING if ( self.settings.timeout > 0 and time.time() >= self.start_time + self.settings.timeout ): self.exit_reason = ExitReason.timeout raise RunIsComplete() buffer = buffer[:self.last_data.index] if sort_key(buffer) >= sort_key(self.last_data.buffer): return False assert sort_key(buffer) <= sort_key(self.last_data.buffer) data = ConjectureData.for_buffer(buffer) self.test_function(data) if self.consider_new_test_data(data): self.shrinks += 1 self.last_data = data if self.shrinks >= self.settings.max_shrinks: self.exit_reason = 
ExitReason.max_shrinks raise RunIsComplete() self.last_data = data self.changed += 1 return True return False def run(self): with self.settings: try: self._run() except RunIsComplete: pass self.debug( u'Run complete after %d examples (%d valid) and %d shrinks' % ( self.call_count, self.valid_examples, self.shrinks, )) def _new_mutator(self): def draw_new(data, n, distribution): return distribution(self.random, n) def draw_existing(data, n, distribution): return self.last_data.buffer[data.index:data.index + n] def draw_smaller(data, n, distribution): existing = self.last_data.buffer[data.index:data.index + n] r = distribution(self.random, n) if r <= existing: return r return _draw_predecessor(self.random, existing) def draw_larger(data, n, distribution): existing = self.last_data.buffer[data.index:data.index + n] r = distribution(self.random, n) if r >= existing: return r return _draw_successor(self.random, existing) def reuse_existing(data, n, distribution): choices = data.block_starts.get(n, []) or \ self.last_data.block_starts.get(n, []) if choices: i = self.random.choice(choices) return self.last_data.buffer[i:i + n] else: return distribution(self.random, n) def flip_bit(data, n, distribution): buf = bytearray( self.last_data.buffer[data.index:data.index + n]) i = self.random.randint(0, n - 1) k = self.random.randint(0, 7) buf[i] ^= (1 << k) return hbytes(buf) def draw_zero(data, n, distribution): return b'\0' * n def draw_constant(data, n, distribution): return bytes_from_list([ self.random.randint(0, 255) ] * n) options = [ draw_new, reuse_existing, reuse_existing, draw_existing, draw_smaller, draw_larger, flip_bit, draw_zero, draw_constant, ] bits = [ self.random.choice(options) for _ in hrange(3) ] def draw_mutated(data, n, distribution): if ( data.index + n > len(self.last_data.buffer) ): return distribution(self.random, n) return self.random.choice(bits)(data, n, distribution) return draw_mutated def _run(self): self.last_data = None mutations = 0 start_time = time.time() if ( self.settings.database is not None and self.database_key is not None ): corpus = sorted( self.settings.database.fetch(self.database_key), key=lambda d: (len(d), d) ) for existing in corpus: if self.valid_examples >= self.settings.max_examples: self.exit_reason = ExitReason.max_examples return if self.call_count >= max( self.settings.max_iterations, self.settings.max_examples ): self.exit_reason = ExitReason.max_iterations return data = ConjectureData.for_buffer(existing) self.test_function(data) data.freeze() self.last_data = data if data.status < Status.VALID: self.settings.database.delete( self.database_key, existing) elif data.status == Status.VALID: # Incremental garbage collection! we store a lot of # examples in the DB as we shrink: Those that stay # interesting get kept, those that become invalid get # dropped, but those that are merely valid gradually go # away over time. 
if self.random.randint(0, 2) == 0: self.settings.database.delete( self.database_key, existing) else: assert data.status == Status.INTERESTING self.last_data = data break if Phase.generate in self.settings.phases: if ( self.last_data is None or self.last_data.status < Status.INTERESTING ): self.new_buffer() mutator = self._new_mutator() while self.last_data.status != Status.INTERESTING: if self.valid_examples >= self.settings.max_examples: self.exit_reason = ExitReason.max_examples return if self.call_count >= max( self.settings.max_iterations, self.settings.max_examples ): self.exit_reason = ExitReason.max_iterations return if ( self.settings.timeout > 0 and time.time() >= start_time + self.settings.timeout ): self.exit_reason = ExitReason.timeout return if mutations >= self.settings.max_mutations: mutations = 0 self.new_buffer() mutator = self._new_mutator() else: data = ConjectureData( draw_bytes=mutator, max_length=self.settings.buffer_size ) self.test_function(data) data.freeze() prev_data = self.last_data if self.consider_new_test_data(data): self.last_data = data if data.status > prev_data.status: mutations = 0 else: mutator = self._new_mutator() mutations += 1 data = self.last_data if data is None: self.exit_reason = ExitReason.finished return assert isinstance(data.output, text_type) if self.settings.max_shrinks <= 0: self.exit_reason = ExitReason.max_shrinks return if Phase.shrink not in self.settings.phases: self.exit_reason = ExitReason.finished return if not self.last_data.buffer: self.exit_reason = ExitReason.finished return data = ConjectureData.for_buffer(self.last_data.buffer) self.test_function(data) if data.status != Status.INTERESTING: self.exit_reason = ExitReason.flaky return change_counter = -1 while self.changed > change_counter: change_counter = self.changed self.debug('Random interval deletes') failed_deletes = 0 while self.last_data.intervals and failed_deletes < 10: if self.random.randint(0, 1): u, v = self.random.choice(self.last_data.intervals) else: n = len(self.last_data.buffer) - 1 u, v = sorted(( self.random.choice(self.last_data.intervals) )) if ( v < len(self.last_data.buffer) ) and self.incorporate_new_buffer( self.last_data.buffer[:u] + self.last_data.buffer[v:] ): failed_deletes = 0 else: failed_deletes += 1 self.debug('Structured interval deletes') i = 0 while i < len(self.last_data.intervals): u, v = self.last_data.intervals[i] if not self.incorporate_new_buffer( self.last_data.buffer[:u] + self.last_data.buffer[v:] ): i += 1 if change_counter != self.changed: self.debug('Restarting') continue self.debug('Lexicographical minimization of whole buffer') minimize( self.last_data.buffer, self.incorporate_new_buffer, cautious=True ) if change_counter != self.changed: self.debug('Restarting') continue self.debug('Replacing blocks with simpler blocks') i = 0 while i < len(self.last_data.blocks): u, v = self.last_data.blocks[i] buf = self.last_data.buffer block = buf[u:v] n = v - u all_blocks = sorted(set([bytes(n)] + [ buf[a:a + n] for a in self.last_data.block_starts[n] ])) better_blocks = all_blocks[:all_blocks.index(block)] for b in better_blocks: if self.incorporate_new_buffer( buf[:u] + b + buf[v:] ): break i += 1 self.debug('Simultaneous shrinking of duplicated blocks') block_counter = -1 while block_counter < self.changed: block_counter = self.changed blocks = [ k for k, count in Counter( self.last_data.buffer[u:v] for u, v in self.last_data.blocks).items() if count > 1 ] for block in blocks: parts = [ self.last_data.buffer[r:s] for r, s in 
self.last_data.blocks ] def replace(b): return b''.join( bytes(b if c == block else c) for c in parts ) minimize( block, lambda b: self.incorporate_new_buffer(replace(b)), self.random ) self.debug('Shrinking of individual blocks') i = 0 while i < len(self.last_data.blocks): u, v = self.last_data.blocks[i] minimize( self.last_data.buffer[u:v], lambda b: self.incorporate_new_buffer( self.last_data.buffer[:u] + b + self.last_data.buffer[v:], ), self.random ) i += 1 self.debug('Replacing intervals with simpler intervals') interval_counter = -1 while interval_counter != self.changed: interval_counter = self.changed i = 0 alternatives = None while i < len(self.last_data.intervals): if alternatives is None: alternatives = sorted(set( self.last_data.buffer[u:v] for u, v in self.last_data.intervals), key=len) u, v = self.last_data.intervals[i] for a in alternatives: buf = self.last_data.buffer if ( len(a) < v - u or (len(a) == (v - u) and a < buf[u:v]) ): if self.incorporate_new_buffer( buf[:u] + a + buf[v:] ): alternatives = None break i += 1 if change_counter != self.changed: self.debug('Restarting') continue self.debug('Shuffling suffixes while shrinking %r' % ( self.last_data.bind_points, )) b = 0 while b < len(self.last_data.bind_points): cutoff = sorted(self.last_data.bind_points)[b] def test_value(prefix): for t in hrange(5): alphabet = {} for i, j in self.last_data.blocks[b:]: alphabet.setdefault(j - i, []).append((i, j)) if t > 0: for v in alphabet.values(): self.random.shuffle(v) buf = bytearray(prefix) for i, j in self.last_data.blocks[b:]: u, v = alphabet[j - i].pop() buf.extend(self.last_data.buffer[u:v]) if self.incorporate_new_buffer(hbytes(buf)): return True return False minimize( self.last_data.buffer[:cutoff], test_value, cautious=True ) b += 1 self.exit_reason = ExitReason.finished def event_to_string(self, event): if isinstance(event, str): return event try: return self.events_to_strings[event] except KeyError: pass result = str(event) self.events_to_strings[event] = result return result
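# Throughout the versions above, candidate buffers are ordered by sort_key and
# the corpus is fetched with key=lambda d: (len(d), d). A sketch of that
# ordering, assuming sort_key is equivalent to the shortlex key (len, bytes):
# shorter buffers are strictly simpler, ties break bytewise, and every
# successful shrink strictly decreases the key, so the shrink loop terminates.
def shortlex_key(buffer):
    return (len(buffer), buffer)

buffers = [b'\x01\x00', b'\x02', b'', b'\x00\x00']
assert sorted(buffers, key=shortlex_key) == [
    b'', b'\x02', b'\x00\x00', b'\x01\x00']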
class ConjectureRunner(object): def __init__( self, test_function, settings=None, random=None, database_key=None, ): self._test_function = test_function self.settings = settings or Settings() self.last_data = None self.changed = 0 self.shrinks = 0 self.call_count = 0 self.event_call_counts = Counter() self.valid_examples = 0 self.start_time = time.time() self.random = random or Random(getrandbits(128)) self.database_key = database_key self.status_runtimes = {} self.events_to_strings = WeakKeyDictionary() # Tree nodes are stored in an array to prevent heavy nesting of data # structures. Branches are dicts mapping bytes to child nodes (which # will in general only be partially populated). Leaves are # ConjectureData objects that have been previously seen as the result # of following that path. self.tree = [{}] # A node is dead if there is nothing left to explore past that point. # Recursively, a node is dead if either it is a leaf or every byte # leads to a dead node when starting from here. self.dead = set() def __tree_is_exhausted(self): return 0 in self.dead def new_buffer(self): assert not self.__tree_is_exhausted() self.last_data = ConjectureData( max_length=self.settings.buffer_size, draw_bytes=lambda data, n, distribution: self. __rewrite_for_novelty(data, distribution(self.random, n))) self.test_function(self.last_data) self.last_data.freeze() def test_function(self, data): self.call_count += 1 try: self._test_function(data) data.freeze() except StopTest as e: if e.testcounter != data.testcounter: self.save_buffer(data.buffer) raise e except: self.save_buffer(data.buffer) raise finally: data.freeze() self.note_details(data) self.debug_data(data) if data.status >= Status.VALID: self.valid_examples += 1 tree_node = self.tree[0] indices = [] i = 0 for b in data.buffer: indices.append(i) try: i = tree_node[b] except KeyError: i = len(self.tree) self.tree.append({}) tree_node[b] = i tree_node = self.tree[i] if i in self.dead: break if data.status != Status.OVERRUN and i not in self.dead: self.dead.add(i) self.tree[i] = data for j in reversed(indices): if len(self.tree[j]) < 256: break if set(self.tree[j].values()).issubset(self.dead): self.dead.add(j) else: break def consider_new_test_data(self, data): # Transition rules: # 1. Transition cannot decrease the status # 2. Any transition which increases the status is valid # 3. If the previous status was interesting, only shrinking # transitions are allowed. 
if data.buffer == self.last_data.buffer: return False if self.last_data.status < data.status: return True if self.last_data.status > data.status: return False if data.status == Status.INVALID: return data.index >= self.last_data.index if data.status == Status.OVERRUN: return data.overdraw <= self.last_data.overdraw if data.status == Status.INTERESTING: assert len(data.buffer) <= len(self.last_data.buffer) if len(data.buffer) == len(self.last_data.buffer): assert data.buffer < self.last_data.buffer return True return True def save_buffer(self, buffer): if (self.settings.database is not None and self.database_key is not None and Phase.reuse in self.settings.phases): self.settings.database.save(self.database_key, hbytes(buffer)) def note_details(self, data): if data.status == Status.INTERESTING: self.save_buffer(data.buffer) runtime = max(data.finish_time - data.start_time, 0.0) self.status_runtimes.setdefault(data.status, []).append(runtime) for event in set(map(self.event_to_string, data.events)): self.event_call_counts[event] += 1 def debug(self, message): with self.settings: debug_report(message) def debug_data(self, data): self.debug(u'%d bytes %s -> %s, %s' % ( data.index, unicode_safe_repr(list(data.buffer[:data.index])), unicode_safe_repr(data.status), data.output, )) def incorporate_new_buffer(self, buffer): assert self.last_data.status == Status.INTERESTING if (self.settings.timeout > 0 and time.time() >= self.start_time + self.settings.timeout): self.exit_reason = ExitReason.timeout raise RunIsComplete() buffer = buffer[:self.last_data.index] if sort_key(buffer) >= sort_key(self.last_data.buffer): return False i = 0 for b in buffer: if i in self.dead: return False try: i = self.tree[i][b] except KeyError: break else: return False assert sort_key(buffer) <= sort_key(self.last_data.buffer) data = ConjectureData.for_buffer(buffer) self.test_function(data) if self.consider_new_test_data(data): self.shrinks += 1 self.last_data = data if self.shrinks >= self.settings.max_shrinks: self.exit_reason = ExitReason.max_shrinks raise RunIsComplete() self.last_data = data self.changed += 1 return True return False def run(self): with self.settings: try: self._run() except RunIsComplete: pass self.debug( u'Run complete after %d examples (%d valid) and %d shrinks' % ( self.call_count, self.valid_examples, self.shrinks, )) def _new_mutator(self): def draw_new(data, n, distribution): return distribution(self.random, n) def draw_existing(data, n, distribution): return self.last_data.buffer[data.index:data.index + n] def draw_smaller(data, n, distribution): existing = self.last_data.buffer[data.index:data.index + n] r = distribution(self.random, n) if r <= existing: return r return _draw_predecessor(self.random, existing) def draw_larger(data, n, distribution): existing = self.last_data.buffer[data.index:data.index + n] r = distribution(self.random, n) if r >= existing: return r return _draw_successor(self.random, existing) def reuse_existing(data, n, distribution): choices = data.block_starts.get(n, []) or \ self.last_data.block_starts.get(n, []) if choices: i = self.random.choice(choices) return self.last_data.buffer[i:i + n] else: result = distribution(self.random, n) assert isinstance(result, hbytes) return result def flip_bit(data, n, distribution): buf = bytearray(self.last_data.buffer[data.index:data.index + n]) i = self.random.randint(0, n - 1) k = self.random.randint(0, 7) buf[i] ^= (1 << k) return hbytes(buf) def draw_zero(data, n, distribution): return hbytes(b'\0' * n) def 
draw_constant(data, n, distribution): return bytes_from_list([self.random.randint(0, 255)] * n) options = [ draw_new, reuse_existing, reuse_existing, draw_existing, draw_smaller, draw_larger, flip_bit, draw_zero, draw_constant, ] bits = [self.random.choice(options) for _ in hrange(3)] def draw_mutated(data, n, distribution): if (data.index + n > len(self.last_data.buffer)): result = distribution(self.random, n) else: result = self.random.choice(bits)(data, n, distribution) return self.__rewrite_for_novelty(data, result) return draw_mutated def __rewrite_for_novelty(self, data, result): assert isinstance(result, hbytes) try: node_index = data.__current_node_index except AttributeError: assert len(data.buffer) == 0 node_index = 0 data.__current_node_index = node_index data.__hit_novelty = False if data.__hit_novelty: return result node = self.tree[node_index] assert node_index not in self.dead for i, b in enumerate(result): assert isinstance(b, int) try: new_node_index = node[b] except KeyError: data.__hit_novelty = True return result new_node = self.tree[new_node_index] if new_node_index in self.dead: if isinstance(result, hbytes): result = bytearray(result) for c in range(256): if c not in node: result[i] = c data.__hit_novelty = True return hbytes(result) else: new_node_index = node[c] new_node = self.tree[new_node_index] if new_node_index not in self.dead: result[i] = c break else: # pragma: no cover assert False, ( 'Found a tree node which is live despite all its ' 'children being dead.') node_index = new_node_index node = new_node assert node_index not in self.dead data.__current_node_index = node_index return hbytes(result) def _run(self): self.last_data = None mutations = 0 start_time = time.time() if (self.settings.database is not None and self.database_key is not None): corpus = sorted(self.settings.database.fetch(self.database_key), key=lambda d: (len(d), d)) for existing in corpus: if self.valid_examples >= self.settings.max_examples: self.exit_reason = ExitReason.max_examples return if self.call_count >= max(self.settings.max_iterations, self.settings.max_examples): self.exit_reason = ExitReason.max_iterations return data = ConjectureData.for_buffer(existing) self.test_function(data) data.freeze() self.last_data = data self.consider_new_test_data(data) if data.status < Status.VALID: self.settings.database.delete(self.database_key, existing) elif data.status == Status.VALID: # Incremental garbage collection! we store a lot of # examples in the DB as we shrink: Those that stay # interesting get kept, those that become invalid get # dropped, but those that are merely valid gradually go # away over time. 
    def _run(self):
        self.last_data = None
        mutations = 0
        start_time = time.time()

        if (self.settings.database is not None and
                self.database_key is not None):
            corpus = sorted(
                self.settings.database.fetch(self.database_key),
                key=lambda d: (len(d), d))
            for existing in corpus:
                if self.valid_examples >= self.settings.max_examples:
                    self.exit_reason = ExitReason.max_examples
                    return
                if self.call_count >= max(
                    self.settings.max_iterations,
                    self.settings.max_examples,
                ):
                    self.exit_reason = ExitReason.max_iterations
                    return
                data = ConjectureData.for_buffer(existing)
                self.test_function(data)
                data.freeze()
                self.last_data = data
                self.consider_new_test_data(data)
                if data.status < Status.VALID:
                    self.settings.database.delete(
                        self.database_key, existing)
                elif data.status == Status.VALID:
                    # Incremental garbage collection! We store a lot of
                    # examples in the database while shrinking: those that
                    # stay interesting are kept, those that become invalid
                    # are dropped immediately, and those that are merely
                    # valid are randomly aged out over time.
                    if self.random.randint(0, 2) == 0:
                        self.settings.database.delete(
                            self.database_key, existing)
                else:
                    assert data.status == Status.INTERESTING
                    self.last_data = data
                    break

        if (Phase.generate in self.settings.phases and
                not self.__tree_is_exhausted()):
            if (self.last_data is None or
                    self.last_data.status < Status.INTERESTING):
                self.new_buffer()

            mutator = self._new_mutator()
            while (self.last_data.status != Status.INTERESTING and
                    not self.__tree_is_exhausted()):
                if self.valid_examples >= self.settings.max_examples:
                    self.exit_reason = ExitReason.max_examples
                    return
                if self.call_count >= max(
                    self.settings.max_iterations,
                    self.settings.max_examples,
                ):
                    self.exit_reason = ExitReason.max_iterations
                    return
                if (self.settings.timeout > 0 and
                        time.time() >= start_time + self.settings.timeout):
                    self.exit_reason = ExitReason.timeout
                    return
                if mutations >= self.settings.max_mutations:
                    mutations = 0
                    self.new_buffer()
                    mutator = self._new_mutator()
                else:
                    data = ConjectureData(
                        draw_bytes=mutator,
                        max_length=self.settings.buffer_size)
                    self.test_function(data)
                    data.freeze()
                    prev_data = self.last_data
                    if self.consider_new_test_data(data):
                        self.last_data = data
                        if data.status > prev_data.status:
                            mutations = 0
                    else:
                        mutator = self._new_mutator()
                mutations += 1

        if self.__tree_is_exhausted():
            self.exit_reason = ExitReason.finished
            return

        data = self.last_data
        if data is None:
            self.exit_reason = ExitReason.finished
            return

        assert isinstance(data.output, text_type)

        if self.settings.max_shrinks <= 0:
            self.exit_reason = ExitReason.max_shrinks
            return

        if Phase.shrink not in self.settings.phases:
            self.exit_reason = ExitReason.finished
            return

        # Re-run the best buffer to check the failure is reproducible
        # before investing in shrinking.
        data = ConjectureData.for_buffer(self.last_data.buffer)
        self.test_function(data)
        if data.status != Status.INTERESTING:
            self.exit_reason = ExitReason.flaky
            return

        change_counter = -1

        while self.changed > change_counter:
            change_counter = self.changed

            self.debug('Structured interval deletes')

            k = len(self.last_data.intervals) // 2
            while k > 0:
                i = 0
                while i + k <= len(self.last_data.intervals):
                    bitmask = [True] * len(self.last_data.buffer)
                    for u, v in self.last_data.intervals[i:i + k]:
                        for t in range(u, v):
                            bitmask[t] = False
                    if not self.incorporate_new_buffer(hbytes(
                        b for b, v in zip(self.last_data.buffer, bitmask)
                        if v
                    )):
                        i += k
                k //= 2

            if change_counter != self.changed:
                self.debug('Restarting')
                continue

            self.debug('Bulk replacing blocks with simpler blocks')

            i = 0
            while i < len(self.last_data.blocks):
                u, v = self.last_data.blocks[i]
                buf = self.last_data.buffer
                block = buf[u:v]
                n = v - u

                buffer = bytearray()
                for r, s in self.last_data.blocks:
                    if s - r == n and self.last_data.buffer[r:s] > block:
                        buffer.extend(block)
                    else:
                        buffer.extend(self.last_data.buffer[r:s])
                self.incorporate_new_buffer(hbytes(buffer))
                i += 1

            self.debug('Replacing individual blocks with simpler blocks')

            i = 0
            while i < len(self.last_data.blocks):
                u, v = self.last_data.blocks[i]
                buf = self.last_data.buffer
                block = buf[u:v]
                n = v - u

                all_blocks = sorted(set([hbytes(n)] + [
                    buf[a:a + n]
                    for a in self.last_data.block_starts[n]
                ]))
                better_blocks = all_blocks[:all_blocks.index(block)]
                for b in better_blocks:
                    if self.incorporate_new_buffer(
                            buf[:u] + b + buf[v:]):
                        break
                i += 1
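            # Why duplicates shrink together (a hedged, made-up example): a
            # test that fails only when two drawn values are equal might
            # produce buffer b'\x07\x07' with blocks [(0, 1), (1, 2)].
            # Shrinking either block alone breaks the equality, so every
            # such attempt is rejected; minimizing all copies of a repeated
            # block in lockstep can still reach b'\x00\x00'.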
            self.debug('Simultaneous shrinking of duplicated blocks')

            block_counter = -1
            while block_counter < self.changed:
                block_counter = self.changed
                blocks = [
                    k for k, count in
                    Counter(
                        self.last_data.buffer[u:v]
                        for u, v in self.last_data.blocks).items()
                    if count > 1
                ]
                for block in blocks:
                    parts = [
                        self.last_data.buffer[r:s]
                        for r, s in self.last_data.blocks
                    ]

                    def replace(b):
                        return hbytes(EMPTY_BYTES.join(
                            hbytes(b if c == block else c)
                            for c in parts
                        ))
                    minimize(
                        block,
                        lambda b: self.incorporate_new_buffer(replace(b)),
                        self.random)

            if change_counter != self.changed:
                self.debug('Restarting')
                continue

            self.debug('Lexicographical minimization of whole buffer')

            minimize(
                self.last_data.buffer, self.incorporate_new_buffer,
                cautious=True)

            self.debug('Shrinking of individual blocks')

            i = 0
            while i < len(self.last_data.blocks):
                u, v = self.last_data.blocks[i]
                minimize(
                    self.last_data.buffer[u:v],
                    lambda b: self.incorporate_new_buffer(
                        self.last_data.buffer[:u] + b +
                        self.last_data.buffer[v:],
                    ),
                    self.random)
                i += 1

            if change_counter != self.changed:
                self.debug('Restarting')
                continue

            self.debug('Reordering blocks')

            block_lengths = sorted(
                self.last_data.block_starts, reverse=True)
            for n in block_lengths:
                i = 1
                while i < len(self.last_data.block_starts.get(n, ())):
                    j = i
                    while j > 0:
                        buf = self.last_data.buffer
                        blocks = self.last_data.block_starts[n]
                        a_start = blocks[j - 1]
                        b_start = blocks[j]
                        a = buf[a_start:a_start + n]
                        b = buf[b_start:b_start + n]
                        if a <= b:
                            break
                        swapped = (
                            buf[:a_start] + b +
                            buf[a_start + n:b_start] + a +
                            buf[b_start + n:])
                        assert len(swapped) == len(buf)
                        assert swapped < buf
                        if self.incorporate_new_buffer(swapped):
                            j -= 1
                        else:
                            break
                    i += 1

            if change_counter != self.changed:
                self.debug('Restarting')
                continue

            self.debug('Shuffling suffixes while shrinking %r' % (
                self.last_data.bind_points,
            ))

            b = 0
            while b < len(self.last_data.bind_points):
                cutoff = sorted(self.last_data.bind_points)[b]

                def test_value(prefix):
                    for t in hrange(5):
                        alphabet = {}
                        for i, j in self.last_data.blocks[b:]:
                            alphabet.setdefault(j - i, []).append((i, j))
                        if t > 0:
                            for v in alphabet.values():
                                self.random.shuffle(v)
                        buf = bytearray(prefix)
                        for i, j in self.last_data.blocks[b:]:
                            u, v = alphabet[j - i].pop()
                            buf.extend(self.last_data.buffer[u:v])
                        if self.incorporate_new_buffer(hbytes(buf)):
                            return True
                    return False
                minimize(
                    self.last_data.buffer[:cutoff], test_value,
                    cautious=True)
                b += 1

        self.exit_reason = ExitReason.finished

    def event_to_string(self, event):
        if isinstance(event, str):
            return event
        try:
            return self.events_to_strings[event]
        except KeyError:
            pass
        result = str(event)
        self.events_to_strings[event] = result
        return result
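# Example of driving the runner directly (a hedged sketch, not part of the
# engine; `test` and the settings values here are hypothetical and assume
# this module's own imports):
#
#     def test(data):
#         if data.draw_bytes(2) == b'\x01\x02':
#             data.mark_interesting()
#
#     runner = ConjectureRunner(test, settings=Settings(max_examples=500))
#     runner.run()
#     if runner.interesting_examples:
#         # runner.last_data.buffer now holds the shrunk failing buffer.
#         print(runner.last_data.buffer)
#     assert runner.exit_reason is not None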