def test_collections_counter():
    class MyCounter(Counter):
        pass

    cases = [
        (Counter(), 'Counter()'),
        (Counter(a=1), "Counter({'a': 1})"),
        (MyCounter(a=1), "MyCounter({'a': 1})"),
    ]
    for obj, expected in cases:
        assert_equal(pretty.pretty(obj), expected)
def minimize_duplicated_blocks(self):
    """Find blocks that have been duplicated in multiple places and attempt
    to minimize all of the duplicates simultaneously."""
    self.debug('Simultaneous shrinking of duplicated blocks')
    counts = Counter(
        self.last_data.buffer[u:v] for u, v in self.last_data.blocks
    )
    blocks = [k for k, count in counts.items() if count > 1]

    thresholds = {}
    for u, v in self.last_data.blocks:
        b = self.last_data.buffer[u:v]
        thresholds[b] = v

    blocks.sort(reverse=True)
    blocks.sort(key=lambda b: counts[b] * len(b), reverse=True)
    for block in blocks:
        parts = [
            self.last_data.buffer[r:s]
            for r, s in self.last_data.blocks
        ]

        def replace(b):
            return hbytes(EMPTY_BYTES.join(
                hbytes(b if c == block else c) for c in parts
            ))

        threshold = thresholds[block]

        minimize(
            block,
            lambda b: self.try_buffer_with_rewriting_from(
                replace(b), threshold),
            random=self.random, full=False,
        )
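# Illustrative sketch (not part of the shrinker above): the duplicated-block
# pass keys on a collections.Counter over the byte slices spanned by each
# block, keeping only slices that occur more than once. The helper name,
# buffer, and block offsets below are made-up example data.
from collections import Counter


def duplicated_blocks(buffer, blocks):
    # Count each block's byte content and keep the ones seen at least twice.
    counts = Counter(buffer[u:v] for u, v in blocks)
    return [b for b, count in counts.items() if count > 1]


# Blocks (0, 2) and (4, 6) contain the same bytes, so b'\x07\x07' is reported
# as a duplicate; the middle block is unique and dropped.
assert duplicated_blocks(
    b'\x07\x07\x01\x00\x07\x07', [(0, 2), (2, 4), (4, 6)]
) == [b'\x07\x07']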
def __init__(self, test_function, settings=None, random=None,
             database_key=None):
    self._test_function = test_function
    self.settings = settings or Settings()
    self.shrinks = 0
    self.call_count = 0
    self.event_call_counts = Counter()
    self.valid_examples = 0
    self.start_time = benchmark_time()
    self.random = random or Random(getrandbits(128))
    self.database_key = database_key
    self.status_runtimes = {}

    self.all_drawtimes = []
    self.all_runtimes = []

    self.events_to_strings = WeakKeyDictionary()

    self.target_selector = TargetSelector(self.random)

    self.interesting_examples = {}
    self.covering_examples = {}

    self.shrunk_examples = set()

    self.health_check_state = None

    self.used_examples_from_database = False

    self.reset_tree_to_empty()
def __init__(
    self, test_function, settings=None, random=None,
    database_key=None,
):
    self._test_function = test_function
    self.settings = settings or Settings()
    self.last_data = None
    self.changed = 0
    self.shrinks = 0
    self.call_count = 0
    self.event_call_counts = Counter()
    self.valid_examples = 0
    self.start_time = time.time()
    self.random = random or Random(getrandbits(128))
    self.database_key = database_key
    self.status_runtimes = {}
    self.events_to_strings = WeakKeyDictionary()

    # Tree nodes are stored in an array to prevent heavy nesting of data
    # structures. Branches are dicts mapping bytes to child nodes (which
    # will in general only be partially populated). Leaves are
    # ConjectureData objects that have been previously seen as the result
    # of following that path.
    self.tree = [{}]

    # A node is dead if there is nothing left to explore past that point.
    # Recursively, a node is dead if either it is a leaf or every byte
    # leads to a dead node when starting from here.
    self.dead = set()

    self.forced = {}
def __init__(self, test_function, settings=None, random=None,
             database_key=None):
    self._test_function = test_function
    self.settings = settings or Settings()
    self.shrinks = 0
    self.call_count = 0
    self.event_call_counts = Counter()
    self.valid_examples = 0
    self.random = random or Random(getrandbits(128))
    self.database_key = database_key
    self.status_runtimes = {}

    self.all_drawtimes = []
    self.all_runtimes = []

    self.events_to_strings = WeakKeyDictionary()

    self.target_selector = TargetSelector(self.random)

    self.interesting_examples = {}
    # We use call_count because there may be few possible valid_examples.
    self.first_bug_found_at = None
    self.last_bug_found_at = None

    self.shrunk_examples = set()

    self.health_check_state = None

    self.used_examples_from_database = False

    self.tree = DataTree()

    # We want to be able to get the ConjectureData object that results
    # from running a buffer without recalculating, especially during
    # shrinking where we need to know about the structure of the
    # executed test case.
    self.__data_cache = LRUReusedCache(CACHE_SIZE)
def reset(self):
    self.examples_by_tags = defaultdict(list)
    self.tag_usage_counts = Counter()
    self.tags_by_score = defaultdict(SampleSet)
    self.scores_by_tag = {}
    self.scores = []
    self.mutation_counts = 0
    self.example_counts = 0
    self.non_universal_tags = set()
    self.universal_tags = None
def __init__(
    self, test_function, settings=None, random=None,
    database_key=None,
):
    self._test_function = test_function
    self.settings = settings or Settings()
    self.last_data = None
    self.shrinks = 0
    self.call_count = 0
    self.event_call_counts = Counter()
    self.valid_examples = 0
    self.start_time = time.time()
    self.random = random or Random(getrandbits(128))
    self.database_key = database_key
    self.status_runtimes = {}
    self.events_to_strings = WeakKeyDictionary()

    # Tree nodes are stored in an array to prevent heavy nesting of data
    # structures. Branches are dicts mapping bytes to child nodes (which
    # will in general only be partially populated). Leaves are
    # ConjectureData objects that have been previously seen as the result
    # of following that path.
    self.tree = [{}]

    # A node is dead if there is nothing left to explore past that point.
    # Recursively, a node is dead if either it is a leaf or every byte
    # leads to a dead node when starting from here.
    self.dead = set()

    # We rewrite the byte stream at various points during parsing, to one
    # that will produce an equivalent result but is in some sense more
    # canonical. We keep track of these so that when walking the tree we
    # can identify nodes where the exact byte value doesn't matter and
    # treat all bytes there as equivalent. This significantly reduces the
    # size of the search space and removes a lot of redundant examples.

    # Maps tree indices to the unique byte that is valid at that point.
    # Corresponds to data.write() calls.
    self.forced = {}

    # Maps tree indices to the maximum byte that is valid at that point.
    # Currently this is only used inside draw_bits, but it potentially
    # could get used elsewhere.
    self.capped = {}

    # Where a tree node consists of the beginning of a block we track the
    # size of said block. This allows us to tell when an example is too
    # short even if it goes off the unexplored region of the tree - if it
    # is at the beginning of a block of size 4 but only has 3 bytes left,
    # it's going to overrun the end of the buffer regardless of the
    # buffer contents.
    self.block_sizes = {}
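# Illustrative sketch (not part of the class above): the comments describe a
# tree stored as a list of nodes, where each branch node is a dict mapping a
# byte to its child. Walking a buffer under that layout might look roughly
# like this; `tree_walk` and the example tree are hypothetical, shown only to
# make the representation concrete.
def tree_walk(tree, buffer):
    """Follow `buffer` byte by byte through `tree`, returning the node
    indices visited. A missing edge means the path is unexplored."""
    node = 0
    visited = [node]
    for byte in buffer:
        children = tree[node]
        if byte not in children:
            break  # unexplored territory past this point
        node = children[byte]
        visited.append(node)
    return visited


# Root (index 0) branches on byte 0 to node 1, which branches on byte 3 to
# node 2; everything else is unexplored.
example_tree = [{0: 1}, {3: 2}, {}]
assert tree_walk(example_tree, bytes([0, 3, 5])) == [0, 1, 2]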
def __init__(
    self, test_function, settings=None, random=None,
    database_key=None,
):
    self._test_function = test_function
    self.settings = settings or Settings()
    self.last_data = None
    self.changed = 0
    self.shrinks = 0
    self.call_count = 0
    self.event_call_counts = Counter()
    self.valid_examples = 0
    self.start_time = time.time()
    self.random = random or Random(getrandbits(128))
    self.database_key = database_key
    self.seen = set()
    self.duplicates = 0
    self.status_runtimes = {}
    self.events_to_strings = WeakKeyDictionary()
def is_good(xs):
    return max(Counter(xs).values()) >= 3
def criterion(ls):
    c = Counter(type(l) for l in ls)
    return len(c) >= 2 and min(c.values()) >= 3
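# Illustrative check (not part of the test suite): `criterion` above asks for
# at least two distinct element types, each appearing at least three times,
# while `is_good` above asks for any single value repeated at least three
# times. The sample lists are made up.
assert criterion([1, 2, 3, 'a', 'b', 'c'])  # two types, three of each
assert not criterion([1, 2, 3, 'a'])        # str appears only once
assert is_good([5, 5, 5, 9])                # 5 occurs three times
assert not is_good([1, 2, 3])               # nothing repeats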
def shrink(self):
    # We assume that if an all-zero block of bytes is an interesting
    # example then we're not going to do better than that.
    # This might not technically be true: e.g. for integers() | booleans()
    # the simplest example is actually [1, 0]. Missing this case is fairly
    # harmless and this allows us to make various simplifying assumptions
    # about the structure of the data (principally that we're never
    # operating on a block of all zero bytes so can use non-zeroness as a
    # signpost of complexity).
    if (
        not any(self.last_data.buffer) or
        self.incorporate_new_buffer(hbytes(len(self.last_data.buffer)))
    ):
        self.exit_reason = ExitReason.finished
        return

    if self.has_existing_examples():
        corpus = sorted(
            self.settings.database.fetch(self.database_key),
            key=sort_key
        )
        # We always have self.last_data.buffer in the database because
        # we save every interesting example. This means we will always
        # trigger the first break and thus never exit the loop normally.
        for c in corpus:  # pragma: no branch
            if sort_key(c) >= sort_key(self.last_data.buffer):
                break
            elif self.incorporate_new_buffer(c):
                break
            else:
                self.settings.database.delete(self.database_key, c)

    change_counter = -1

    while self.changed > change_counter:
        change_counter = self.changed

        self.debug('Structured interval deletes')

        k = len(self.last_data.intervals) // 2
        while k > 0:
            i = 0
            while i + k <= len(self.last_data.intervals):
                bitmask = [True] * len(self.last_data.buffer)

                for u, v in self.last_data.intervals[i:i + k]:
                    for t in range(u, v):
                        bitmask[t] = False

                u, v = self.last_data.intervals[i]
                if not self.incorporate_new_buffer(hbytes(
                    b for b, v in zip(self.last_data.buffer, bitmask)
                    if v
                )):
                    i += k
            k //= 2

        self.zero_blocks()

        minimize(
            self.last_data.buffer, self.incorporate_new_buffer,
            cautious=True, random=self.random,
        )

        if change_counter != self.changed:
            self.debug('Restarting')
            continue

        self.debug('Bulk replacing blocks with simpler blocks')
        i = 0
        while i < len(self.last_data.blocks):
            u, v = self.last_data.blocks[i]
            buf = self.last_data.buffer
            block = buf[u:v]
            n = v - u

            buffer = bytearray()
            for r, s in self.last_data.blocks:
                if s - r == n and self.last_data.buffer[r:s] > block:
                    buffer.extend(block)
                else:
                    buffer.extend(self.last_data.buffer[r:s])
            self.incorporate_new_buffer(hbytes(buffer))
            i += 1

        self.debug('Simultaneous shrinking of duplicated blocks')
        block_counter = -1
        while block_counter < self.changed:
            block_counter = self.changed
            blocks = [
                k for k, count in
                Counter(
                    self.last_data.buffer[u:v]
                    for u, v in self.last_data.blocks).items()
                if count > 1
            ]
            for block in blocks:
                parts = [
                    self.last_data.buffer[r:s]
                    for r, s in self.last_data.blocks
                ]

                def replace(b):
                    return hbytes(EMPTY_BYTES.join(
                        hbytes(b if c == block else c) for c in parts
                    ))
                minimize(
                    block,
                    lambda b: self.incorporate_new_buffer(replace(b)),
                    random=self.random,
                )

        if change_counter != self.changed:
            self.debug('Restarting')
            continue

        self.debug('Shrinking of individual blocks')
        i = 0
        while i < len(self.last_data.blocks):
            u, v = self.last_data.blocks[i]
            minimize(
                self.last_data.buffer[u:v],
                lambda b: self.incorporate_new_buffer(
                    self.last_data.buffer[:u] + b +
                    self.last_data.buffer[v:],
                ), random=self.random,
            )
            i += 1

        if change_counter != self.changed:
            self.debug('Restarting')
            continue

        self.debug('Reordering blocks')
        block_lengths = sorted(self.last_data.block_starts, reverse=True)
        for n in block_lengths:
            i = 1
            while i < len(self.last_data.block_starts.get(n, ())):
                j = i
                while j > 0:
                    buf = self.last_data.buffer
                    blocks = self.last_data.block_starts[n]
                    a_start = blocks[j - 1]
                    b_start = blocks[j]
                    a = buf[a_start:a_start + n]
                    b = buf[b_start:b_start + n]
                    if a <= b:
                        break
                    swapped = (
                        buf[:a_start] + b +
                        buf[a_start + n:b_start] + a +
                        buf[b_start + n:])
                    assert len(swapped) == len(buf)
                    assert swapped < buf
                    if self.incorporate_new_buffer(swapped):
                        j -= 1
                    else:
                        break
                i += 1

        self.debug('Shuffling suffixes while shrinking %r' % (
            self.last_data.bind_points,
        ))
        b = 0
        while b < len(self.last_data.bind_points):
            cutoff = sorted(self.last_data.bind_points)[b]

            def test_value(prefix):
                for t in hrange(5):
                    alphabet = {}
                    for i, j in self.last_data.blocks[b:]:
                        alphabet.setdefault(j - i, []).append((i, j))
                    if t > 0:
                        for v in alphabet.values():
                            self.random.shuffle(v)
                    buf = bytearray(prefix)
                    for i, j in self.last_data.blocks[b:]:
                        u, v = alphabet[j - i].pop()
                        buf.extend(self.last_data.buffer[u:v])
                    if self.incorporate_new_buffer(hbytes(buf)):
                        return True
                return False
            minimize(
                self.last_data.buffer[:cutoff], test_value,
                cautious=True, random=self.random,
            )
            b += 1

    self.exit_reason = ExitReason.finished
def test_cyclic_counter():
    c = Counter()
    k = HashItAnyway(c)
    c[k] = 1
    assert pretty.pretty(c) == 'Counter({Counter(...): 1})'
def is_good(xs):
    xs = list(filter(None, xs))
    assume(xs)
    return max(Counter(xs).values()) >= 3
def _run(self):
    self.last_data = None
    mutations = 0
    start_time = time.time()

    if (self.settings.database is not None and
            self.database_key is not None):
        corpus = sorted(
            self.settings.database.fetch(self.database_key),
            key=lambda d: (len(d), d))
        for existing in corpus:
            if self.valid_examples >= self.settings.max_examples:
                return
            if self.iterations >= max(self.settings.max_iterations,
                                      self.settings.max_examples):
                return
            data = TestData.for_buffer(existing)
            self.test_function(data)
            data.freeze()
            self.last_data = data
            if data.status < Status.VALID:
                self.settings.database.delete(self.database_key, existing)
            elif data.status == Status.VALID:
                # Incremental garbage collection! we store a lot of
                # examples in the DB as we shrink: Those that stay
                # interesting get kept, those that become invalid get
                # dropped, but those that are merely valid gradually go
                # away over time.
                if self.random.randint(0, 2) == 0:
                    self.settings.database.delete(
                        self.database_key, existing)
            else:
                assert data.status == Status.INTERESTING
                self.last_data = data
                break

    if Phase.generate in self.settings.phases:
        if (self.last_data is None or
                self.last_data.status < Status.INTERESTING):
            self.new_buffer()

        mutator = self._new_mutator()
        while self.last_data.status != Status.INTERESTING:
            if self.valid_examples >= self.settings.max_examples:
                return
            if self.iterations >= max(self.settings.max_iterations,
                                      self.settings.max_examples):
                return
            if (self.settings.timeout > 0 and
                    time.time() >= start_time + self.settings.timeout):
                return
            if mutations >= self.settings.max_mutations:
                mutations = 0
                self.new_buffer()
                mutator = self._new_mutator()
            else:
                data = TestData(
                    draw_bytes=mutator,
                    max_length=self.settings.buffer_size)
                self.test_function(data)
                data.freeze()
                self.note_for_corpus(data)
                prev_data = self.last_data
                if self.consider_new_test_data(data):
                    self.last_data = data
                    if data.status > prev_data.status:
                        mutations = 0
                else:
                    mutator = self._new_mutator()

            mutations += 1

    data = self.last_data
    if data is None:
        return
    assert isinstance(data.output, text_type)

    if self.settings.max_shrinks <= 0:
        return

    if Phase.shrink not in self.settings.phases:
        return

    if not self.last_data.buffer:
        return

    data = TestData.for_buffer(self.last_data.buffer)
    self.test_function(data)
    if data.status != Status.INTERESTING:
        return

    change_counter = -1

    while self.changed > change_counter:
        change_counter = self.changed

        failed_deletes = 0
        while self.last_data.intervals and failed_deletes < 10:
            if self.random.randint(0, 1):
                u, v = self.random.choice(self.last_data.intervals)
            else:
                n = len(self.last_data.buffer) - 1
                u, v = sorted(
                    (self.random.choice(self.last_data.intervals)))
            if (
                v < len(self.last_data.buffer)
            ) and self.incorporate_new_buffer(
                self.last_data.buffer[:u] + self.last_data.buffer[v:]
            ):
                failed_deletes = 0
            else:
                failed_deletes += 1

        i = 0
        while i < len(self.last_data.intervals):
            u, v = self.last_data.intervals[i]
            if not self.incorporate_new_buffer(
                self.last_data.buffer[:u] + self.last_data.buffer[v:]
            ):
                i += 1

        i = 0
        while i + 1 < len(self.last_data.buffer):
            if not self.incorporate_new_buffer(
                self.last_data.buffer[:i] + self.last_data.buffer[i + 1:]
            ):
                i += 1

        i = 0
        while i < len(self.last_data.blocks):
            u, v = self.last_data.blocks[i]
            buf = self.last_data.buffer
            block = buf[u:v]
            n = v - u
            all_blocks = sorted(
                set([bytes(n)] + [
                    buf[a:a + n]
                    for a in self.last_data.block_starts[n]
                ]))
            better_blocks = all_blocks[:all_blocks.index(block)]
            for b in better_blocks:
                if self.incorporate_new_buffer(buf[:u] + b + buf[v:]):
                    break
            i += 1

        block_counter = -1
        while block_counter < self.changed:
            block_counter = self.changed
            blocks = [
                k for k, count in
                Counter(
                    self.last_data.buffer[u:v]
                    for u, v in self.last_data.blocks).items()
                if count > 1
            ]
            for block in blocks:
                parts = [
                    self.last_data.buffer[r:s]
                    for r, s in self.last_data.blocks
                ]

                def replace(b):
                    return b''.join(
                        bytes(b if c == block else c) for c in parts)
                minimize(
                    block,
                    lambda b: self.incorporate_new_buffer(replace(b)),
                    self.random)

        i = 0
        while i < len(self.last_data.blocks):
            u, v = self.last_data.blocks[i]
            minimize(
                self.last_data.buffer[u:v],
                lambda b: self.incorporate_new_buffer(
                    self.last_data.buffer[:u] + b +
                    self.last_data.buffer[v:],
                ), self.random)
            i += 1

        i = 0
        alternatives = None
        while i < len(self.last_data.intervals):
            if alternatives is None:
                alternatives = sorted(set(
                    self.last_data.buffer[u:v]
                    for u, v in self.last_data.intervals), key=len)
            u, v = self.last_data.intervals[i]
            for a in alternatives:
                buf = self.last_data.buffer
                if (len(a) < v - u or
                        (len(a) == (v - u) and a < buf[u:v])):
                    if self.incorporate_new_buffer(buf[:u] + a + buf[v:]):
                        alternatives = None
                        break
            i += 1
def _run(self):
    self.last_data = None
    mutations = 0
    start_time = time.time()

    if (
        self.settings.database is not None and
        self.database_key is not None
    ):
        corpus = sorted(
            self.settings.database.fetch(self.database_key),
            key=lambda d: (len(d), d)
        )
        for existing in corpus:
            if self.valid_examples >= self.settings.max_examples:
                self.exit_reason = ExitReason.max_examples
                return
            if self.call_count >= max(
                self.settings.max_iterations,
                self.settings.max_examples
            ):
                self.exit_reason = ExitReason.max_iterations
                return
            data = ConjectureData.for_buffer(existing)
            self.test_function(data)
            data.freeze()
            self.last_data = data
            if data.status < Status.VALID:
                self.settings.database.delete(
                    self.database_key, existing)
            elif data.status == Status.VALID:
                # Incremental garbage collection! we store a lot of
                # examples in the DB as we shrink: Those that stay
                # interesting get kept, those that become invalid get
                # dropped, but those that are merely valid gradually go
                # away over time.
                if self.random.randint(0, 2) == 0:
                    self.settings.database.delete(
                        self.database_key, existing)
            else:
                assert data.status == Status.INTERESTING
                self.last_data = data
                break

    if Phase.generate in self.settings.phases:
        if (
            self.last_data is None or
            self.last_data.status < Status.INTERESTING
        ):
            self.new_buffer()

        mutator = self._new_mutator()
        while self.last_data.status != Status.INTERESTING:
            if self.valid_examples >= self.settings.max_examples:
                self.exit_reason = ExitReason.max_examples
                return
            if self.call_count >= max(
                self.settings.max_iterations,
                self.settings.max_examples
            ):
                self.exit_reason = ExitReason.max_iterations
                return
            if (
                self.settings.timeout > 0 and
                time.time() >= start_time + self.settings.timeout
            ):
                self.exit_reason = ExitReason.timeout
                return
            if mutations >= self.settings.max_mutations:
                mutations = 0
                self.new_buffer()
                mutator = self._new_mutator()
            else:
                data = ConjectureData(
                    draw_bytes=mutator,
                    max_length=self.settings.buffer_size
                )
                self.test_function(data)
                data.freeze()
                prev_data = self.last_data
                if self.consider_new_test_data(data):
                    self.last_data = data
                    if data.status > prev_data.status:
                        mutations = 0
                else:
                    mutator = self._new_mutator()

            mutations += 1

    data = self.last_data
    if data is None:
        self.exit_reason = ExitReason.finished
        return
    assert isinstance(data.output, text_type)

    if self.settings.max_shrinks <= 0:
        self.exit_reason = ExitReason.max_shrinks
        return

    if Phase.shrink not in self.settings.phases:
        self.exit_reason = ExitReason.finished
        return

    if not self.last_data.buffer:
        self.exit_reason = ExitReason.finished
        return

    data = ConjectureData.for_buffer(self.last_data.buffer)
    self.test_function(data)
    if data.status != Status.INTERESTING:
        self.exit_reason = ExitReason.flaky
        return

    change_counter = -1

    while self.changed > change_counter:
        change_counter = self.changed

        self.debug('Random interval deletes')
        failed_deletes = 0
        while self.last_data.intervals and failed_deletes < 10:
            if self.random.randint(0, 1):
                u, v = self.random.choice(self.last_data.intervals)
            else:
                n = len(self.last_data.buffer) - 1
                u, v = sorted((
                    self.random.choice(self.last_data.intervals)
                ))
            if (
                v < len(self.last_data.buffer)
            ) and self.incorporate_new_buffer(
                self.last_data.buffer[:u] + self.last_data.buffer[v:]
            ):
                failed_deletes = 0
            else:
                failed_deletes += 1

        self.debug('Structured interval deletes')
        i = 0
        while i < len(self.last_data.intervals):
            u, v = self.last_data.intervals[i]
            if not self.incorporate_new_buffer(
                self.last_data.buffer[:u] + self.last_data.buffer[v:]
            ):
                i += 1

        if change_counter != self.changed:
            self.debug('Restarting')
            continue

        self.debug('Lexicographical minimization of whole buffer')
        minimize(
            self.last_data.buffer, self.incorporate_new_buffer,
            cautious=True
        )

        if change_counter != self.changed:
            self.debug('Restarting')
            continue

        self.debug('Replacing blocks with simpler blocks')
        i = 0
        while i < len(self.last_data.blocks):
            u, v = self.last_data.blocks[i]
            buf = self.last_data.buffer
            block = buf[u:v]
            n = v - u
            all_blocks = sorted(set([bytes(n)] + [
                buf[a:a + n]
                for a in self.last_data.block_starts[n]
            ]))
            better_blocks = all_blocks[:all_blocks.index(block)]
            for b in better_blocks:
                if self.incorporate_new_buffer(
                    buf[:u] + b + buf[v:]
                ):
                    break
            i += 1

        self.debug('Simultaneous shrinking of duplicated blocks')
        block_counter = -1
        while block_counter < self.changed:
            block_counter = self.changed
            blocks = [
                k for k, count in
                Counter(
                    self.last_data.buffer[u:v]
                    for u, v in self.last_data.blocks).items()
                if count > 1
            ]
            for block in blocks:
                parts = [
                    self.last_data.buffer[r:s]
                    for r, s in self.last_data.blocks
                ]

                def replace(b):
                    return b''.join(
                        bytes(b if c == block else c) for c in parts
                    )
                minimize(
                    block,
                    lambda b: self.incorporate_new_buffer(replace(b)),
                    self.random
                )

        self.debug('Shrinking of individual blocks')
        i = 0
        while i < len(self.last_data.blocks):
            u, v = self.last_data.blocks[i]
            minimize(
                self.last_data.buffer[u:v],
                lambda b: self.incorporate_new_buffer(
                    self.last_data.buffer[:u] + b +
                    self.last_data.buffer[v:],
                ), self.random
            )
            i += 1

        self.debug('Replacing intervals with simpler intervals')
        interval_counter = -1
        while interval_counter != self.changed:
            interval_counter = self.changed
            i = 0
            alternatives = None
            while i < len(self.last_data.intervals):
                if alternatives is None:
                    alternatives = sorted(set(
                        self.last_data.buffer[u:v]
                        for u, v in self.last_data.intervals), key=len)
                u, v = self.last_data.intervals[i]
                for a in alternatives:
                    buf = self.last_data.buffer
                    if (
                        len(a) < v - u or
                        (len(a) == (v - u) and a < buf[u:v])
                    ):
                        if self.incorporate_new_buffer(
                            buf[:u] + a + buf[v:]
                        ):
                            alternatives = None
                            break
                i += 1

        if change_counter != self.changed:
            self.debug('Restarting')
            continue

        self.debug('Shuffling suffixes while shrinking %r' % (
            self.last_data.bind_points,
        ))
        b = 0
        while b < len(self.last_data.bind_points):
            cutoff = sorted(self.last_data.bind_points)[b]

            def test_value(prefix):
                for t in hrange(5):
                    alphabet = {}
                    for i, j in self.last_data.blocks[b:]:
                        alphabet.setdefault(j - i, []).append((i, j))
                    if t > 0:
                        for v in alphabet.values():
                            self.random.shuffle(v)
                    buf = bytearray(prefix)
                    for i, j in self.last_data.blocks[b:]:
                        u, v = alphabet[j - i].pop()
                        buf.extend(self.last_data.buffer[u:v])
                    if self.incorporate_new_buffer(hbytes(buf)):
                        return True
                return False
            minimize(
                self.last_data.buffer[:cutoff], test_value, cautious=True
            )
            b += 1

    self.exit_reason = ExitReason.finished
def _run(self):
    self.last_data = None
    mutations = 0
    start_time = time.time()

    if (self.settings.database is not None and
            self.database_key is not None):
        corpus = sorted(
            self.settings.database.fetch(self.database_key),
            key=lambda d: (len(d), d))
        for existing in corpus:
            if self.valid_examples >= self.settings.max_examples:
                self.exit_reason = ExitReason.max_examples
                return
            if self.call_count >= max(self.settings.max_iterations,
                                      self.settings.max_examples):
                self.exit_reason = ExitReason.max_iterations
                return
            data = ConjectureData.for_buffer(existing)
            self.test_function(data)
            data.freeze()
            self.last_data = data
            self.consider_new_test_data(data)
            if data.status < Status.VALID:
                self.settings.database.delete(self.database_key, existing)
            elif data.status == Status.VALID:
                # Incremental garbage collection! we store a lot of
                # examples in the DB as we shrink: Those that stay
                # interesting get kept, those that become invalid get
                # dropped, but those that are merely valid gradually go
                # away over time.
                if self.random.randint(0, 2) == 0:
                    self.settings.database.delete(
                        self.database_key, existing)
            else:
                assert data.status == Status.INTERESTING
                self.last_data = data
                break

    if (Phase.generate in self.settings.phases and
            not self.__tree_is_exhausted()):
        if (self.last_data is None or
                self.last_data.status < Status.INTERESTING):
            self.new_buffer()

        mutator = self._new_mutator()
        while (self.last_data.status != Status.INTERESTING and
               not self.__tree_is_exhausted()):
            if self.valid_examples >= self.settings.max_examples:
                self.exit_reason = ExitReason.max_examples
                return
            if self.call_count >= max(self.settings.max_iterations,
                                      self.settings.max_examples):
                self.exit_reason = ExitReason.max_iterations
                return
            if (self.settings.timeout > 0 and
                    time.time() >= start_time + self.settings.timeout):
                self.exit_reason = ExitReason.timeout
                return
            if mutations >= self.settings.max_mutations:
                mutations = 0
                self.new_buffer()
                mutator = self._new_mutator()
            else:
                data = ConjectureData(
                    draw_bytes=mutator,
                    max_length=self.settings.buffer_size)
                self.test_function(data)
                data.freeze()
                prev_data = self.last_data
                if self.consider_new_test_data(data):
                    self.last_data = data
                    if data.status > prev_data.status:
                        mutations = 0
                else:
                    mutator = self._new_mutator()

            mutations += 1

    if self.__tree_is_exhausted():
        self.exit_reason = ExitReason.finished
        return

    data = self.last_data
    if data is None:
        self.exit_reason = ExitReason.finished
        return

    assert isinstance(data.output, text_type)

    if self.settings.max_shrinks <= 0:
        self.exit_reason = ExitReason.max_shrinks
        return

    if Phase.shrink not in self.settings.phases:
        self.exit_reason = ExitReason.finished
        return

    data = ConjectureData.for_buffer(self.last_data.buffer)
    self.test_function(data)
    if data.status != Status.INTERESTING:
        self.exit_reason = ExitReason.flaky
        return

    change_counter = -1

    while self.changed > change_counter:
        change_counter = self.changed

        self.debug('Structured interval deletes')

        k = len(self.last_data.intervals) // 2
        while k > 0:
            i = 0
            while i + k <= len(self.last_data.intervals):
                bitmask = [True] * len(self.last_data.buffer)

                for u, v in self.last_data.intervals[i:i + k]:
                    for t in range(u, v):
                        bitmask[t] = False

                u, v = self.last_data.intervals[i]
                if not self.incorporate_new_buffer(hbytes(
                    b for b, v in zip(self.last_data.buffer, bitmask)
                    if v
                )):
                    i += k
            k //= 2

        if change_counter != self.changed:
            self.debug('Restarting')
            continue

        self.debug('Bulk replacing blocks with simpler blocks')
        i = 0
        while i < len(self.last_data.blocks):
            u, v = self.last_data.blocks[i]
            buf = self.last_data.buffer
            block = buf[u:v]
            n = v - u

            buffer = bytearray()
            for r, s in self.last_data.blocks:
                if s - r == n and self.last_data.buffer[r:s] > block:
                    buffer.extend(block)
                else:
                    buffer.extend(self.last_data.buffer[r:s])
            self.incorporate_new_buffer(hbytes(buffer))
            i += 1

        self.debug('Replacing individual blocks with simpler blocks')
        i = 0
        while i < len(self.last_data.blocks):
            u, v = self.last_data.blocks[i]
            buf = self.last_data.buffer
            block = buf[u:v]
            n = v - u
            all_blocks = sorted(
                set([hbytes(n)] + [
                    buf[a:a + n]
                    for a in self.last_data.block_starts[n]
                ]))
            better_blocks = all_blocks[:all_blocks.index(block)]
            for b in better_blocks:
                if self.incorporate_new_buffer(buf[:u] + b + buf[v:]):
                    break
            i += 1

        self.debug('Simultaneous shrinking of duplicated blocks')
        block_counter = -1
        while block_counter < self.changed:
            block_counter = self.changed
            blocks = [
                k for k, count in
                Counter(
                    self.last_data.buffer[u:v]
                    for u, v in self.last_data.blocks).items()
                if count > 1
            ]
            for block in blocks:
                parts = [
                    self.last_data.buffer[r:s]
                    for r, s in self.last_data.blocks
                ]

                def replace(b):
                    return hbytes(EMPTY_BYTES.join(
                        hbytes(b if c == block else c) for c in parts))
                minimize(
                    block,
                    lambda b: self.incorporate_new_buffer(replace(b)),
                    self.random)

        if change_counter != self.changed:
            self.debug('Restarting')
            continue

        self.debug('Lexicographical minimization of whole buffer')
        minimize(
            self.last_data.buffer, self.incorporate_new_buffer,
            cautious=True)

        self.debug('Shrinking of individual blocks')
        i = 0
        while i < len(self.last_data.blocks):
            u, v = self.last_data.blocks[i]
            minimize(
                self.last_data.buffer[u:v],
                lambda b: self.incorporate_new_buffer(
                    self.last_data.buffer[:u] + b +
                    self.last_data.buffer[v:],
                ), self.random)
            i += 1

        if change_counter != self.changed:
            self.debug('Restarting')
            continue

        self.debug('Reordering blocks')
        block_lengths = sorted(self.last_data.block_starts, reverse=True)
        for n in block_lengths:
            i = 1
            while i < len(self.last_data.block_starts.get(n, ())):
                j = i
                while j > 0:
                    buf = self.last_data.buffer
                    blocks = self.last_data.block_starts[n]
                    a_start = blocks[j - 1]
                    b_start = blocks[j]
                    a = buf[a_start:a_start + n]
                    b = buf[b_start:b_start + n]
                    if a <= b:
                        break
                    swapped = (
                        buf[:a_start] + b +
                        buf[a_start + n:b_start] + a +
                        buf[b_start + n:])
                    assert len(swapped) == len(buf)
                    assert swapped < buf
                    if self.incorporate_new_buffer(swapped):
                        j -= 1
                    else:
                        break
                i += 1

        if change_counter != self.changed:
            self.debug('Restarting')
            continue

        self.debug('Shuffling suffixes while shrinking %r' % (
            self.last_data.bind_points,
        ))
        b = 0
        while b < len(self.last_data.bind_points):
            cutoff = sorted(self.last_data.bind_points)[b]

            def test_value(prefix):
                for t in hrange(5):
                    alphabet = {}
                    for i, j in self.last_data.blocks[b:]:
                        alphabet.setdefault(j - i, []).append((i, j))
                    if t > 0:
                        for v in alphabet.values():
                            self.random.shuffle(v)
                    buf = bytearray(prefix)
                    for i, j in self.last_data.blocks[b:]:
                        u, v = alphabet[j - i].pop()
                        buf.extend(self.last_data.buffer[u:v])
                    if self.incorporate_new_buffer(hbytes(buf)):
                        return True
                return False
            minimize(
                self.last_data.buffer[:cutoff], test_value, cautious=True)
            b += 1

    self.exit_reason = ExitReason.finished