def sort_regions_with_gaps(self):
    """Guarantees that for each i we have tried to swap index i with
    index i + 2.

    For every interior index i whose immediate neighbourhood is out of
    order, the element at i is held fixed while the elements on either side
    of it are sorted together as one region. The region is grown to the
    right first and then to the left, with find_integer choosing how far
    each side is extended.
    """
    for i in range(1, len(self.current) - 1):
        locally_ordered = (
            self.current[i - 1] <= self.current[i] <= self.current[i + 1]
        )
        if locally_ordered:
            # The pivot already sits between its neighbours, so there is
            # nothing to try around it.
            continue

        def sorts_around_pivot(lo, hi):
            """Try sorting the region [lo, hi) with index i left in place."""
            seq = self.current
            if not (0 <= lo and hi <= len(seq)):
                return False
            assert lo <= i < hi

            # Number of sorted neighbours that must end up before the pivot
            # so that the pivot keeps its position i.
            pivot_offset = i - lo
            neighbours = sorted(seq[lo:i] + seq[i + 1:hi])

            candidate = list(seq[:lo])
            candidate.extend(neighbours[:pivot_offset])
            candidate.append(seq[i])
            candidate.extend(neighbours[pivot_offset:])
            candidate.extend(seq[hi:])
            return self.consider(candidate)

        # Extend the right edge while the left edge stays at the pivot ...
        grown = find_integer(lambda k: sorts_around_pivot(i, i + 1 + k))
        upper = i + 1 + grown
        # ... then extend the left edge with the right edge held at its new
        # position.
        find_integer(lambda k: sorts_around_pivot(i - k, upper))
def run_step(self):
    """Run one right-to-left deletion pass over ``self.current``.

    Working from the end of the sequence, each round tries to delete as
    many consecutive elements as possible. The first element that cannot be
    deleted this way joins a protected tail that is kept for the rest of
    the pass, and the search continues to its left. The drawback is that
    deletions of already-protected elements that would only succeed later
    are missed.
    """
    kept_tail = 0

    # The pass ends once every element has been deleted or protected.
    while kept_tail < len(self.current):
        # Work from a fixed snapshot so that successful deletions made
        # during the search cannot shift the slice indices used below.
        snapshot = self.current
        # Everything left of ``boundary`` is still a deletion candidate.
        boundary = len(snapshot) - kept_tail

        def can_drop(count):
            """Try removing the ``count`` candidates nearest the tail.

                |<------- boundary ------->|<- kept_tail ->|
                [ kept ...... | dropped    ][ protected    ]
                              |<- count -->|
            """
            if count > boundary:
                return False
            return self.consider(
                snapshot[:boundary - count] + snapshot[boundary:]
            )

        find_integer(can_drop)

        # Either the search stopped at an element that would not go (which
        # now joins the protected tail) or everything was deleted, in which
        # case the loop condition ends the pass.
        kept_tail += 1
def run_step(self):
    """Delete as much of ``self.current`` as possible in one backwards pass.

    Each round removes the longest accepted run of consecutive elements
    that ends just before a "protected" suffix. The suffix then grows by
    one element (the one that blocked the run) and the next round works to
    its left. Elements that become deletable only after they were protected
    are not retried within the same pass.
    """
    protected = 0
    while True:
        size = len(self.current)
        if protected >= size:
            # Every element has been deleted or protected.
            break

        # Count of elements still eligible for deletion; they occupy the
        # indices [0, deletable) of the snapshot taken below.
        deletable = size - protected

        # Snapshot the sequence so the slices stay valid while the search
        # is replacing ``self.current``.
        base = self.current

        # Drop n elements ending at index ``deletable`` and always keep the
        # protected suffix base[deletable:].
        find_integer(
            lambda n: n <= deletable
            and self.consider(base[:deletable - n] + base[deletable:])
        )

        # The element that stopped the run (if any) becomes protected.
        protected += 1
def run_step(self):
    """Try to delete runs of elements from ``self.current``, right to left.

    ``j`` counts the elements at the end of the sequence that have already
    been passed over; ``i`` is the index of the rightmost element that is
    still a candidate. Each round asks find_integer for the largest accepted
    k such that the k elements ending at index ``i`` can be removed, then
    moves one element to the left.
    """
    j = 0
    while j < len(self.current):
        i = len(self.current) - 1 - j
        # Snapshot so that deletions accepted during the search do not
        # shift the slice indices.
        start = self.current
        # The run ending at index i contains i + 1 elements (indices 0..i),
        # so k may be as large as i + 1. Capping it at i would mean the
        # element at index 0 could never be deleted.
        find_integer(
            lambda k: k <= i + 1
            and self.consider(start[:i + 1 - k] + start[i + 1:])
        )
        j += 1
def run_step(self):
    """Delete runs of elements from ``self.current``, scanning backwards.

    Each round tries to remove as many consecutive elements as possible
    immediately to the left of the elements already passed over, then
    passes over one more element and repeats.
    """
    passed_over = 0
    while passed_over < len(self.current):
        # Fixed view of the sequence for this round's slicing.
        snapshot = self.current
        # Exclusive end of the candidate region; everything from here on
        # has already been passed over and is always retained.
        end = len(snapshot) - passed_over

        def try_delete(count):
            """Try removing the ``count`` elements just before ``end``."""
            if count > end:
                return False
            return self.consider(snapshot[:end - count] + snapshot[end:])

        find_integer(try_delete)
        passed_over += 1
def find_integer_cost(n):
    """Return how many predicate calls find_integer makes for target ``n``.

    The count is measured by running find_integer against the predicate
    ``i <= n`` and recording every call. Results are memoised in
    FIND_INTEGER_COSTS.
    """
    try:
        return FIND_INTEGER_COSTS[n]
    except KeyError:
        # Not measured yet; fall through and measure it now.
        pass

    # A list is used so the nested function can record calls without
    # rebinding a name in the enclosing scope.
    probed = []

    def is_at_most_n(candidate):
        probed.append(candidate)
        return candidate <= n

    find_integer(is_at_most_n)
    return FIND_INTEGER_COSTS.setdefault(n, len(probed))
def run_step(self):
    """Try moving each element of ``self.current`` towards the front.

    This is shaped like insertion sort: for each index i the element there
    is moved k places to the left, with find_integer choosing k instead of
    a step-by-step scan. The original author notes this keeps the number of
    ``consider`` calls to O(n log(n)), although building each candidate
    list still costs O(n) list operations.
    """
    for i in range(len(self.current)):
        # Candidates for this index are all built from the same snapshot.
        snapshot = self.current

        def push_back(k):
            """Try moving element i to position i - k."""
            if k > i:
                return False
            attempt = list(snapshot)
            moved = attempt.pop(i)
            attempt.insert(i - k, moved)
            return self.consider(attempt)

        find_integer(push_back)
def sort_regions(self):
    """Guarantees that for each i we have tried to swap index i with
    index i + 1.

    Starting at each position in turn, find_integer picks a width for a
    contiguous region beginning there, and the region is replaced by its
    sorted form (ordered by ``self.key``). The scan then resumes just past
    that region.
    """
    start = 0
    while start + 1 < len(self.current):
        head = list(self.current[:start])

        def sorts_ok(width):
            """Try sorting the ``width`` elements beginning at ``start``."""
            stop = start + width
            if stop > len(self.current):
                return False
            seq = self.current
            middle = sorted(seq[start:stop], key=self.key)
            return self.consider(head + middle + list(seq[stop:]))

        width = find_integer(sorts_ok)
        # NOTE(review): forward progress relies on width >= 1, i.e. on
        # consider() accepting a candidate equal to the current value -
        # confirm against consider().
        start += width
def hill_climb(self):
    """The main hill climbing loop where we actually do the work: take
    ``self.current_data`` and attempt to improve its score for the target.

    Blocks are visited from the last one towards the first. For each block
    the bytes it covers are read as an integer, and we try replacing that
    integer with other values: first scanning upwards, then downwards.
    Whenever ``self.current_data`` is replaced by a new object the scan
    restarts from the last block, but block indices that were already
    examined are not revisited. The loop stops when the indices run out or
    ``self.improvements`` exceeds ``self.max_improvements``.
    """
    # Block indices that have already been processed in this call.
    blocks_examined = set()

    prev = None
    i = len(self.current_data.blocks) - 1
    while i >= 0 and self.improvements <= self.max_improvements:
        # current_data was replaced since the last iteration (identity
        # check), so restart from its final block.
        if prev is not self.current_data:
            i = len(self.current_data.blocks) - 1
            prev = self.current_data

        if i in blocks_examined:
            i -= 1
            continue

        blocks_examined.add(i)
        data = self.current_data
        block = data.blocks[i]
        prefix = data.buffer[:block.start]

        existing = data.buffer[block.start:block.end]
        existing_as_int = int_from_bytes(existing)
        # Largest value representable in this many bytes.
        max_int_value = (256**len(existing)) - 1

        # Already at the maximum, so there is nothing to raise. i is now in
        # blocks_examined, so the next iteration steps past it.
        if existing_as_int == max_int_value:
            continue

        def attempt_replace(v):
            """Try replacing the current block in the current best test case
             with an integer of value v. Note that we use the *current*
            best and not the one we started with. This helps ensure that
            if we luck into a good draw when making random choices we get
            to keep the good bits."""
            if v < 0 or v > max_int_value:
                return False
            v_as_bytes = int_to_bytes(v, len(existing))

            # We make a couple attempts at replacement. This only matters
            # if we end up growing the buffer - otherwise we exit the loop
            # early - but in the event that there *is* some randomized
            # component we want to give it a couple of tries to succeed.
            for _ in range(3):
                # NOTE(review): the trailing bytes(BUFFER_SIZE) looks like
                # padding for test functions that read past the original
                # data - confirm.
                attempt = self.engine.cached_test_function(
                    prefix
                    + v_as_bytes
                    + self.current_data.buffer[block.end:]
                    + bytes(BUFFER_SIZE),
                )

                if self.consider_new_test_data(attempt):
                    return True

                # Give up if the attempt's status is below Status.INVALID
                # or the buffer length did not change; retrying is only
                # done when the length differs.
                if attempt.status < Status.INVALID or len(
                        attempt.buffer) == len(self.current_data.buffer):
                    return False

                # Look at the examples overlapping this block. Where the
                # attempt's example at the same index has a different
                # length, try splicing the attempt's bytes for that example
                # into the current buffer.
                # Note: this ``i`` is local to attempt_replace and does not
                # affect the block index ``i`` of the enclosing loop.
                for i, ex in enumerate(self.current_data.examples):
                    if ex.start >= block.end:
                        break
                    if ex.end <= block.start:
                        continue
                    ex_attempt = attempt.examples[i]
                    if ex.length == ex_attempt.length:
                        continue
                    replacement = attempt.buffer[ex_attempt.start:ex_attempt.end]
                    if self.consider_new_test_data(
                            self.engine.cached_test_function(
                                prefix + replacement
                                + self.current_data.buffer[ex.end:])):
                        return True
            return False

        # We unconditionally scan both upwards and downwards. The reason
        # for this is that we allow "lateral" moves that don't increase the
        # score but instead leave it constant. All else being equal we'd
        # like to leave the test case closer to shrunk, so afterwards we
        # try lowering the value towards zero even if we've just raised it.

        if not attempt_replace(max_int_value):
            find_integer(lambda k: attempt_replace(k + existing_as_int))

        # Re-read the block: the upward scan may have replaced
        # current_data, and the downward scan starts from the new value.
        existing = self.current_data.buffer[block.start:block.end]
        existing_as_int = int_from_bytes(existing)
        if not attempt_replace(0):
            find_integer(lambda k: attempt_replace(existing_as_int - k))
def shift_right(self):
    """Try replacing the current value with itself shifted right.

    find_integer chooses the shift amount, which is never allowed to
    exceed ``self.size``.
    """
    original = self.current

    def accepts_shift(bits):
        """Try the starting value shifted right by ``bits``."""
        if bits > self.size:
            return False
        return self.consider(original >> bits)

    find_integer(accepts_shift)
def run_step(self):
    """Try to delete as many elements as possible from ``self.current``,
    attempting each element no more than once per call.

    Indices are visited in random order; around each one a contiguous
    region is deleted, grown first to the right and then to the left.
    """
    # We convert the sequence to a set of indices. This allows us to more
    # easily do book-keeping around which elements we've tried removing.
    initial = self.current

    indices = list(hrange(len(self.current)))

    # The set of indices that we have not yet removed (either because
    # we have not yet tried to remove them or because we tried and
    # failed).
    current_subset = set(indices)

    # The set of indices in current_subset that we have not yet tried
    # to remove.
    candidates_for_removal = set(current_subset)

    def consider_set(keep):
        """Try replacing current_subset with current_subset & keep.

        Returns True, and updates current_subset in place, if the
        resulting sequence was accepted by ``self.consider``.
        """
        keep = keep & current_subset
        to_remove = current_subset - keep

        # Once we've tried and failed to delete an element we never
        # attempt to delete it again in the current pass. This can cause
        # us to skip shrinks that would work, but that doesn't matter -
        # if this pass succeeded then it will run again at some point,
        # so those will be picked up later.
        if not to_remove.issubset(candidates_for_removal):
            return False
        if self.consider([v for i, v in enumerate(initial) if i in keep]):
            current_subset.intersection_update(keep)
            return True
        return False

    # We iterate over the indices in random order. This is because deletions
    # towards the end are more likely to work, while deletions from the
    # beginning are more likely to have higher impact. In addition there
    # tend to be large "dead" regions where nothing can be deleted, and
    # by proceeding in random order we don't have long gaps in those where
    # we make no progress.
    #
    # Note that this may be strictly more expensive than iterating from
    # left to right or right to left. The cost of find_integer, say f,
    # grows more slowly than linearly. When deleting n elements starting
    # from the left we pay f(n) invocations, but when starting from the
    # middle we pay 2 f(n / 2) > f(n) invocations. In this case we are
    # prioritising making progress over a possibly strictly lower cost for
    # two reasons: Firstly, when n is small we just do linear scans anyway
    # so this doesn't actually matter, and secondly because successful
    # deletions will tend to speed up the test function and thus even when
    # we make more test function calls we may still win on time.
    #
    # It's also very frustrating watching the shrinker repeatedly fail
    # to delete, so there's a psychological benefit to prioritising
    # progress over cost.
    self.random.shuffle(indices)
    for i in indices:
        # Drop candidates that have since been deleted.
        candidates_for_removal &= current_subset
        if not candidates_for_removal:
            break

        # We have already processed this index, either because it was bulk
        # removed or is the end point of a set that was.
        if i not in candidates_for_removal:
            continue

        # Note that we do not update candidates_for_removal until we've
        # actually tried removing them. This is because our consider_set
        # predicate checks whether we've previously tried deleting them,
        # so removing them here would result in wrong checks!

        # We now try to delete a region around i. We start by trying to
        # delete a region starting with i, i.e. [i, j) for some j > i.
        to_right = find_integer(
            lambda n: i + n <= len(initial)
            and consider_set(current_subset - set(hrange(i, i + n)))
        )

        # If that succeeded we're in a deletable region. It's unlikely that
        # we happened to pick the starting index of that region, so we try
        # to extend it to the left too.
        if to_right > 0:
            to_left = find_integer(
                lambda n: i - n >= 0
                and consider_set(current_subset - set(hrange(i - n, i)))
            )

            # find_integer always tries at least n + 1 when it returns n.
            # This means that we've tried deleting i - (to_left + 1) and
            # failed to do so, so we can remove it from our candidates for
            # deletion. (to_left only exists on this branch, so this must
            # stay inside the if.)
            candidates_for_removal.discard(i - to_left - 1)

        # We've now tried deleting i so remove it.
        candidates_for_removal.discard(i)

        # As per comment above we've also tried deleting one past the end
        # of the region so we remove that from the candidate set too.
        candidates_for_removal.discard(i + to_right)
def shift_right(self):
    """Try shrinking the current value by shifting it to the right.

    The value held at entry is shifted by a number of bits chosen by
    find_integer; shifts larger than ``self.size`` are never tried.
    """
    # Every candidate is derived from the value at entry, not from
    # whatever ``self.current`` becomes while the search runs.
    start_value = self.current
    find_integer(
        lambda shift: shift <= self.size
        and self.consider(start_value >> shift)
    )