def element_strategies(self):
    if self.__element_strategies is None:
        strategies = []
        for arg in self.original_strategies:
            check_strategy(arg)
            if not arg.is_empty:
                strategies.extend([s for s in arg.branches if not s.is_empty])
        # Drop self-references and duplicates, preserving the original order.
        pruned = []
        seen = set()
        for s in strategies:
            if s is self:
                continue
            if s in seen:
                continue
            seen.add(s)
            pruned.append(s)
        # Give each branch a distinct label by mixing our own label with the
        # branch's label and its index, truncated to the label width.
        branch_labels = []
        shift = bit_length(len(pruned))
        for i, p in enumerate(pruned):
            branch_labels.append((((self.label ^ p.label) << shift) + i) & LABEL_MASK)
        self.__element_strategies = pruned
        self.__branch_labels = tuple(branch_labels)
    return self.__element_strategies

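# A minimal, self-contained sketch of the label-combination step above. The
# 64-bit LABEL_MASK width and the example label values are assumptions made
# for illustration; only the XOR/shift/index recipe is taken from the snippet.

LABEL_MASK = 2 ** 64 - 1  # assumed label width

def combine_labels(parent_label, branch_labels):
    shift = len(branch_labels).bit_length()
    return tuple(
        (((parent_label ^ label) << shift) + i) & LABEL_MASK
        for i, label in enumerate(branch_labels)
    )

# Three branches get three distinct, stable labels.
print(combine_labels(0x1234, [0xAAAA, 0xBBBB, 0xCCCC]))
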
def saturate(n):
    bits = bit_length(n)
    k = 1
    while k < bits:
        n |= (n >> k)
        k *= 2
    return n

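# A quick self-contained check of what ``saturate`` computes: ORing a value
# with progressively larger right-shifts of itself sets every bit below the
# highest set bit, so the result is the smallest all-ones mask covering the
# input - which is what the range-drawing code masks its probes with.
# (Standalone sketch; ``bit_length(n)`` is assumed to behave like
# ``int.bit_length``.)

def saturate_demo(n):
    bits = n.bit_length()
    k = 1
    while k < bits:
        n |= n >> k
        k *= 2
    return n

assert saturate_demo(0b10000) == 0b11111
assert saturate_demo(100) == 127   # 100 needs 7 bits, mask is 2**7 - 1
assert saturate_demo(255) == 255
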
def integer_range(data, lower, upper, center=None, distribution=None):
    assert lower <= upper
    if lower == upper:
        return int(lower)

    if center is None:
        center = lower
    center = min(max(center, lower), upper)
    if distribution is None:
        if lower < center < upper:
            def distribution(random):
                if random.randint(0, 1):
                    return random.randint(center, upper)
                else:
                    return random.randint(lower, center)
        else:
            def distribution(random):
                return random.randint(lower, upper)

    gap = upper - lower
    bits = bit_length(gap)
    nbytes = bits // 8 + int(bits % 8 != 0)
    mask = saturate(gap)

    def byte_distribution(random, n):
        assert n == nbytes
        v = distribution(random)
        if v >= center:
            probe = v - center
        else:
            probe = upper - v
        return int_to_bytes(probe, n)

    probe = gap + 1
    while probe > gap:
        probe = int_from_bytes(data.draw_bytes(nbytes, byte_distribution)) & mask

    if center == upper:
        result = upper - probe
    elif center == lower:
        result = lower + probe
    else:
        if center + probe <= upper:
            result = center + probe
        else:
            result = upper - probe

    assert lower <= result <= upper
    return int(result)

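# A standalone check (not part of the snippet itself) that the probe encoding
# used by ``byte_distribution`` round-trips: values at or above ``center`` are
# encoded as their distance from ``center``, values below it as their distance
# from ``upper``, and the decoding branches at the bottom of ``integer_range``
# recover the original value exactly for every position of ``center``.

def encode_probe(v, lower, upper, center):
    return v - center if v >= center else upper - v

def decode_probe(probe, lower, upper, center):
    if center == upper:
        return upper - probe
    if center == lower:
        return lower + probe
    return center + probe if center + probe <= upper else upper - probe

lower, upper = 0, 20
for center in (lower, 7, upper):
    for v in range(lower, upper + 1):
        p = encode_probe(v, lower, upper, center)
        assert decode_probe(p, lower, upper, center) == v
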
def integer_range(data, lower, upper, center=None):
    assert lower <= upper
    if lower == upper:
        # Write a value even when this is trivial so that when a bound depends
        # on other values we don't suddenly disappear when the gap shrinks to
        # zero - if that happens then often the data stream becomes misaligned
        # and we fail to shrink in cases where we really should be able to.
        data.draw_bits(1, forced=0)
        return int(lower)

    if center is None:
        center = lower
    center = min(max(center, lower), upper)
    if center == upper:
        above = False
    elif center == lower:
        above = True
    else:
        above = boolean(data)

    if above:
        gap = upper - center
    else:
        gap = center - lower

    assert gap > 0

    bits = bit_length(gap)
    probe = gap + 1

    if bits > 24 and data.draw_bits(3):
        # For large ranges, we combine the uniform random distribution from
        # draw_bits with the weighting scheme used by WideRangeIntStrategy
        # with moderate chance. Cutoff at 2 ** 24 so unicode choice is
        # uniform but 32bit distribution is not.
        idx = Sampler([4.0, 8.0, 1.0, 1.0, 0.5]).sample(data)
        sizes = [8, 16, 32, 64, 128]
        bits = min(bits, sizes[idx])

    while probe > gap:
        data.start_example(INTEGER_RANGE_DRAW_LABEL)
        probe = data.draw_bits(bits)
        data.stop_example(discard=probe > gap)

    if above:
        result = center + probe
    else:
        result = center - probe

    assert lower <= result <= upper
    return int(result)

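# A rough standalone model (using ``random`` instead of the Hypothesis data
# stream) of the probe-and-reject loop above: draw a ``bits``-wide integer and
# retry until it lands inside the gap. Every value of the full bit-width is
# equally likely, so the accepted probe is uniform on [0, gap], and because
# 2 ** bits < 2 * (gap + 1) fewer than two draws are needed on average.

import random

def draw_probe_uniform(gap, rng=random):
    bits = gap.bit_length()
    probe = gap + 1
    while probe > gap:          # rejection sampling
        probe = rng.getrandbits(bits)
    return probe

counts = [0] * 6
for _ in range(6000):
    counts[draw_probe_uniform(5)] += 1
print(counts)  # roughly equal counts for 0..5
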
def draw_bits(self, n, *, forced=None):
    """Return an ``n``-bit integer from the underlying source of bytes.

    If ``forced`` is set to an integer, this will instead ignore the
    underlying source and simulate a draw as if it had returned that
    integer."""
    self.__assert_not_frozen("draw_bits")
    if n == 0:
        return 0
    assert n > 0
    n_bytes = bits_to_bytes(n)
    self.__check_capacity(n_bytes)

    if forced is not None:
        buf = int_to_bytes(forced, n_bytes)
    elif self.__bytes_drawn < len(self.__prefix):
        index = self.__bytes_drawn
        buf = self.__prefix[index:index + n_bytes]
        if len(buf) < n_bytes:
            buf += uniform(self.__random, n_bytes - len(buf))
    else:
        buf = uniform(self.__random, n_bytes)
    buf = bytearray(buf)
    self.__bytes_drawn += n_bytes

    assert len(buf) == n_bytes

    # If we have a number of bits that is not a multiple of 8
    # we have to mask off the high bits.
    buf[0] &= BYTE_MASKS[n % 8]
    buf = bytes(buf)
    result = int_from_bytes(buf)

    self.observer.draw_bits(n, forced is not None, result)
    self.__example_record.draw_bits(n, forced)

    initial = self.index

    self.buffer.extend(buf)
    self.index = len(self.buffer)

    if forced is not None:
        self.forced_indices.update(range(initial, self.index))

    self.blocks.add_endpoint(self.index)

    assert bit_length(result) <= n
    return result

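# A self-contained illustration (not the method above) of the high-bit masking
# step: an ``n``-bit integer is read from ``ceil(n / 8)`` big-endian bytes and
# only the top byte needs masking. The table layout below, indexed by
# ``n % 8`` with a full-byte mask at index 0, is an assumption made for this
# sketch.

BYTE_MASKS = [0xFF] + [(1 << k) - 1 for k in range(1, 8)]  # assumed layout

def n_bit_int_from_bytes(n, raw):
    buf = bytearray(raw)
    assert len(buf) == (n + 7) // 8
    buf[0] &= BYTE_MASKS[n % 8]   # drop the unused high bits of the top byte
    return int.from_bytes(bytes(buf), "big")

value = n_bit_int_from_bytes(10, b"\xff\xff")
assert value == 0x3FF and value.bit_length() <= 10
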
def integer_range(data, lower, upper, center=None):
    assert lower <= upper
    if lower == upper:
        # Write a value even when this is trivial so that when a bound depends
        # on other values we don't suddenly disappear when the gap shrinks to
        # zero - if that happens then often the data stream becomes misaligned
        # and we fail to shrink in cases where we really should be able to.
        data.write(hbytes([0]))
        return int(lower)

    if center is None:
        center = lower
    center = min(max(center, lower), upper)
    if center == upper:
        above = False
    elif center == lower:
        above = True
    else:
        above = boolean(data)

    if above:
        gap = upper - center
    else:
        gap = center - lower

    assert gap > 0

    bits = bit_length(gap)
    probe = gap + 1
    while probe > gap:
        data.start_example(INTEGER_RANGE_DRAW_LABEL)
        probe = data.draw_bits(bits)
        data.stop_example(discard=probe > gap)

    if above:
        result = center + probe
    else:
        result = center - probe

    assert lower <= result <= upper
    return int(result)

def draw_bits(self, n):
    self.__assert_not_frozen('draw_bits')
    if n == 0:
        result = 0
    elif n % 8 == 0:
        return int_from_bytes(self.draw_bytes(n // 8))
    else:
        n_bytes = (n // 8) + 1
        self.__check_capacity(n_bytes)
        buf = bytearray(self._draw_bytes(self, n_bytes))
        assert len(buf) == n_bytes
        mask = (1 << (n % 8)) - 1
        buf[0] &= mask
        self.capped_indices[self.index] = mask
        buf = hbytes(buf)
        self.__write(buf)
        result = int_from_bytes(buf)
    assert bit_length(result) <= n
    return result

def draw_bits(self, n, forced=None):
    """Return an ``n``-bit integer from the underlying source of bytes.

    If ``forced`` is set to an integer, this will instead ignore the
    underlying source and simulate a draw as if it had returned that
    integer."""
    self.__assert_not_frozen("draw_bits")
    if n == 0:
        return 0
    assert n > 0
    n_bytes = bits_to_bytes(n)
    self.__check_capacity(n_bytes)

    if forced is not None:
        buf = bytearray(int_to_bytes(forced, n_bytes))
    else:
        buf = bytearray(self._draw_bytes(self, n_bytes))
    assert len(buf) == n_bytes

    # If we have a number of bits that is not a multiple of 8
    # we have to mask off the high bits.
    buf[0] &= BYTE_MASKS[n % 8]
    buf = hbytes(buf)
    result = int_from_bytes(buf)

    self.observer.draw_bits(n, forced is not None, result)

    self.start_example(DRAW_BYTES_LABEL)
    initial = self.index

    self.buffer.extend(buf)
    self.index = len(self.buffer)

    if forced is not None:
        self.forced_indices.update(hrange(initial, self.index))

    self.blocks.add_endpoint(self.index)
    self.stop_example()

    assert bit_length(result) <= n
    return result

def integer_range(data, lower, upper, center=None):
    assert lower <= upper
    if lower == upper:
        return int(lower)

    if center is None:
        center = lower
    center = min(max(center, lower), upper)

    bits = bit_length(max(upper - center, center - lower))
    nbytes = bits // 8 + int(bits % 8 != 0)

    if center == upper:
        above = False
    elif center == lower:
        above = True
    else:
        above = boolean(data)

    if above:
        gap = upper - center
    else:
        gap = center - lower

    assert gap > 0
    mask = saturate(gap)
    probe = gap + 1
    while probe > gap:
        probe = int_from_bytes(data.draw_bytes(nbytes)) & mask

    if above:
        result = center + probe
    else:
        result = center - probe

    assert lower <= result <= upper
    return int(result)

def draw_bits(self, n, forced=None):
    """Return an ``n``-bit integer from the underlying source of bytes.

    If ``forced`` is set to an integer, this will instead ignore the
    underlying source and simulate a draw as if it had returned that
    integer."""
    self.__assert_not_frozen("draw_bits")
    if n == 0:
        return 0
    assert n > 0
    n_bytes = bits_to_bytes(n)
    self.__check_capacity(n_bytes)

    if forced is not None:
        buf = bytearray(int_to_bytes(forced, n_bytes))
    else:
        buf = bytearray(self._draw_bytes(self, n_bytes))
    assert len(buf) == n_bytes

    # If we have a number of bits that is not a multiple of 8
    # we have to mask off the high bits.
    buf[0] &= BYTE_MASKS[n % 8]
    buf = hbytes(buf)
    result = int_from_bytes(buf)

    self.observer.draw_bits(n, forced is not None, result)
    self.__example_record.draw_bits(n, forced)

    initial = self.index

    self.buffer.extend(buf)
    self.index = len(self.buffer)

    if forced is not None:
        self.forced_indices.update(hrange(initial, self.index))

    self.blocks.add_endpoint(self.index)

    assert bit_length(result) <= n
    return result

def integer_range(data, lower, upper, center=None):
    assert lower <= upper
    if lower == upper:
        return int(lower)

    if center is None:
        center = lower
    center = min(max(center, lower), upper)
    if center == upper:
        above = False
    elif center == lower:
        above = True
    else:
        above = boolean(data)

    if above:
        gap = upper - center
    else:
        gap = center - lower

    assert gap > 0

    bits = bit_length(gap)
    probe = gap + 1
    while probe > gap:
        data.start_example()
        probe = data.draw_bits(bits)
        data.stop_example(discard=probe > gap)

    if above:
        result = center + probe
    else:
        result = center - probe

    assert lower <= result <= upper
    return int(result)

def integer_range(data, lower, upper, center=None):
    assert lower <= upper
    if lower == upper:
        return int(lower)

    if center is None:
        center = lower
    center = min(max(center, lower), upper)
    if center == upper:
        above = False
    elif center == lower:
        above = True
    else:
        above = boolean(data)

    if above:
        gap = upper - center
    else:
        gap = center - lower

    assert gap > 0

    bits = bit_length(gap)
    probe = gap + 1
    while probe > gap:
        probe = data.draw_bits(bits)

    if above:
        result = center + probe
    else:
        result = center - probe

    assert lower <= result <= upper
    return int(result)

def biased_coin(data, p): """Return False with probability p (assuming a uniform generator), shrinking towards False.""" data.start_example(BIASED_COIN_LABEL) while True: # The logic here is a bit complicated and special cased to make it # play better with the shrinker. # We imagine partitioning the real interval [0, 1] into 256 equal parts # and looking at each part and whether its interior is wholly <= p # or wholly >= p. At most one part can be neither. # We then pick a random part. If it's wholly on one side or the other # of p then we use that as the answer. If p is contained in the # interval then we start again with a new probability that is given # by the fraction of that interval that was <= our previous p. # We then take advantage of the fact that we have control of the # labelling to make this shrink better, using the following tricks: # If p is <= 0 or >= 1 the result of this coin is certain. We make sure # to write a byte to the data stream anyway so that these don't cause # difficulties when shrinking. if p <= 0: data.draw_bits(1, forced=0) result = False elif p >= 1: data.draw_bits(1, forced=1) result = True else: falsey = floor(256 * (1 - p)) truthy = floor(256 * p) remainder = 256 * p - truthy if falsey + truthy == 256: if isinstance(p, Fraction): m = p.numerator n = p.denominator else: m, n = p.as_integer_ratio() assert n & (n - 1) == 0, n # n is a power of 2 assert n > m > 0 truthy = m falsey = n - m bits = bit_length(n) - 1 partial = False else: bits = 8 partial = True i = data.draw_bits(bits) # We always label the region that causes us to repeat the loop as # 255 so that shrinking this byte never causes us to need to draw # more data. if partial and i == 255: p = remainder continue if falsey == 0: # Every other partition is truthy, so the result is true result = True elif truthy == 0: # Every other partition is falsey, so the result is false result = False elif i <= 1: # We special case so that zero is always false and 1 is always # true which makes shrinking easier because we can always # replace a truthy block with 1. This has the slightly weird # property that shrinking from 2 to 1 can cause the result to # grow, but the shrinker always tries 0 and 1 first anyway, so # this will usually be fine. result = bool(i) else: # Originally everything in the region 0 <= i < falsey was false # and everything above was true. We swapped one truthy element # into this region, so the region becomes 0 <= i <= falsey # except for i = 1. We know i > 1 here, so the test for truth # becomes i > falsey. result = i > falsey break data.stop_example() return result
def biased_coin(data, p): """Return False with probability p (assuming a uniform generator), shrinking towards False.""" data.start_example(BIASED_COIN_LABEL) while True: # The logic here is a bit complicated and special cased to make it # play better with the shrinker. # We imagine partitioning the real interval [0, 1] into 256 equal parts # and looking at each part and whether its interior is wholly <= p # or wholly >= p. At most one part can be neither. # We then pick a random part. If it's wholly on one side or the other # of p then we use that as the answer. If p is contained in the # interval then we start again with a new probability that is given # by the fraction of that interval that was <= our previous p. # We then take advantage of the fact that we have control of the # labelling to make this shrink better, using the following tricks: # If p is <= 0 or >= 1 the result of this coin is certain. We make sure # to write a byte to the data stream anyway so that these don't cause # difficulties when shrinking. if p <= 0: data.write(hbytes([0])) result = False elif p >= 1: data.write(hbytes([1])) result = True else: falsey = floor(256 * (1 - p)) truthy = floor(256 * p) remainder = 256 * p - truthy if falsey + truthy == 256: if isinstance(p, Fraction): m = p.numerator n = p.denominator else: m, n = p.as_integer_ratio() assert n & (n - 1) == 0, n # n is a power of 2 assert n > m > 0 truthy = m falsey = n - m bits = bit_length(n) - 1 partial = False else: bits = 8 partial = True i = data.draw_bits(bits) # We always label the region that causes us to repeat the loop as # 255 so that shrinking this byte never causes us to need to draw # more data. if partial and i == 255: p = remainder continue if falsey == 0: # Every other partition is truthy, so the result is true result = True elif truthy == 0: # Every other partition is falsey, so the result is false result = False elif i <= 1: # We special case so that zero is always false and 1 is always # true which makes shrinking easier because we can always # replace a truthy block with 1. This has the slightly weird # property that shrinking from 2 to 1 can cause the result to # grow, but the shrinker always tries 0 and 1 first anyway, so # this will usually be fine. result = bool(i) else: # Originally everything in the region 0 <= i < falsey was false # and everything above was true. We swapped one truthy element # into this region, so the region becomes 0 <= i <= falsey # except for i = 1. We know i > 1 here, so the test for truth # becomes i > falsey. result = i > falsey break data.stop_example() return result
def do_filtered_draw(self, data, filter_strategy): # Set of indices that have been tried so far, so that we never test # the same element twice during a draw. known_bad_indices = set() def check_index(i): """Return ``True`` if the element at ``i`` satisfies the filter condition. """ if i in known_bad_indices: return False ok = filter_strategy.condition(self.elements[i]) if not ok: if not known_bad_indices: filter_strategy.note_retried(data) known_bad_indices.add(i) return ok # Start with ordinary rejection sampling. It's fast if it works, and # if it doesn't work then it was only a small amount of overhead. for _ in hrange(3): i = d.integer_range(data, 0, len(self.elements) - 1) if check_index(i): return self.elements[i] # If we've tried all the possible elements, give up now. max_good_indices = len(self.elements) - len(known_bad_indices) if not max_good_indices: return filter_not_satisfied # Figure out the bit-length of the index that we will write back after # choosing an allowed element. write_length = bit_length(len(self.elements)) # Impose an arbitrary cutoff to prevent us from wasting too much time # on very large element lists. cutoff = 10000 max_good_indices = min(max_good_indices, cutoff) # Before building the list of allowed indices, speculatively choose # one of them. We don't yet know how many allowed indices there will be, # so this choice might be out-of-bounds, but that's OK. speculative_index = d.integer_range(data, 0, max_good_indices - 1) # Calculate the indices of allowed values, so that we can choose one # of them at random. But if we encounter the speculatively-chosen one, # just use that and return immediately. allowed_indices = [] for i in hrange(min(len(self.elements), cutoff)): if check_index(i): allowed_indices.append(i) if len(allowed_indices) > speculative_index: # Early-exit case: We reached the speculative index, so # we just return the corresponding element. data.draw_bits(write_length, forced=i) return self.elements[i] # The speculative index didn't work out, but at this point we've built # the complete list of allowed indices, so we can just choose one of # them. if allowed_indices: i = d.choice(data, allowed_indices) data.draw_bits(write_length, forced=i) return self.elements[i] # If there are no allowed indices, the filter couldn't be satisfied. return filter_not_satisfied
def do_filtered_draw(self, data, filter_strategy): # Set of indices that have been tried so far, so that we never test # the same element twice during a draw. known_bad_indices = set() def check_index(i): """Return ``True`` if the element at ``i`` satisfies the filter condition. """ if i in known_bad_indices: return False ok = filter_strategy.condition(self.elements[i]) if not ok: if not known_bad_indices: filter_strategy.note_retried(data) known_bad_indices.add(i) return ok # Start with ordinary rejection sampling. It's fast if it works, and # if it doesn't work then it was only a small amount of overhead. for _ in hrange(3): i = cu.integer_range(data, 0, len(self.elements) - 1) if check_index(i): return self.elements[i] # If we've tried all the possible elements, give up now. max_good_indices = len(self.elements) - len(known_bad_indices) if not max_good_indices: return filter_not_satisfied # Figure out the bit-length of the index that we will write back after # choosing an allowed element. write_length = bit_length(len(self.elements)) # Impose an arbitrary cutoff to prevent us from wasting too much time # on very large element lists. cutoff = 10000 max_good_indices = min(max_good_indices, cutoff) # Before building the list of allowed indices, speculatively choose # one of them. We don't yet know how many allowed indices there will be, # so this choice might be out-of-bounds, but that's OK. speculative_index = cu.integer_range(data, 0, max_good_indices - 1) # Calculate the indices of allowed values, so that we can choose one # of them at random. But if we encounter the speculatively-chosen one, # just use that and return immediately. allowed_indices = [] for i in hrange(min(len(self.elements), cutoff)): if check_index(i): allowed_indices.append(i) if len(allowed_indices) > speculative_index: # Early-exit case: We reached the speculative index, so # we just return the corresponding element. data.draw_bits(write_length, forced=i) return self.elements[i] # The speculative index didn't work out, but at this point we've built # the complete list of allowed indices, so we can just choose one of # them. if allowed_indices: i = cu.choice(data, allowed_indices) data.draw_bits(write_length, forced=i) return self.elements[i] # If there are no allowed indices, the filter couldn't be satisfied. return filter_not_satisfied
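# A standalone sketch of the speculative-index idea above, with plain
# ``random`` in place of the data stream and without the replay bookkeeping
# (the ``draw_bits(..., forced=i)`` calls): try rejection sampling a few
# times, then pick a position among the not-yet-known-bad indices and scan
# forward until that many allowed elements have been seen.

import random

def filtered_choice_model(elements, condition, rng=random):
    known_bad = set()

    def ok(i):
        if i in known_bad:
            return False
        if condition(elements[i]):
            return True
        known_bad.add(i)
        return False

    # Cheap attempts first.
    for _ in range(3):
        i = rng.randrange(len(elements))
        if ok(i):
            return elements[i]

    max_good = len(elements) - len(known_bad)
    if not max_good:
        return None  # stand-in for filter_not_satisfied
    speculative = rng.randrange(max_good)

    allowed = []
    for i in range(len(elements)):
        if ok(i):
            allowed.append(i)
            if len(allowed) > speculative:
                return elements[i]   # the speculative pick exists; take it
    return elements[rng.choice(allowed)] if allowed else None

print(filtered_choice_model(list(range(20)), lambda x: x % 7 == 3))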