def test_database_clears_secondary_key():
    """Check that ``clear_secondary_key`` empties only the secondary key.

    Ten distinct values are stored under the runner's secondary key and an
    interesting example is executed.  After clearing, the primary key still
    holds one distinct entry while the secondary key holds none.
    """
    primary_key = b"key"
    db = InMemoryExampleDatabase()

    def test_fn(data):
        # Only a first byte equal to 10 is treated as interesting.
        if data.draw_bits(8) != 10:
            data.mark_invalid()
        else:
            data.mark_interesting()

    runner = ConjectureRunner(
        test_fn,
        settings=settings(
            max_examples=1,
            buffer_size=1024,
            database=db,
            suppress_health_check=HealthCheck.all(),
        ),
        database_key=primary_key,
    )

    for value in range(10):
        db.save(runner.secondary_key, hbytes([value]))

    runner.cached_test_function([10])
    assert runner.interesting_examples

    def distinct_entries(db_key):
        # Number of distinct values currently stored under ``db_key``.
        return len(set(db.fetch(db_key)))

    assert distinct_entries(primary_key) == 1
    assert distinct_entries(runner.secondary_key) == 10

    runner.clear_secondary_key()

    assert distinct_entries(primary_key) == 1
    assert distinct_entries(runner.secondary_key) == 0
def test_exhaustive_enumeration(prefix, bits, seed):
    """Enumerate every ``bits``-wide value after an optional written prefix.

    Each novel prefix produced by the runner must be new, must lead to a
    valid test case, and must end at a node the runner has marked dead.  The
    test function itself asserts that no drawn value is ever repeated.
    """
    drawn_values = set()

    def f(data):
        if prefix:
            data.write(hbytes(prefix))
            assert len(data.buffer) == len(prefix)
        value = data.draw_bits(bits)
        assert value not in drawn_values
        drawn_values.add(value)

    size = 2 ** bits
    prefixes_so_far = set()

    runner = ConjectureRunner(
        f,
        settings=settings(database=None, max_examples=size),
        random=Random(seed),
    )

    with pytest.raises(RunIsComplete):
        runner.cached_test_function(b'')
        for _ in hrange(size):
            novel = runner.generate_novel_prefix()
            assert novel not in prefixes_so_far
            prefixes_so_far.add(novel)

            # Pad the prefix with zero bytes so the test function can finish.
            padding = hbytes(2 + len(prefix))
            data = ConjectureData.for_buffer(hbytes(novel + padding))
            runner.test_function(data)
            assert data.status == Status.VALID

            # Walk the runner's tree along the buffer actually consumed.
            position = 0
            for byte in data.buffer:
                position = runner.tree[position][byte]
            assert position in runner.dead

    assert len(drawn_values) == size
def test_debug_data(capsys):
    """Check the debug-verbosity output format of a run.

    The captured stdout must start with a ``<n> bytes [...] -> `` line and
    must mention ``INTERESTING`` somewhere.
    """
    buf = [0, 1, 2]

    def f(data):
        for expected in hbytes(buf):
            if data.draw_bits(8) != expected:
                data.mark_invalid()
            # Open and immediately close an example after each matching draw.
            data.start_example(1)
            data.stop_example()
        data.mark_interesting()

    debug_settings = settings(
        max_examples=5000,
        buffer_size=1024,
        database=None,
        suppress_health_check=HealthCheck.all(),
        verbosity=Verbosity.debug,
    )
    runner = ConjectureRunner(f, settings=debug_settings)
    runner.cached_test_function(buf)
    runner.run()

    captured = capsys.readouterr()
    out = captured[0]
    assert re.match(u"\\d+ bytes \\[.*\\] -> ", out)
    assert "INTERESTING" in out
def test_exhaustive_enumeration_of_partial_buffer():
    """Enumerate 256 novel prefixes for a two-byte draw with a 2-byte buffer.

    The test function asserts the second drawn byte is always zero and that
    no two-byte value repeats; each novel prefix must be new, valid, and end
    at a node the runner has marked dead.
    """
    drawn_values = set()

    def f(data):
        pair = data.draw_bytes(2)
        assert pair[1] == 0
        assert pair not in drawn_values
        drawn_values.add(pair)

    prefixes_so_far = set()

    runner = ConjectureRunner(
        f,
        settings=settings(database=None, max_examples=256, buffer_size=2),
        random=Random(0),
    )

    with pytest.raises(RunIsComplete):
        runner.cached_test_function(b'')
        for _ in hrange(256):
            novel = runner.generate_novel_prefix()
            assert novel not in prefixes_so_far
            prefixes_so_far.add(novel)

            # Zero-pad so the two-byte draw can always complete.
            data = ConjectureData.for_buffer(hbytes(novel + hbytes(2)))
            runner.test_function(data)
            assert data.status == Status.VALID

            # Follow the consumed buffer through the runner's tree.
            position = 0
            for byte in data.buffer:
                position = runner.tree[position][byte]
            assert position in runner.dead

    assert len(drawn_values) == 256
def test_overruns_if_prefix():
    """A strict prefix of an executed buffer rewrites to ``Status.OVERRUN``."""

    def draw_two_bits(data):
        return [data.draw_bits(1) for _ in range(2)]

    runner = ConjectureRunner(
        draw_two_bits,
        settings=TEST_SETTINGS,
        random=Random(0),
    )
    runner.cached_test_function(b"\0\0")

    rewritten = runner.tree.rewrite(b"\0")
    assert rewritten[1] == Status.OVERRUN
def test_detects_too_small_block_starts():
    """A too-short buffer is reported as an overrun without a second call.

    The test function asserts it is only ever invoked once, so the 7-byte
    lookup must be answered without re-executing it.
    """
    invocations = [0]

    def f(data):
        assert invocations[0] == 0
        invocations[0] += 1
        data.draw_bytes(8)
        data.mark_interesting()

    runner = ConjectureRunner(f, settings=settings(database=None))

    full = runner.cached_test_function(hbytes(8))
    assert full.status == Status.INTERESTING
    assert invocations[0] == 1

    # One byte short of what the test function draws.
    short = runner.cached_test_function(hbytes([255] * 7))
    assert short.status == Status.OVERRUN
    assert invocations[0] == 1
def test_avoids_zig_zag_trap(p):
    """Shrink a pair of adjacent integers within a bounded number of shrinks.

    The interesting condition is ``abs(m - n) == 1`` with ``m`` at least
    ``lower_bound`` and a trailing marker intact.  After running, ``m`` must
    equal ``lower_bound``, ``n`` must be its expected neighbour, and the
    number of shrinks must stay within the computed budget.
    """
    raw_prefix, raw_marker, lower_bound = p

    random.seed(0)

    prefix_bytes = hbytes(raw_prefix)
    marker_bytes = hbytes(raw_marker)

    n_bits = 8 * (len(prefix_bytes) + 1)

    def test_function(data):
        first = data.draw_bits(n_bits)
        if first < lower_bound:
            data.mark_invalid()
        second = data.draw_bits(n_bits)
        if data.draw_bytes(len(marker_bytes)) != marker_bytes:
            data.mark_invalid()
        if abs(first - second) == 1:
            data.mark_interesting()

    runner = ConjectureRunner(
        test_function,
        database_key=None,
        settings=settings(base_settings, phases=(Phase.generate, Phase.shrink)),
    )

    # Seed the run with two values that differ by exactly one.
    initial = (
        prefix_bytes + hbytes([0]) + prefix_bytes + hbytes([1]) + marker_bytes
    )
    runner.cached_test_function(initial)
    assert runner.interesting_examples

    runner.run()

    (final,) = runner.interesting_examples.values()
    replay = ConjectureData.for_buffer(final.buffer)
    m = replay.draw_bits(n_bits)
    n = replay.draw_bits(n_bits)

    assert m == lower_bound
    expected_n = 1 if m == 0 else m - 1
    assert n == expected_n

    budget = 2 * n_bits * ceil(log(n_bits, 2)) + 2
    assert runner.shrinks <= budget
def accept(f):
    """Run ``f`` on ``start`` and return a shrinker for the interesting result.

    ``start`` comes from the enclosing scope.  Exactly one interesting
    example is expected after executing it.
    """

    def is_interesting(d):
        return d.status == Status.INTERESTING

    with deterministic_PRNG():
        runner_settings = settings(
            max_examples=5000,
            buffer_size=1024,
            database=None,
            suppress_health_check=HealthCheck.all(),
        )
        runner = ConjectureRunner(f, settings=runner_settings)
        runner.cached_test_function(start)
        assert runner.interesting_examples
        (last_data,) = runner.interesting_examples.values()
        return runner.new_shrinker(last_data, is_interesting)
def test_will_evict_entries_from_the_cache(monkeypatch):
    """With ``CACHE_SIZE`` patched to 5, cycling through 10 inputs never hits the cache."""
    monkeypatch.setattr(engine_module, "CACHE_SIZE", 5)
    executions = [0]

    def tf(data):
        data.draw_bytes(1)
        executions[0] += 1

    runner = ConjectureRunner(tf, settings=TEST_SETTINGS)

    rounds = 3
    distinct_inputs = 10
    for _ in range(rounds):
        for byte in range(distinct_inputs):
            runner.cached_test_function([byte])

    # Ten distinct inputs exceed the patched cache size of five, so earlier
    # results have been evicted by the time they are requested again and
    # every call has to execute the test function.
    assert executions[0] == 30
def test_cached_test_function_does_not_reinvoke_on_prefix():
    """Prefixes of an executed buffer return ``Overrun`` without re-running."""
    invocations = [0]

    def test_function(data):
        invocations[0] += 1
        data.draw_bits(8)
        data.write(hbytes([7]))
        data.draw_bits(8)

    with deterministic_PRNG():
        runner = ConjectureRunner(test_function, settings=TEST_SETTINGS)

        full = runner.cached_test_function(hbytes(3))
        assert full.status == Status.VALID

        # Every shorter all-zero buffer is a strict prefix of the one above.
        for length in (2, 1, 0):
            shorter = runner.cached_test_function(hbytes(length))
            assert shorter is Overrun

        assert invocations[0] == 1
def accept(tf):
    """Build a runner for ``tf``, feed it ``examples``, and verify tree rewrites.

    ``examples`` comes from the enclosing scope.  After all of them have been
    executed, rewriting each one through the runner's tree must reproduce the
    status and buffer that execution returned.  Returns the runner.
    """
    runner = ConjectureRunner(tf, settings=TEST_SETTINGS, random=Random(0))
    # Replace exit_with with a no-op so the runner ignores exit requests.
    runner.exit_with = lambda reason: None

    executed = [
        (buf, runner.cached_test_function(buf)) for buf in map(hbytes, examples)
    ]

    for buf, result in executed:
        rewritten, status = runner.tree.rewrite(buf)
        assert status == result.status
        assert rewritten == result.buffer

    return runner
def test_fully_exhaust_base(monkeypatch):
    """Exhaust one first-byte subtree before the run without exhausting the root.

    All four second-byte values are executed under first byte 0, after which
    node 1 must be dead; the full run then has to complete while the test
    function asserts it never sees a repeated pair.
    """
    seed_random(0)

    observed_pairs = set()

    def f(data):
        pair = (data.draw_bits(2), data.draw_bits(2))
        assert pair not in observed_pairs
        observed_pairs.add(pair)

    runner_settings = settings(
        max_examples=10000, phases=no_shrink, buffer_size=1024, database=None,
    )
    runner = ConjectureRunner(f, settings=runner_settings)

    for second in hrange(4):
        runner.cached_test_function([0, second])

    assert 1 in runner.dead

    runner.run()
def test_novel_prefixes_are_novel():
    """A generated novel prefix has no known status until it is executed."""

    def tf(data):
        for _ in range(4):
            data.write(b"\0")
            data.draw_bits(2)

    runner = ConjectureRunner(tf, settings=TEST_SETTINGS, random=Random(0))
    tree = runner.tree

    for _ in range(100):
        novel = tree.generate_novel_prefix(runner.random)
        # Zero-pad to the eight bytes the test function consumes.
        example = novel + hbytes(8 - len(novel))

        status_before = tree.rewrite(example)[1]
        assert status_before is None

        result = runner.cached_test_function(example)
        buffer_after = tree.rewrite(example)[0]
        assert buffer_after == result.buffer
def test_cached_with_masked_byte_agrees_with_results(byte_a, byte_b):
    """The cache shares a result between two bytes only when execution agrees."""

    def f(data):
        data.draw_bits(2)

    runner = ConjectureRunner(f)

    buffer_a = hbytes([byte_a])
    cached_a = runner.cached_test_function(buffer_a)
    cached_b = runner.cached_test_function(hbytes([byte_b]))

    # Execute B directly, bypassing the cache, to obtain the real result.
    data_b = ConjectureData.for_buffer(hbytes([byte_b]))
    runner.test_function(data_b)

    # A shared cached object means the real buffer for B must equal A's;
    # distinct cached objects mean the two buffers must differ.
    same_cached_object = cached_a is cached_b
    same_real_buffer = cached_a.buffer == data_b.buffer
    assert same_cached_object == same_real_buffer
def test_cached_test_function_returns_right_value():
    """Cached lookups return the right status and buffer for each input.

    Two inputs are each requested twice; the test function must only have
    been executed twice in total.
    """
    executions = [0]

    def tf(data):
        executions[0] += 1
        data.draw_bits(2)
        data.mark_interesting()

    inputs = (b"\0", b"\1")

    with deterministic_PRNG():
        runner = ConjectureRunner(tf, settings=TEST_SETTINGS)
        for _ in range(2):
            for buf in inputs:
                result = runner.cached_test_function(buf)
                assert result.status == Status.INTERESTING
                assert result.buffer == buf
        assert executions[0] == 2
def test_cached_test_function_returns_right_value():
    """Repeated cached lookups agree with the first execution.

    Each of two single-byte inputs is looked up twice; both lookups must
    report an interesting result with the same buffer, and the test function
    must have run exactly twice overall.
    """
    run_counter = [0]

    def tf(data):
        run_counter[0] += 1
        data.draw_bits(2)
        data.mark_interesting()

    with deterministic_PRNG():
        runner = ConjectureRunner(tf, settings=TEST_SETTINGS)
        for _ in hrange(2):
            for payload in (b"\0", b"\1"):
                outcome = runner.cached_test_function(payload)
                assert outcome.status == Status.INTERESTING
                assert outcome.buffer == payload
        assert run_counter[0] == 2
def test_detects_too_small_block_starts():
    """A too-short buffer is an overrun after a direct ``test_function`` run.

    The test function asserts it is only ever invoked once, so the cached
    lookup of the 7-byte buffer must not execute it again.
    """
    invocations = [0]

    def f(data):
        assert invocations[0] == 0
        invocations[0] += 1
        data.draw_bytes(8)
        data.mark_interesting()

    runner = ConjectureRunner(f, settings=settings(database=None))

    # Execute the full-length buffer directly rather than through the cache.
    full = ConjectureData.for_buffer(hbytes(8))
    runner.test_function(full)
    assert full.status == Status.INTERESTING
    assert invocations[0] == 1

    short = runner.cached_test_function(hbytes([255] * 7))
    assert short.status == Status.OVERRUN
    assert invocations[0] == 1
def test_cached_with_masked_byte_agrees_with_results(byte_a, byte_b):
    """The cache shares a result between two bytes only when execution agrees."""

    def f(data):
        data.draw_bits(2)

    runner = ConjectureRunner(f)

    cached_a = runner.cached_test_function(bytes([byte_a]))
    cached_b = runner.cached_test_function(bytes([byte_b]))

    # Execute B directly with a fresh tree observer to obtain the real result.
    observer = runner.tree.new_observer()
    data_b = ConjectureData.for_buffer(bytes([byte_b]), observer=observer)
    runner.test_function(data_b)

    # A shared cached object means the real buffer for B must equal A's;
    # distinct cached objects mean the two buffers must differ.
    same_cached_object = cached_a is cached_b
    same_real_buffer = cached_a.buffer == data_b.buffer
    assert same_cached_object == same_real_buffer
def test_stops_optimising_once_interesting():
    """Pareto optimisation stops early once an interesting example is found.

    Starting from the one 16-bit value that is not interesting, the runner
    must find an interesting example within 20 calls.
    """
    hi = 2**16 - 1

    def test(data):
        value = data.draw_bits(16)
        data.target_observations[""] = value
        if value < hi:
            data.mark_interesting()

    runner_settings = settings(
        max_examples=10000, database=InMemoryExampleDatabase()
    )
    runner = ConjectureRunner(
        test,
        settings=runner_settings,
        database_key=b"stuff",
    )

    # Two 0xFF bytes decode to ``hi``, which is valid but not interesting.
    initial = runner.cached_test_function([255] * 2)
    assert initial.status == Status.VALID

    runner.pareto_optimise()

    assert runner.call_count <= 20
    assert runner.interesting_examples
def test_database_contains_only_pareto_front():
    """After a run the database holds exactly the pareto front's buffers."""

    def test(data):
        data.target_observations["1"] = data.draw_bits(4)
        data.draw_bits(64)
        data.target_observations["2"] = data.draw_bits(8)

    with deterministic_PRNG():
        db = InMemoryExampleDatabase()

        runner = ConjectureRunner(
            test,
            settings=settings(
                max_examples=500,
                database=db,
                suppress_health_check=HealthCheck.all(),
            ),
            database_key=b"stuff",
        )
        runner.run()

        front = runner.pareto_front
        assert len(front) <= 500
        for entry in front:
            assert entry.status >= Status.VALID

        # Only a single database key may have been written to.
        assert len(db.data) == 1
        (stored,) = db.data.values()
        stored_buffers = set(stored)
        assert len(stored_buffers) == len(front)

        for entry in front:
            assert entry.buffer in stored_buffers
            assert entry in front

        for buf in stored_buffers:
            assert runner.cached_test_function(buf) in front
def test_does_not_duplicate_elements():
    """Re-adding an existing result to the pareto front does not grow it."""

    def test(data):
        data.target_observations["m"] = data.draw_bits(8)

    runner = ConjectureRunner(
        test,
        settings=settings(TEST_SETTINGS, database=InMemoryExampleDatabase()),
        database_key=b"stuff",
    )

    result = runner.cached_test_function([1]).as_result()
    assert len(runner.pareto_front) == 1

    # In practice this situation arises when a test is re-executed, e.g.
    # because its result expired from the cache.  Adding the result directly
    # is simpler than simulating that failure mode.
    added = runner.pareto_front.add(result)
    assert added

    assert len(runner.pareto_front) == 1
def test_branch_ending_in_write():
    """Novel prefixes stay novel when a branch ends in a forced draw.

    The test function asserts it never sees the same buffer twice; each of
    the 100 attempts must be valid and must start with the consumed buffer.
    """
    seen_buffers = set()

    def tf(data):
        ones = 0
        while data.draw_bits(1):
            ones += 1

        if ones > 1:
            # The draw is forced to zero rather than read from the buffer.
            data.draw_bits(1, forced=0)

        consumed = hbytes(data.buffer)
        assert consumed not in seen_buffers
        seen_buffers.add(consumed)

    with deterministic_PRNG():
        runner = ConjectureRunner(tf, settings=TEST_SETTINGS)

        for _ in hrange(100):
            novel = runner.generate_novel_prefix()
            attempt = novel + hbytes(2)
            result = runner.cached_test_function(attempt)
            assert result.status == Status.VALID
            assert attempt.startswith(result.buffer)
def test_can_patch_up_examples():
    """Target optimisation can raise ``m`` to 63 despite a fixed trailing part.

    ``m`` controls how many further bits are drawn inside the example, and
    the four bytes after the example must equal 0, 1, 2, 3.
    """

    def test(data):
        data.start_example(42)
        m = data.draw_bits(6)
        data.target_observations["m"] = m
        for _ in range(m):
            data.draw_bits(1)
        data.stop_example()
        for expected in range(4):
            if expected != data.draw_bits(8):
                data.mark_invalid()

    with deterministic_PRNG():
        runner = ConjectureRunner(test, settings=TEST_SETTINGS)
        initial = runner.cached_test_function([0, 0, 1, 2, 3, 4])
        assert initial.status == Status.VALID

        # The optimiser may finish the run; that is not a failure here.
        try:
            runner.optimise_targets()
        except RunIsComplete:
            pass

        assert runner.best_observed_targets["m"] == 63
def test_branch_ending_in_write():
    """Each novel prefix yields a previously unseen buffer.

    The test function draws bits until a zero, makes one forced zero draw
    when more than one bit was set, and asserts the buffer is new.  All 100
    attempts must be valid and must start with the consumed buffer.
    """
    buffers_seen = set()

    def tf(data):
        set_bits = 0
        while data.draw_bits(1):
            set_bits += 1

        if set_bits > 1:
            # Forced draw: the value is fixed at zero.
            data.draw_bits(1, forced=0)

        snapshot = hbytes(data.buffer)
        assert snapshot not in buffers_seen
        buffers_seen.add(snapshot)

    with deterministic_PRNG():
        runner = ConjectureRunner(tf, settings=TEST_SETTINGS)

        for _ in hrange(100):
            candidate = runner.generate_novel_prefix() + hbytes(2)
            outcome = runner.cached_test_function(candidate)
            assert outcome.status == Status.VALID
            assert candidate.startswith(outcome.buffer)
def test_can_reduce_poison_from_any_subtree(size, seed):
    """Check that a poisoned leaf shrinks to ``(POISON,)`` from any position.

    A tree of the requested size is found and shrunk first; a poison value
    is then injected at every second two-byte block start, and each poisoned
    buffer must shrink down to the single-poison tree.
    """
    rnd = Random(seed)

    # Start from the minimal tree of the requested size.  It is not expected
    # to be poisoned, since the poison event essentially never happens when
    # drawing uniformly at random.

    # p is chosen so that the expected tree size matches the desired size.
    p = 1.0 / (2.0 - 1.0 / size)
    strat = PoisonedTree(p)

    def test_function(data):
        tree = data.draw(strat)
        if len(tree) >= size:
            data.mark_interesting()

    runner = ConjectureRunner(
        test_function,
        random=rnd,
        settings=settings(TEST_SETTINGS, buffer_size=LOTS),
    )

    while not runner.interesting_examples:
        runner.test_function(
            runner.new_conjecture_data(lambda data, n: uniform(rnd, n))
        )

    runner.shrink_interesting_examples()

    (data,) = runner.interesting_examples.values()
    assert len(ConjectureData.for_buffer(data.buffer).draw(strat)) == size

    starts = [block.start for block in data.blocks if block.length == 2]
    assert len(starts) % 2 == 0

    # The trailing marker must survive shrinking, so the shrinker cannot get
    # the desired result simply by discarding the end of the buffer.
    marker = hbytes([1, 2, 3, 4])

    def test_function_with_poison(data):
        tree = data.draw(strat)
        tail = data.draw_bytes(len(marker))
        if POISON in tree and tail == marker:
            data.mark_interesting()

    for u in starts[::2]:
        # Artificially insert a poison value at this leaf position.
        runner = ConjectureRunner(
            test_function_with_poison, random=rnd, settings=TEST_SETTINGS
        )

        poisoned = (
            data.buffer[:u] + hbytes([255]) * 4 + data.buffer[u + 4:] + marker
        )
        runner.cached_test_function(poisoned)
        assert runner.interesting_examples

        runner.shrink_interesting_examples()

        (shrunk,) = runner.interesting_examples.values()
        assert ConjectureData.for_buffer(shrunk.buffer).draw(strat) == (POISON,)
def normalize(
    base_name,
    test_function,
    *,
    required_successes=100,
    allowed_to_update=False,
    max_dfas=10,
):
    """Attempt to ensure that this test function successfully normalizes - i.e.
    whenever it declares a test case to be interesting, we are able
    to shrink that to the same interesting test case (which logically should
    be the shortlex minimal interesting test case, though we may not be able
    to detect if it is).

    Will run until we have seen ``required_successes`` many interesting test
    cases in a row normalize to the same value.

    If ``allowed_to_update`` is True, whenever we fail to normalize we will
    learn a new DFA-based shrink pass that allows us to make progress. Any
    learned DFAs will be written back into the learned DFA file at the end
    of this function. If ``allowed_to_update`` is False, this will raise an
    error as soon as it encounters a failure to normalize.

    Additionally, if more than ``max_dfas`` DFAs are required to normalize
    this test function, this function will raise an error - it's essentially
    designed for small patches that other shrink passes don't cover, and
    if it's learning too many patches then you need a better shrink pass
    than this can provide.

    Parameters:
        base_name: Prefix for the names under which learned DFAs are
            registered in ``SHRINKING_DFAS``; a dash and the first ten hex
            digits of a SHA-256 of the DFA's ``repr`` are appended.
        test_function: The test function handed to ``ConjectureRunner``.
        required_successes: Number of consecutive matching normalizations
            needed before the loop stops (keyword-only).
        allowed_to_update: Whether new DFAs may be learned (keyword-only).
        max_dfas: Maximum number of DFAs that may be learned in one call
            (keyword-only).

    Raises:
        FailedToNormalise: If two shrunk buffers for the same origin differ
            and ``allowed_to_update`` is False, or if ``max_dfas`` DFAs have
            already been added and another would be needed.
        AssertionError: If more than 1000 attempts fail before any
            interesting test case has been found, or if a learned DFA's name
            is already present in ``SHRINKING_DFAS``.
    """
    # Need import inside the function to avoid circular imports
    from hypothesis.internal.conjecture.engine import BUFFER_SIZE, ConjectureRunner

    runner = ConjectureRunner(
        test_function,
        settings=settings(database=None, suppress_health_check=HealthCheck.all()),
        ignore_limits=True,
    )

    # Interesting origins for which an example has already been shrunk once.
    seen = set()

    dfas_added = 0

    found_interesting = False
    consecutive_successes = 0
    failures_to_find_interesting = 0
    while consecutive_successes < required_successes:
        # Ask for a fresh test case: an empty buffer extended by up to
        # BUFFER_SIZE.
        attempt = runner.cached_test_function(b"", extend=BUFFER_SIZE)
        if attempt.status < Status.INTERESTING:
            failures_to_find_interesting += 1
            # Give up only if we have *never* found anything interesting.
            assert (
                found_interesting or failures_to_find_interesting <= 1000
            ), "Test function seems to have no interesting test cases"
            continue

        found_interesting = True

        target = attempt.interesting_origin

        # Redefined every iteration so that it closes over this iteration's
        # ``target``.
        def shrinking_predicate(d):
            return d.status == Status.INTERESTING and d.interesting_origin == target

        if target not in seen:
            # First example for this origin: shrink it so there is a baseline
            # to compare later attempts against.
            seen.add(target)
            runner.shrink(attempt, shrinking_predicate)
            continue

        previous = fully_shrink(
            runner, runner.interesting_examples[target], shrinking_predicate
        )
        current = fully_shrink(runner, attempt, shrinking_predicate)

        if current.buffer == previous.buffer:
            consecutive_successes += 1
            continue

        # The two shrunk buffers differ, so the streak is broken.
        consecutive_successes = 0

        if not allowed_to_update:
            raise FailedToNormalise(
                "Shrinker failed to normalize %r to %r and we are not allowed to learn new DFAs."
                % (previous.buffer, current.buffer)
            )

        if dfas_added >= max_dfas:
            raise FailedToNormalise(
                "Test function is too hard to learn: Added %d DFAs and still not done."
                % (dfas_added,)
            )

        dfas_added += 1

        new_dfa = learn_a_new_dfa(
            runner, previous.buffer, current.buffer, shrinking_predicate
        )

        name = (
            base_name
            + "-"
            + hashlib.sha256(repr(new_dfa).encode("utf-8")).hexdigest()[:10]
        )

        # If there is a name collision this DFA should already be being
        # used for shrinking, so we should have already been able to shrink
        # v further.
        assert name not in SHRINKING_DFAS
        SHRINKING_DFAS[name] = new_dfa

    if dfas_added > 0:
        # We've learned one or more DFAs in the course of normalising, so now
        # we update the file to record those for posterity.
        update_learned_dfas()
def test_overruns_if_not_enough_bytes_for_block():
    """Rewriting one byte after a two-byte block was drawn reports an overrun."""

    def draw_two_bytes(data):
        return data.draw_bytes(2)

    runner = ConjectureRunner(
        draw_two_bytes, settings=TEST_SETTINGS, random=Random(0)
    )
    runner.cached_test_function(b"\0\0")

    rewritten = runner.tree.rewrite(b"\0")
    assert rewritten[1] == Status.OVERRUN
def test_always_reduces_integers_to_smallest_suitable_sizes(problem):
    """Shrinking an integer down to ``n`` also shrinks its encoded width.

    The blob must decode to an integer above ``n`` followed by a non-zero
    stop byte.  After fixating ``minimize_individual_blocks`` the integer
    must equal ``n`` and the buffer must have the expected length.
    """
    n, raw_blob = problem
    blob = hbytes(raw_blob)
    try:
        probe = ConjectureData.for_buffer(blob)
        k = probe.draw(st.integers())
        # The byte immediately after the integer acts as the stop value.
        stop = blob[len(probe.buffer)]
    except (StopTest, IndexError):
        reject()

    assume(k > n)
    assume(stop > 0)

    def f(data):
        value = data.draw(st.integers())
        data.output = repr(value)
        if data.draw_bits(8) == stop and value >= n:
            data.mark_interesting()

    shrink_only = settings(
        suppress_health_check=HealthCheck.all(),
        phases=(Phase.shrink,),
        database=None,
        verbosity=Verbosity.debug,
    )
    runner = ConjectureRunner(
        f,
        random=Random(0),
        settings=shrink_only,
        database_key=None,
    )

    runner.cached_test_function(blob)
    assert runner.interesting_examples

    (initial,) = runner.interesting_examples.values()
    shrinker = runner.new_shrinker(
        initial, lambda x: x.status == Status.INTERESTING
    )
    shrinker.fixate_shrink_passes(["minimize_individual_blocks"])

    v = shrinker.shrink_target
    m = ConjectureData.for_buffer(v.buffer).draw(st.integers())
    assert m == n

    # Expected length: n plus its sign bit needs 1 + n.bit_length() bits.
    # Only the widths listed in WideRangeIntStrategy.sizes are available, so
    # the smallest listed width that fits is the one used.
    bits_needed = 1 + n.bit_length()
    actual_bits_needed = min(
        width for width in WideRangeIntStrategy.sizes if width >= bits_needed
    )
    bytes_needed = actual_bits_needed // 8
    # 3 extra bytes: two for the sampler, one for the capping value.
    assert len(v.buffer) == 3 + bytes_needed
def test_can_reduce_poison_from_any_subtree(size, seed):
    """Validate minimisation to a poisoned leaf anywhere in a binary tree.

    First an unpoisoned tree of the desired size is found and shrunk.  Then,
    for every second two-byte block start, a poison value is written into
    the buffer and the result must shrink to the tree ``(POISON,)``.
    """
    prng = Random(seed)

    # The initial tree is the minimal one of the requested size.  It will not
    # be poisoned in practice, because the poison event essentially never
    # happens when drawing uniformly at random.

    # Pick p so the expected tree size equals the desired size.
    p = 1.0 / (2.0 - 1.0 / size)
    strat = PoisonedTree(p)

    def test_function(data):
        drawn = data.draw(strat)
        if len(drawn) >= size:
            data.mark_interesting()

    big_buffer_settings = settings(TEST_SETTINGS, buffer_size=LOTS)
    runner = ConjectureRunner(
        test_function, random=prng, settings=big_buffer_settings
    )

    while not runner.interesting_examples:
        fresh = runner.new_conjecture_data(lambda data, n: uniform(prng, n))
        runner.test_function(fresh)

    runner.shrink_interesting_examples()

    (data,) = runner.interesting_examples.values()
    assert len(ConjectureData.for_buffer(data.buffer).draw(strat)) == size

    starts = [blk.start for blk in data.blocks if blk.length == 2]
    assert len(starts) % 2 == 0

    # A marker appended to the buffer must be preserved, which means the
    # shrinker is not allowed to rely on discarding the end of the buffer.
    marker = hbytes([1, 2, 3, 4])

    def test_function_with_poison(data):
        drawn = data.draw(strat)
        trailer = data.draw_bytes(len(marker))
        if POISON in drawn and trailer == marker:
            data.mark_interesting()

    for index in hrange(0, len(starts), 2):
        # Insert a poison value artificially at this leaf position.
        u = starts[index]

        runner = ConjectureRunner(
            test_function_with_poison, random=prng, settings=TEST_SETTINGS
        )

        head = data.buffer[:u]
        tail = data.buffer[u + 4 :]
        runner.cached_test_function(head + hbytes([255]) * 4 + tail + marker)

        assert runner.interesting_examples
        runner.shrink_interesting_examples()

        (shrunk,) = runner.interesting_examples.values()
        assert ConjectureData.for_buffer(shrunk.buffer).draw(strat) == (POISON,)
def test_overruns_if_not_enough_bytes_for_block():
    """A buffer shorter than the drawn block rewrites to ``Status.OVERRUN``."""

    def two_byte_block(data):
        return data.draw_bytes(2)

    runner = ConjectureRunner(
        two_byte_block,
        settings=TEST_SETTINGS,
        random=Random(0),
    )
    runner.cached_test_function(b"\0\0")

    _, status = runner.tree.rewrite(b"\0")
    assert status == Status.OVERRUN
def learner_for(strategy):
    """Return the LStar learner for ``strategy``, building it on first use.

    The learner's predicate accepts a buffer when running it gives a status
    of at least ``Status.VALID``, has no discards, and consumes exactly that
    buffer.  Built learners are stored in ``LEARNERS`` and reused.
    """
    try:
        return LEARNERS[strategy]
    except KeyError:
        pass

    def test_function(data):
        data.draw(strategy)
        data.mark_interesting()

    runner = ConjectureRunner(
        test_function,
        settings=settings(
            database=None,
            verbosity=Verbosity.quiet,
            suppress_health_check=HealthCheck.all(),
        ),
        random=Random(0),
        ignore_limits=True,
    )

    def predicate(s):
        outcome = runner.cached_test_function(s)
        # The status check comes first so has_discards is only read for
        # results that are at least valid.
        if outcome.status < Status.VALID or outcome.has_discards:
            return False
        return outcome.buffer == s

    learner = LStar(predicate)

    runner.run()

    (smallest,) = runner.interesting_examples.values()
    base = smallest.buffer

    # Teach the learner small examples first.  This is fairly ad hoc: the aim
    # is for it to understand what the smallest example is and to avoid
    # loops at the start of the DFA that do not really exist.
    learner.learn(base)

    for extra in (1, 2, 3):
        for _ in range(5):
            learner.learn(uniform(runner.random, extra) + base)

    # Keep feeding random buffers until a full round leaves the learner's
    # generation unchanged.
    last_generation = -1
    while learner.generation != last_generation:
        last_generation = learner.generation

        for _ in range(10):
            candidate = uniform(runner.random, len(base)) + bytes(BUFFER_SIZE)
            learner.learn(candidate)
            executed = runner.cached_test_function(candidate)
            if executed.status >= Status.VALID:
                learner.learn(executed.buffer)

    LEARNERS[strategy] = learner
    return learner
def test_always_reduces_integers_to_smallest_suitable_sizes(problem):
    """The shrunk integer equals ``n`` and uses the smallest fitting width.

    Inputs that do not decode to an integer followed by a stop byte are
    rejected; the integer must exceed ``n`` and the stop byte must be
    non-zero for the case to be used.
    """
    n, blob_source = problem
    blob = hbytes(blob_source)
    try:
        decoded = ConjectureData.for_buffer(blob)
        k = decoded.draw(st.integers())
        # First byte past the integer's encoding.
        stop = blob[len(decoded.buffer)]
    except (StopTest, IndexError):
        reject()

    assume(k > n)
    assume(stop > 0)

    def f(data):
        drawn = data.draw(st.integers())
        data.output = repr(drawn)
        if data.draw_bits(8) == stop and drawn >= n:
            data.mark_interesting()

    runner = ConjectureRunner(
        f,
        random=Random(0),
        settings=settings(
            suppress_health_check=HealthCheck.all(),
            phases=(Phase.shrink,),
            database=None,
            verbosity=Verbosity.debug,
        ),
        database_key=None,
    )

    runner.cached_test_function(blob)
    assert runner.interesting_examples

    def still_interesting(x):
        return x.status == Status.INTERESTING

    (start_point,) = runner.interesting_examples.values()
    shrinker = runner.new_shrinker(start_point, still_interesting)
    shrinker.fixate_shrink_passes(["minimize_individual_blocks"])

    v = shrinker.shrink_target
    m = ConjectureData.for_buffer(v.buffer).draw(st.integers())
    assert m == n

    # Length calculation: with its sign bit, n needs 1 + n.bit_length() bits.
    # The payload width is the smallest entry of WideRangeIntStrategy.sizes
    # that can hold that many bits.
    bits_needed = 1 + n.bit_length()
    candidate_widths = [
        s for s in WideRangeIntStrategy.sizes if s >= bits_needed
    ]
    actual_bits_needed = min(candidate_widths)
    bytes_needed = actual_bits_needed // 8
    # 3 extra bytes: two for the sampler, one for the capping value.
    assert len(v.buffer) == 3 + bytes_needed