def test_squash_changes_does_not_prune_on_missing_trie_node(inserts_and_updates):
    """
    A change that raises MissingTrieNode inside a squash must not alter the batch db.

    The trie is first populated with ``inserts``, then every node is moved out of the
    backing database. Each entry of ``updates`` is retried until it succeeds; on every
    failure the batch database is compared to its pre-attempt snapshot, and only the
    node named by the exception is restored.
    """
    inserts, updates = inserts_and_updates
    node_db = {}
    trie = HexaryTrie(node_db)
    with trie.squash_changes() as trie_batch:
        for key, value in inserts:
            trie_batch[key] = value

    # Stash all nodes aside and empty the live database
    missing_nodes = node_db.copy()
    node_db.clear()

    with trie.squash_changes() as trie_batch:
        for key, value in updates:
            # Keep retrying the same change until it goes through.
            # Restoring nodes one at a time is equivalent to Trinity's "Beam Sync".
            while True:
                db_before = trie_batch.db.copy()
                try:
                    if value is not None:
                        trie_batch[key] = value
                    else:
                        del trie_batch[key]
                except MissingTrieNode as exc:
                    # A failed change must never leave a trace in the database
                    db_after = trie_batch.db.copy()
                    assert db_after == db_before

                    missing_hash = exc.missing_node_hash
                    node_db[missing_hash] = missing_nodes.pop(missing_hash)
                else:
                    break
def test_hexary_trie_saving_final_root(name, updates, expected, deleted, final_root):
    """
    Squash a fixture's updates and deletions, then check the result.

    Verifies that every expected key reads back correctly, that reading them touches
    every key stored in the database (no stragglers), and that the resulting root
    hash equals ``final_root``.
    """
    db = {}
    trie = HexaryTrie(db=db)
    with trie.squash_changes() as batch:
        for key, value in updates:
            if value is not None:
                batch[key] = value
            else:
                del batch[key]

        for key in deleted:
            del batch[key]

    # Read every expected value through a logging wrapper, so that all database
    # keys backing the final state get marked as read.
    access_log = KeyAccessLogger(db)
    logged_trie = HexaryTrie(access_log, root_hash=trie.root_hash)
    for key, val in expected.items():
        assert logged_trie[key] == val

    # Anything never read is an unnecessary leftover; show it on failure.
    unread = access_log.unread_keys()
    straggler_data = {}
    for leftover in unread:
        straggler_data[leftover] = (db[leftover], decode_node(db[leftover]))
    assert len(unread) == 0, straggler_data

    assert trie.root_hash == final_root
def _make_trie_root_and_nodes(items: Tuple[bytes, ...]) -> TrieRootAndData:
    """
    Build a trie mapping the RLP-encoded position of each item to the item itself.

    Returns the trie's root hash together with the backing key-value store.
    """
    kv_store = {}  # type: Dict[Hash32, bytes]
    trie = HexaryTrie(kv_store, BLANK_ROOT_HASH)
    with trie.squash_changes() as batch:
        for position, entry in enumerate(items):
            # keys are the big-endian RLP encoding of the item's index
            encoded_position = rlp.encode(position, sedes=rlp.sedes.big_endian_int)
            batch[encoded_position] = entry
    return trie.root_hash, kv_store
def _make_trie_root_and_nodes_isometric_on_order(
        items: Tuple[bytes, ...]) -> Tuple[bytes, Dict[bytes, bytes]]:
    """
    Build a trie in which every item is stored under itself as the key.

    Returns the trie's root hash together with the backing key-value store.
    """
    kv_store = {}  # type: Dict[bytes, bytes]
    trie = HexaryTrie(kv_store, BLANK_ROOT_HASH)
    with trie.squash_changes() as batch:
        for entry in items:
            # key and value are the same bytes
            batch[entry] = entry
    return trie.root_hash, kv_store
def test_squash_a_trie_handles_setting_new_root(prune):
    """
    Run two consecutive squashes on one trie and check the root hash bookkeeping.

    During the second squash the batch root must differ both from the hash captured
    in the first squash and from the outer trie's current hash. Afterwards the outer
    trie must hold the newly written key.
    """
    db = {}
    trie = HexaryTrie(db, prune=prune)
    with trie.squash_changes() as trie_batch:
        # NOTE(review): this writes through `trie`, not `trie_batch` -- presumably
        # deliberate given the test name; confirm.
        trie[b'\x00'] = b'33\x00'
        old_root_hash = trie.root_hash

    # The ref-count doesn't get reset at the end of the batch, but the pending prune
    # count does. Make sure the logic here can handle that.
    new_key = b'\x00\x00\x00'
    with trie.squash_changes() as trie_batch:
        trie_batch[new_key] = b'\x00\x00\x00'

        assert trie_batch.root_hash != old_root_hash
        assert trie_batch.root_hash != trie.root_hash

    assert trie.root_hash != old_root_hash
    assert trie[new_key] == b'\x00\x00\x00'
def trie_from_keys(keys, min_value_length=1):
    """
    Make a new HexaryTrie holding every given key, with a value derived from the key.

    Each value is ``b'v' + key``, right-padded with ``b'3'`` up to ``min_value_length``.
    Return the trie and a dict of the key/value pairs that were inserted.
    """
    trie = HexaryTrie({})
    contents = {}
    with trie.squash_changes() as batch:
        for key in keys:
            # Pad with 3's to lengthen the value. b'3' is encoded to 0x33, so the
            # bytes and HexBytes representation look the same. Just a convenience.
            padded = (b'v' + key).ljust(min_value_length, b'3')
            batch[key] = padded
            contents[key] = padded
    return trie, contents
def test_hexary_trie_traverse(name, updates, expected, deleted, final_root):
    """
    Walk an entire fixture trie and cross-check traverse() against traverse_from().

    The trie is built from ``updates`` and ``deleted`` in one squash. Starting at the
    root, every sub-segment is visited recursively. For each non-root node, the result
    of ``traverse_from(parent, extension)`` must equal ``traverse(full_prefix)``, and
    the parent-to-child hop may read at most one database key. Finally, every expected
    value must have been seen during the walk.
    """
    # Create trie with fixture data
    db = {}
    traversal_trie = HexaryTrie(db=db)
    with traversal_trie.squash_changes() as trie:
        for key, value in updates:
            if value is None:
                del trie[key]
            else:
                trie[key] = value

        for key in deleted:
            del trie[key]

    # values found while traversing
    found_values = set()

    def traverse_via_cache(parent_prefix, parent_node, child_extension):
        if parent_node is None:
            # Can't traverse_from to the root node
            node = traversal_trie.traverse(())
        elif not len(child_extension):
            # Raise explicitly instead of `assert False`, so the check survives `python -O`
            raise AssertionError(
                "For all but the root node, the child extension must not be empty"
            )
        else:
            logging_db = KeyAccessLogger(db)
            single_access_trie = HexaryTrie(logging_db)
            node = single_access_trie.traverse_from(parent_node, child_extension)
            # Traversing from parent to child should touch at most one node (the child)
            # It might touch 0 nodes, if the child was embedded inside the parent
            assert len(logging_db.read_keys) in {0, 1}

            # Validate that traversal from the root gives you the same result:
            slow_node = traversal_trie.traverse(parent_prefix + child_extension)
            assert node == slow_node

        if node.value:
            found_values.add(node.value)

        for new_child in node.sub_segments:
            # traverse into children
            traverse_via_cache(parent_prefix + child_extension, node, new_child)

    # start traversal at root
    traverse_via_cache((), None, ())

    # gut check that we have traversed the whole trie by checking all expected values are visited
    for expected_value in expected.values():
        assert expected_value in found_values
def test_squash_a_pruning_trie_keeps_unchanged_short_root_node():
    """
    Re-writing an identical key/value in a squash on a pruning trie keeps the root.

    The root hash must stay the same, and remain present in the database, both while
    the batch is open and after it has been committed.
    """
    db = {}
    trie = HexaryTrie(db, prune=True)
    key, value = b'any', b'short'
    trie[key] = value
    root_hash = trie.root_hash

    with trie.squash_changes() as trie_batch:
        # write the very same pair again: nothing should change
        trie_batch[key] = value

        assert trie.root_hash == root_hash
        assert root_hash in trie_batch.db
        assert root_hash in db

    # same expectations once the batch is committed
    assert trie.root_hash == root_hash
    assert root_hash in trie.db
    assert root_hash in db
def _make_trie(keys):
    """
    Build a fresh HexaryTrie in which every given key maps to itself.

    All inserts happen inside a single squash. Returns the raw node database first,
    then the HexaryTrie.
    """
    node_db = {}
    trie = HexaryTrie(node_db)

    with trie.squash_changes() as batch:
        for key in keys:
            batch[key] = key

    return node_db, trie
def test_hexary_trie_batch_save_keeps_last_root_data():
    """
    Without pruning, the state before a squash stays readable via the old root hash.
    """
    question = b'what floats on water?'

    db = {}
    trie = HexaryTrie(db)
    trie.set(question, b'very small rocks')
    old_root_hash = trie.root_hash

    with trie.squash_changes() as batch:
        batch.set(question, b'a duck')

    # the live trie sees the new answer...
    assert trie[question] == b'a duck'

    # ...while a trie opened at the previous root still sees the old one
    old_trie = HexaryTrie(db, root_hash=old_root_hash)
    assert old_trie[question] == b'very small rocks'
def test_squash_changes_raises_correct_error_on_underlying_missing_data():
    """
    Reading inside a squash raises MissingTrieNode when the root node is absent.
    """
    question = b'what floats on water?'

    db = {}
    trie = HexaryTrie(db, prune=True)
    trie.set(question, b'very small rocks')
    old_root_hash = trie.root_hash

    # simulate a database that never had the root node to begin with
    del db[old_root_hash]

    # the read inside the batch must surface the missing node
    with trie.squash_changes() as batch:
        with pytest.raises(MissingTrieNode):
            batch[question]
def test_hexary_trie_empty_squash_does_not_read_root():
    """
    Opening and closing a squash without making changes reads no database keys.
    """
    db = {}
    trie = HexaryTrie(db=db)
    long_value = b'LONG' * 32
    trie[b'AAA'] = long_value
    trie[b'BBB'] = long_value
    trie[b'\xffEE'] = long_value

    # wrap the populated database so that every read gets recorded
    access_log = KeyAccessLogger(db)
    logged_trie = HexaryTrie(access_log, root_hash=trie.root_hash)

    with logged_trie.squash_changes():
        # deliberately empty: no change means the root node need not be loaded
        pass

    assert len(access_log.read_keys) == 0
def test_hexary_trie_batch_save_drops_last_root_data_when_pruning():
    """
    With pruning on, the pre-squash root node is gone once the squash is committed.

    Accessing ``root_node`` on a trie opened at the old root hash raises KeyError.
    """
    question = b'what floats on water?'

    db = {}
    trie = HexaryTrie(db, prune=True)
    trie.set(question, b'very small rocks')
    old_root_hash = trie.root_hash

    with trie.squash_changes() as batch:
        batch.set(question, b'a duck')

    assert trie[question] == b'a duck'

    # the previous root was pruned, so it can no longer be loaded
    old_trie = HexaryTrie(db, root_hash=old_root_hash)
    with pytest.raises(KeyError):
        old_trie.root_node
def trie_from_keys(keys, minimum_value_length=0, prune=False):
    """
    Build a fresh HexaryTrie from the given keys, each mapped to a padded copy of itself.

    Every value is the key right-padded with ``b'3'`` up to ``minimum_value_length``.
    ``prune`` is forwarded to the HexaryTrie constructor.
    Return the raw database and the HexaryTrie.
    """
    node_db = {}
    trie = HexaryTrie(node_db, prune=prune)

    with trie.squash_changes() as batch:
        for key in keys:
            # Pad with 3's to lengthen the value. b'3' is encoded to 0x33, so the
            # bytes and HexBytes representation look the same. Just a convenience.
            padded_value = key.ljust(minimum_value_length, b'3')
            batch[key] = padded_value

    return node_db, trie
def test_hexary_trie_ref_count(name, updates, expected, deleted, final_root):
    """
    Check the batch trie's reference counts after every single change in a squash.

    ``verify_ref_count`` is invoked after each update and after each deletion.
    """
    db = {}
    trie = HexaryTrie(db=db)
    with trie.squash_changes() as batch:
        for key, value in updates:
            if value is not None:
                batch[key] = value
            else:
                del batch[key]

            # re-validate after each individual update
            verify_ref_count(batch)

        for key in deleted:
            del batch[key]
            # ...and after each individual deletion
            verify_ref_count(batch)
def test_squash_changes_can_still_access_underlying_deleted_data():
    """
    Inside a squash, resetting to the pre-squash root hash still reads the old value.
    """
    question = b'what floats on water?'

    db = {}
    trie = HexaryTrie(db, prune=True)
    trie.set(question, b'very small rocks')
    old_root_hash = trie.root_hash

    with trie.squash_changes() as batch:
        batch.set(question, b'a duck')
        verify_ref_count(batch)

        # Read the current root hash, then point the batch back at the root it
        # replaced. (The bare read is kept as-is; presumably it forces the pending
        # root to be computed first -- confirm.)
        batch.root_hash
        batch.root_hash = old_root_hash

        assert batch[question] == b'very small rocks'
def test_hexary_trie_batch_save_drops_last_root_data_when_pruning():
    """
    With pruning on, the pre-squash root node is gone once the squash is committed.

    Accessing ``root_node`` on a trie opened at the old root hash raises
    MissingTraversalNode, and the error text contains the hex of that root hash.
    """
    question = b'what floats on water?'

    db = {}
    trie = HexaryTrie(db, prune=True)
    trie.set(question, b'very small rocks')
    old_root_hash = trie.root_hash

    with trie.squash_changes() as batch:
        batch.set(question, b'a duck')
        verify_ref_count(batch)

    assert trie[question] == b'a duck'

    # the previous root was pruned, so it can no longer be loaded
    old_trie = HexaryTrie(db, root_hash=old_root_hash)
    with pytest.raises(MissingTraversalNode) as excinfo:
        old_trie.root_node

    # the exception must name the hash that could not be found
    error_text = str(excinfo.value)
    assert encode_hex(old_root_hash) in error_text
def test_hexary_trie_squash_all_changes(updates, deleted):
    """
    Squash a series of updates and deletions, then validate the committed trie.

    Three properties are checked:
    - every surviving key reads back with its latest value,
    - reading those keys touches every key in the database (no leftover nodes),
    - the root hash equals that of a trie built from the same changes without squashing.

    The batch's root hash is read after every individual change, so intermediate
    roots are produced while the squash is still open.
    """
    db = {}
    trie = HexaryTrie(db=db)
    expected = {}
    root_hashes = set()
    with trie.squash_changes() as memory_trie:
        for key, value in updates:
            if value is None:
                del memory_trie[key]
                expected.pop(key, None)
            else:
                memory_trie[key] = value
                expected[key] = value
            root_hashes.add(memory_trie.root_hash)

        for key in deleted:
            del memory_trie[key]
            expected.pop(key, None)
            root_hashes.add(memory_trie.root_hash)

    final_root_hash = trie.root_hash

    # access all of the values in the trie, triggering reads for all the database keys
    # that support the final state
    flagged_usage_db = KeyAccessLogger(db)
    flag_trie = HexaryTrie(flagged_usage_db, root_hash=final_root_hash)
    for key, val in expected.items():
        assert flag_trie[key] == val

    # assert that no unnecessary database values were created
    unread = flagged_usage_db.unread_keys()
    straggler_data = {k: (db[k], decode_node(db[k])) for k in unread}
    assert len(unread) == 0, straggler_data

    # rebuild without squashing, to compare root hash
    verbose_trie = HexaryTrie({})
    for key, value in updates:
        if value is None:
            del verbose_trie[key]
        else:
            verbose_trie[key] = value

    for key in deleted:
        del verbose_trie[key]

    assert final_root_hash == verbose_trie.root_hash
def test_squash_changes_raises_correct_error_on_new_deleted_data():
    """
    Resetting a batch to an intermediate root it has since replaced raises on read.

    The same key is written twice within one squash. The root hash captured between
    the two writes is then assigned back, and reading the key must raise
    MissingTrieNode.
    """
    question = b'what floats on water?'

    db = {}
    trie = HexaryTrie(db, prune=True)
    trie.set(question, b'very small rocks')

    with trie.squash_changes() as batch:
        batch.set(question, b'a duck')
        verify_ref_count(batch)
        middle_root_hash = batch.root_hash

        batch.set(question, b'ooooohh')
        # Bare read kept as-is; presumably it forces the new root to be computed
        # before the ref counts are checked -- confirm.
        batch.root_hash
        verify_ref_count(batch)

        # go back to a root hash that the batch trie no longer holds
        batch.root_hash = middle_root_hash

        with pytest.raises(MissingTrieNode):
            batch[question]
def test_squash_changes_reverts_trie_root_on_exception():
    """
    A MissingTrieNode raised when leaving a squash must leave the outer root untouched.

    One child node of the root is removed from the database before the squash starts.
    Setting an unrelated key inside the batch must not raise; the exception is expected
    to propagate out of the ``squash_changes`` context, after which the trie's root
    hash must still be the original one.
    """
    db = {}
    trie = HexaryTrie(db, prune=True)
    trie.set(b'\x00', b'B' * 32)
    trie.set(b'\xff', b'C' * 32)
    old_root_hash = trie.root_hash

    # delete the node that will be used during trie fixup
    del db[trie.root_node.raw[0xf]]

    with pytest.raises(MissingTrieNode):
        with trie.squash_changes() as memory_trie:
            try:
                memory_trie[b'\x11'] = b'new val'
            except MissingTrieNode:
                # Raise explicitly instead of `assert False`: under `python -O` the
                # assert would vanish and this premature exception would be swallowed.
                raise AssertionError(
                    "Only the squash_changes context exit should raise this exception"
                )

            del memory_trie[b'\xff']

    assert trie.root_hash == old_root_hash
def _insert_squash_test():
    """
    Insert every TEST_DATA pair, in sorted key order, within one squash.

    Returns the resulting HexaryTrie.
    """
    trie = HexaryTrie(db={})
    ordered_pairs = sorted(TEST_DATA.items())

    with trie.squash_changes() as batch:
        for key, value in ordered_pairs:
            batch[key] = value

    return trie