def test_trie_fog_nearest_unknown():
    """
    A fresh fog reports the empty prefix as unknown everywhere. After exploring
    the root into two sub-segments, every search key should snap to the
    numerically nearest unexplored prefix.
    """
    fog = HexaryTrieFog()
    assert fog.nearest_unknown((1, 2, 3)) == ()

    explored = fog.explore((), ((1, 1), (5, 5)))

    # (search key, expected nearest unknown prefix)
    expectations = (
        # Search keys shallower than the unknown prefixes
        ((0, ), (1, 1)),
        ((1, ), (1, 1)),
        ((2, ), (1, 1)),
        ((4, ), (5, 5)),
        ((5, ), (5, 5)),
        ((6, ), (5, 5)),
        # Search keys at the same depth
        ((0, 9), (1, 1)),
        ((1, 1), (1, 1)),
        ((2, 1), (1, 1)),
        ((3, 2), (1, 1)),
        ((3, 3), (5, 5)),
        ((4, 9), (5, 5)),
        ((5, 5), (5, 5)),
        ((6, 1), (5, 5)),
        # Search keys deeper than the unknown prefixes
        ((0, 9, 9), (1, 1)),
        ((1, 1, 0), (1, 1)),
        ((2, 1, 1), (1, 1)),
        ((4, 9, 9), (5, 5)),
        ((5, 5, 0), (5, 5)),
        ((6, 1, 1), (5, 5)),
    )
    for search_key, expected in expectations:
        assert explored.nearest_unknown(search_key) == expected
def nodes(self) -> Iterable[Tuple[Nibbles, HexaryTrieNode]]:
    """
    Iterate over all trie nodes in left-to-right order: start at the
    left-most available node (the root), then its left-most available
    child, and so on.
    """
    fog = HexaryTrieFog()
    frontier_cache = TrieFrontierCache()
    while True:
        try:
            # Always resume from the furthest-left unexplored prefix
            prefix = fog.nearest_right(())
        except PerfectVisibility:
            # Every reachable node has been visited
            return

        try:
            parent_node, remaining_key = frontier_cache.get(prefix)
        except KeyError:
            # No cached parent available: traverse down from the root
            found_node = self._trie.traverse(prefix)
        else:
            # Shortcut: resume traversal from the cached parent node
            found_node = self._trie.traverse_from(parent_node, remaining_key)

        fog = fog.explore(prefix, found_node.sub_segments)

        if found_node.sub_segments:
            # Children exist, so keep this node around for fast descent
            frontier_cache.add(prefix, found_node, found_node.sub_segments)
        else:
            # Leaf: nothing below it will ever need this cache entry
            frontier_cache.delete(prefix)

        yield prefix, found_node
def test_trie_fog_explore_invalid(sub_segments):
    """
    Cannot explore with a sub_segment that is a child of another sub_segment,
    or a duplicate
    """
    with pytest.raises(ValidationError):
        HexaryTrieFog().explore((), sub_segments)
def test_trie_fog_composition_equality():
    """
    Exploring a deep prefix in one step should yield the same fog as
    exploring the same prefix in two successive steps.
    """
    fog = HexaryTrieFog()
    root_prefix = ()

    one_step = fog.explore(root_prefix, ((9, 9, 9), ))

    first_half = fog.explore(root_prefix, ((9, ), ))
    two_steps = first_half.explore((9, ), ((9, 9), ))

    assert one_step == two_steps
def test_trie_fog_expand_before_mark_all_complete():
    """
    mark_all_complete() should finish off every listed sub-segment at once.
    """
    branched = HexaryTrieFog().explore((), ((1, ), (5, )))
    assert not branched.is_complete

    # Complete both sub-segments in a single call
    finished = branched.mark_all_complete(((1, ), (5, )))
    assert finished.is_complete
def test_trie_fog_nearest_right_empty():
    """
    nearest_right() on a fully-explored fog must raise PerfectVisibility,
    regardless of the search key.
    """
    fully_explored = HexaryTrieFog().explore((), ())

    for search_key in ((), (0, )):
        with pytest.raises(PerfectVisibility):
            fully_explored.nearest_right(search_key)
def test_trie_fog_serialize(expand_points):
    """
    Build a bunch of random trie fogs, serialize them to a bytes representation,
    then deserialize them back. Validate that all deserialized tries are equal
    to their starting tries and respond to nearest_unknown the same as the
    original.
    """
    starting_fog = HexaryTrieFog()
    for next_index, children in expand_points:
        try:
            next_unknown = starting_fog.nearest_unknown(next_index)
        except PerfectVisibility:
            # Have already completely explored the trie
            break
        starting_fog = starting_fog.explore(next_unknown, children)

    # Any exploration at all must change the fog away from its fresh state
    if expand_points:
        assert starting_fog != HexaryTrieFog()
    else:
        assert starting_fog == HexaryTrieFog()

    resumed_fog = HexaryTrieFog.deserialize(starting_fog.serialize())
    assert resumed_fog == starting_fog

    if starting_fog.is_complete:
        assert resumed_fog.is_complete
    else:
        # Both fogs must answer nearest_unknown identically at every probe
        for search_index, _ in expand_points:
            original_answer = starting_fog.nearest_unknown(search_index)
            roundtrip_answer = resumed_fog.nearest_unknown(search_index)
            assert roundtrip_answer == original_answer
def test_trie_walk_backfilling_with_traverse_from(trie_keys, minimum_value_length, index_nibbles):
    """
    Like test_trie_walk_backfilling but using the HexaryTrie.traverse_from API
    """
    node_db, trie = trie_from_keys(trie_keys, minimum_value_length, prune=True)
    index_key = Nibbles(index_nibbles)

    # Delete all nodes, keeping them around so they can be restored on demand
    dropped_nodes = dict(node_db)
    node_db.clear()

    # traverse_from() cannot traverse to the root node, so resolve that manually
    try:
        root = trie.root_node
    except MissingTraversalNode as exc:
        node_db[exc.missing_node_hash] = dropped_nodes.pop(exc.missing_node_hash)
        root = trie.root_node

    # Core of the test: use the fog to convince yourself that you've traversed the entire trie
    fog = HexaryTrieFog()
    for _ in range(100000):
        # Look up the next prefix to explore
        try:
            nearest_key = fog.nearest_unknown(index_key)
        except PerfectVisibility:
            # Test Complete!
            break

        # Try to navigate to the prefix, catching any errors about nodes missing from the DB
        try:
            node = trie.traverse_from(root, nearest_key)
        except MissingTraversalNode as exc:
            # Node was missing, so fill in the node and try again
            node_db[exc.missing_node_hash] = dropped_nodes.pop(exc.missing_node_hash)
            continue
        else:
            # Node was found, use the found node to "lift the fog" down to its longer prefixes
            fog = fog.explore(nearest_key, node.sub_segments)
    else:
        assert False, "Must finish iterating the trie within ~100k runs"

    # Make sure we removed all the dropped nodes to push them back to the trie db
    assert len(dropped_nodes) == 0

    # Make sure the fog agrees that it's completed
    assert fog.is_complete

    # Make sure we can walk the whole trie without any missing nodes
    iterator = NodeIterator(trie)
    found_keys = set(iterator.keys())
    # Make sure we found all the keys
    assert found_keys == set(trie_keys)
def test_trie_fog_expand_before_complete():
    """
    Exploring each branch to a dead end, one at a time, should eventually
    mark the fog complete — but not before the last branch is finished.
    """
    branched = HexaryTrieFog().explore((), ((1, ), (5, )))
    assert not branched.is_complete

    # Finishing just the first branch leaves the fog incomplete
    one_done = branched.explore((1, ), ())
    assert not one_done.is_complete

    # Finishing the remaining branch completes the fog
    both_done = one_done.explore((5, ), ())
    assert both_done.is_complete
def test_trie_fog_completion():
    """
    Exploring the empty prefix with no children should immediately complete
    the fog, without mutating the original instance.
    """
    fog = HexaryTrieFog()
    # A fog starts with *nothing* verified
    assert not fog.is_complete

    # Exploring the root with zero sub-segments marks everything complete
    completed_fog = fog.explore((), ())
    assert completed_fog.is_complete

    # The original fog must remain untouched (fogs are immutable)
    assert not fog.is_complete
def test_trie_fog_immutability():
    """
    Each explore() call must return a new fog, leaving its predecessors'
    nearest_unknown answers unchanged.
    """
    original = HexaryTrieFog()
    after_root = original.explore((), ((1, ), (2, )))
    after_child = after_root.explore((1, ), ((3, ), ))

    # Each generation answers from its own state, unaffected by later explores
    assert original.nearest_unknown(()) == ()
    assert after_root.nearest_unknown(()) == (1, )
    assert after_child.nearest_unknown(()) == (1, 3)

    # All three generations are distinct values
    assert original != after_root
    assert after_root != after_child
    assert original != after_child
def test_trie_walk_backfilling(trie_keys, index_nibbles):
    """
    - Create a random trie of 3-byte keys
    - Drop all node bodies from the trie
    - Use fog to index into random parts of the trie
    - Every time a node is missing from the DB, replace it and retry
    - Repeat until full trie has been explored with the HexaryTrieFog
    """
    node_db, trie = _make_trie(trie_keys)
    index_key = Nibbles(index_nibbles)

    # Delete all nodes, keeping them around so they can be restored on demand
    dropped_nodes = dict(node_db)
    node_db.clear()

    # Core of the test: use the fog to convince yourself that you've traversed the entire trie
    fog = HexaryTrieFog()
    for _ in range(100000):
        # Look up the next prefix to explore
        try:
            nearest_key = fog.nearest_unknown(index_key)
        except PerfectVisibility:
            # Test Complete!
            break

        # Try to navigate to the prefix, catching any errors about nodes missing from the DB
        try:
            node = trie.traverse(nearest_key)
        except MissingTraversalNode as exc:
            # Node was missing, so fill in the node and try again
            node_db[exc.missing_node_hash] = dropped_nodes.pop(exc.missing_node_hash)
            continue
        else:
            # Node was found, use the found node to "lift the fog" down to its longer prefixes
            fog = fog.explore(nearest_key, node.sub_segments)
    else:
        assert False, "Must finish iterating the trie within ~100k runs"

    # Make sure we removed all the dropped nodes to push them back to the trie db
    assert len(dropped_nodes) == 0

    # Make sure the fog agrees that it's completed
    assert fog.is_complete

    # Make sure we can walk the whole trie without any missing nodes
    iterator = NodeIterator(trie)
    found_keys = set(iterator.keys())
    # Make sure we found all the keys
    assert found_keys == set(trie_keys)
def test_trie_fog_nearest_right():
    """
    nearest_right() should return the nearest unknown prefix at-or-right-of
    the search key, and raise FullDirectionalVisibility when everything to
    the right is already explored.
    """
    fog = HexaryTrieFog()
    assert fog.nearest_right((1, 2, 3)) == ()

    explored = fog.explore((), ((1, 1), (5, 5)))

    # (search key, expected nearest-right unknown prefix)
    expectations = (
        # Search keys shallower than the unknown prefixes
        ((0, ), (1, 1)),
        ((1, ), (1, 1)),
        ((2, ), (5, 5)),
        ((4, ), (5, 5)),
        ((5, ), (5, 5)),
        # Search keys at the same depth
        ((0, 9), (1, 1)),
        ((1, 1), (1, 1)),
        ((2, 1), (5, 5)),
        ((3, 2), (5, 5)),
        ((3, 3), (5, 5)),
        ((4, 9), (5, 5)),
        ((5, 5), (5, 5)),
        # Search keys deeper than the unknown prefixes
        ((0, 9, 9), (1, 1)),
        ((1, 1, 0), (1, 1)),
        ((2, 1, 1), (5, 5)),
        ((4, 9, 9), (5, 5)),
        ((5, 5, 0), (5, 5)),
        ((5, 5, 15), (5, 5)),
    )
    for search_key, expected in expectations:
        assert explored.nearest_right(search_key) == expected

    # Anything right of the last unknown prefix has full visibility
    for search_key in ((6, ), (5, 6), (6, 1), (6, 0, 0)):
        with pytest.raises(FullDirectionalVisibility):
            explored.nearest_right(search_key)
def test_trie_walk_root_change_with_cached_traverse_from(
        do_cache_reset,
        trie_keys,
        minimum_value_length,
        number_explorations,
        trie_changes,
        index_nibbles,
        index_nibbles2):
    """
    Like test_trie_walk_root_change_with_traverse but using
    HexaryTrie.traverse_from when possible.
    """
    # Turn on pruning to simulate having peers lose access to old trie nodes over time
    node_db, trie = trie_from_keys(trie_keys, minimum_value_length, prune=True)

    number_explorations %= len(node_db)
    cache = TrieFrontierCache()

    # delete all nodes
    missing_nodes = dict(node_db)
    node_db.clear()

    # First walk
    index_key = tuple(index_nibbles)
    fog = HexaryTrieFog()
    for _ in range(number_explorations):
        try:
            nearest_prefix = fog.nearest_unknown(index_key)
        except PerfectVisibility:
            assert False, "Number explorations should be lower than database size, shouldn't finish"

        try:
            # Use the cache, if possible, to look up the parent node of nearest_prefix
            try:
                cached_node, uncached_key = cache.get(nearest_prefix)
            except KeyError:
                # Must navigate from the root. In this 1st walk, only the root
                # should not be cached
                assert nearest_prefix == ()
                node = trie.traverse(nearest_prefix)
            else:
                # Only one database lookup required
                node = trie.traverse_from(cached_node, uncached_key)
            # Note that a TraversedPartialPath should not happen here, because no trie changes
            # have happened, so we should have a perfect picture of the trie
        except MissingTraversalNode as exc:
            # Each missing node should only need to be retrieve (at most) once
            node_db[exc.missing_node_hash] = missing_nodes.pop(exc.missing_node_hash)
            continue
        else:
            fog = fog.explore(nearest_prefix, node.sub_segments)

            if node.sub_segments:
                cache.add(nearest_prefix, node, node.sub_segments)
            else:
                cache.delete(nearest_prefix)

    # Modify Trie mid-walk, keeping track of the expected list of final keys
    expected_final_keys = set(trie_keys)
    with trie.squash_changes() as trie_batch:
        for change in trie_changes:
            # repeat until change is complete
            change_complete = False
            while not change_complete:
                # Catch any missing nodes during trie change, and fix them up.
                # This is equivalent to Trinity's "Beam Sync".
                try:
                    if isinstance(change, bytes):
                        # insert!
                        trie_batch[change] = change.rjust(minimum_value_length, b'3')
                        expected_final_keys.add(change)
                    else:
                        key_index, new_value = change
                        key = trie_keys[key_index % len(trie_keys)]
                        if new_value is None:
                            del trie_batch[key]
                            expected_final_keys.discard(key)
                        else:
                            # update (though may be an insert, if there was a previous delete)
                            trie_batch[key] = new_value
                            expected_final_keys.add(key)
                except MissingTrieNode as exc:
                    node_db[exc.missing_node_hash] = missing_nodes.pop(exc.missing_node_hash)
                else:
                    change_complete = True

    # Second walk
    index_key2 = tuple(index_nibbles2)

    if do_cache_reset:
        cache = TrieFrontierCache()

    for _ in range(100000):
        try:
            nearest_prefix = fog.nearest_unknown(index_key2)
        except PerfectVisibility:
            # Complete!
            break

        try:
            try:
                cached_node, uncached_key = cache.get(nearest_prefix)
            except KeyError:
                node = trie.traverse(nearest_prefix)
                cached_node = None
            else:
                node = trie.traverse_from(cached_node, uncached_key)
        except MissingTraversalNode as exc:
            node_hash = exc.missing_node_hash
            if node_hash in missing_nodes:
                # Each missing node should only need to be retrieve (at most) once
                node_db[node_hash] = missing_nodes.pop(node_hash)
            elif cached_node is not None:
                # But, it could also be missing because of an old cached node
                # Delete the bad cache and try again
                cache.delete(nearest_prefix)
            else:
                raise AssertionError(f"Bad node hash request: {node_hash}")
            continue
        except TraversedPartialPath as exc:
            node = exc.simulated_node

        sub_segments = node.sub_segments
        fog = fog.explore(nearest_prefix, sub_segments)

        if sub_segments:
            cache.add(nearest_prefix, node, sub_segments)
        else:
            cache.delete(nearest_prefix)
    else:
        assert False, "Must finish iterating the trie within ~100k runs"

    # Final assertions
    assert fog.is_complete
    # We do *not* know that we have replaced all the missing_nodes, because of the trie changes

    # Make sure we can walk the whole trie without any missing nodes
    iterator = NodeIterator(trie)
    found_keys = set(iterator.keys())
    assert found_keys == expected_final_keys
def test_trie_walk_root_change_with_traverse(
        trie_keys,
        minimum_value_length,
        number_explorations,
        trie_changes,
        index_nibbles,
        index_nibbles2):
    """
    Like test_trie_walk_backfilling, but:
    - Halt the trie walk early
    - Modify the trie according to parameter trie_changes
    - Continue walking the trie using the same HexaryTrieFog, until completion
    - Verify that all required database values were replaced (where only the
      nodes under the NEW trie root are required)
    """
    # Turn on pruning to simulate having peers lose access to old trie nodes over time
    node_db, trie = trie_from_keys(trie_keys, minimum_value_length, prune=True)

    number_explorations %= len(node_db)

    # delete all nodes
    missing_nodes = dict(node_db)
    node_db.clear()

    # First walk
    index_key = tuple(index_nibbles)
    fog = HexaryTrieFog()
    for _ in range(number_explorations):
        # Look up the next prefix to explore
        try:
            nearest_key = fog.nearest_unknown(index_key)
        except PerfectVisibility:
            assert False, "Number explorations should be lower than database size, shouldn't finish"

        # Try to navigate to the prefix, catching any errors about nodes missing from the DB
        try:
            node = trie.traverse(nearest_key)
            # Note that a TraversedPartialPath should not happen here, because no trie changes
            # have happened, so we should have a perfect picture of the trie
        except MissingTraversalNode as exc:
            # Node was missing, so fill in the node and try again
            node_db[exc.missing_node_hash] = missing_nodes.pop(exc.missing_node_hash)
            continue
        else:
            # Node was found, use the found node to "lift the fog" down to its longer prefixes
            fog = fog.explore(nearest_key, node.sub_segments)

    # Modify Trie mid-walk, keeping track of the expected list of final keys
    expected_final_keys = set(trie_keys)
    with trie.squash_changes() as trie_batch:
        for change in trie_changes:
            # repeat until change is complete
            change_complete = False
            while not change_complete:
                # Catch any missing nodes during trie change, and fix them up.
                # This is equivalent to Trinity's "Beam Sync".
                try:
                    if isinstance(change, bytes):
                        # insert!
                        trie_batch[change] = change
                        expected_final_keys.add(change)
                    else:
                        key_index, new_value = change
                        key = trie_keys[key_index % len(trie_keys)]
                        if new_value is None:
                            del trie_batch[key]
                            expected_final_keys.discard(key)
                        else:
                            # update (though may be an insert, if there was a previous delete)
                            trie_batch[key] = new_value
                            expected_final_keys.add(key)
                except MissingTrieNode as exc:
                    node_db[exc.missing_node_hash] = missing_nodes.pop(exc.missing_node_hash)
                else:
                    change_complete = True

    # Second walk
    index_key2 = tuple(index_nibbles2)
    for _ in range(100000):
        try:
            nearest_key = fog.nearest_unknown(index_key2)
        except PerfectVisibility:
            # Complete!
            break

        try:
            node = trie.traverse(nearest_key)
            sub_segments = node.sub_segments
        except MissingTraversalNode as exc:
            node_db[exc.missing_node_hash] = missing_nodes.pop(exc.missing_node_hash)
            continue
        except TraversedPartialPath as exc:
            # You might only get part-way down a path of nibbles if your fog is based on an
            # old trie. Determine the new sub-segments that are accessible from this partial
            # traversal.
            sub_segments = exc.simulated_node.sub_segments

        # explore the fog if there were no exceptions, or if you traversed a partial path
        fog = fog.explore(nearest_key, sub_segments)
    else:
        assert False, "Must finish iterating the trie within ~100k runs"

    # Final assertions
    assert fog.is_complete
    # We do *not* know that we have replaced all the missing_nodes, because of the trie changes

    # Make sure we can walk the whole trie without any missing nodes
    iterator = NodeIterator(trie)
    found_keys = set(iterator.keys())
    assert found_keys == expected_final_keys