def test_hexary_trie_saving_final_root(name, updates, expected, deleted, final_root):
    db = {}
    trie = HexaryTrie(db=db)
    with trie.squash_changes() as memory_trie:
        for key, value in updates:
            if value is None:
                del memory_trie[key]
            else:
                memory_trie[key] = value
        for key in deleted:
            del memory_trie[key]

    # access all of the values in the trie, triggering reads for all the database keys
    # that support the final state
    flagged_usage_db = KeyAccessLogger(db)
    flag_trie = HexaryTrie(flagged_usage_db, root_hash=trie.root_hash)
    for key, val in expected.items():
        assert flag_trie[key] == val

    # assert that no unnecessary database values were created
    unread = flagged_usage_db.unread_keys()
    straggler_data = {k: (db[k], decode_node(db[k])) for k in unread}
    assert len(unread) == 0, straggler_data

    actual_root = trie.root_hash
    assert actual_root == final_root
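# A minimal sketch of the property the test above checks (using the same
# py-trie API): squash_changes() buffers intermediate nodes in memory, so
# only nodes reachable from the final root are written to the backing db.
from trie import HexaryTrie

db = {}
trie = HexaryTrie(db=db)
with trie.squash_changes() as memory_trie:
    # The first write creates nodes that the second write immediately
    # obsoletes; with squashing, those stale nodes never reach `db`.
    memory_trie[b'key'] = b'first value, long enough to be hashed as a node' * 2
    memory_trie[b'key'] = b'second value, long enough to be hashed as a node' * 2

# Every key left in `db` supports the final root, which is why the
# KeyAccessLogger assertion above expects zero unread keys.
restored = HexaryTrie(db, root_hash=trie.root_hash)
assert restored[b'key'] == b'second value, long enough to be hashed as a node' * 2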
def test_hexary_trie_batch_save_keeps_last_root_data():
    db = {}
    trie = HexaryTrie(db)
    trie.set(b'what floats on water?', b'very small rocks')
    old_root_hash = trie.root_hash

    with trie.squash_changes() as memory_trie:
        memory_trie.set(b'what floats on water?', b'a duck')

    assert trie[b'what floats on water?'] == b'a duck'

    old_trie = HexaryTrie(db, root_hash=old_root_hash)
    assert old_trie[b'what floats on water?'] == b'very small rocks'
def test_hexary_trie_batch_save_drops_last_root_data_when_pruning():
    db = {}
    trie = HexaryTrie(db, prune=True)
    trie.set(b'what floats on water?', b'very small rocks')
    old_root_hash = trie.root_hash

    with trie.squash_changes() as memory_trie:
        memory_trie.set(b'what floats on water?', b'a duck')

    assert trie[b'what floats on water?'] == b'a duck'

    old_trie = HexaryTrie(db, root_hash=old_root_hash)
    with pytest.raises(KeyError):
        old_trie.root_node
def test_hexary_trie_empty_squash_does_not_read_root():
    db = {}
    trie = HexaryTrie(db=db)
    trie[b'AAA'] = b'LONG' * 32
    trie[b'BBB'] = b'LONG' * 32
    trie[b'\xffEE'] = b'LONG' * 32

    flagged_usage_db = KeyAccessLogger(db)
    flag_trie = HexaryTrie(flagged_usage_db, root_hash=trie.root_hash)
    with flag_trie.squash_changes():
        # root node should not be read if no changes are made during squash
        pass

    assert len(flagged_usage_db.read_keys) == 0
def add_transaction(self,
                    block_header: BlockHeaderAPI,
                    index_key: int,
                    transaction: SignedTransactionAPI) -> Hash32:
    transaction_db = HexaryTrie(self.db, root_hash=block_header.transaction_root)
    transaction_db[index_key] = rlp.encode(transaction)
    return transaction_db.root_hash
def __init__(self, db: AtomicDatabaseAPI, state_root: Hash32 = BLANK_ROOT_HASH) -> None:
    r"""
    Internal implementation details (subject to rapid change):
    Database entries go through several pipes, like so...

    .. code::

        db > _batchdb ---------------------------> _journaldb ----------------> code lookups
         \
          -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups

    Journaling sequesters writes at the _journal* attrs ^, until persist is called.

    _batchtrie enables us to prune all trie changes while building
    state, without deleting old trie roots.

    _batchdb and _batchtrie together enable us to make the state root,
    without saving everything to the database.

    _journaldb is a journaling of the keys and values used to store
    code and account storage.

    _trie is a hash-trie, used to generate the state root

    _trie_cache is a cache tied to the state root of the trie. It
    is important that this cache is checked *after* looking for
    the key in _journaltrie, because the cache is only invalidated
    after a state root change.

    _journaltrie is a journaling of the accounts (an address->rlp mapping,
    rather than the nodes stored by the trie). This enables
    a squashing of all account changes before pushing them into the trie.

    .. NOTE:: StorageDB works similarly

    AccountDB synchronizes the snapshot/revert/persist of both of the
    journals.
    """
    self._raw_store_db = KeyAccessLoggerAtomicDB(db, log_missing_keys=False)
    self._batchdb = BatchDB(self._raw_store_db)
    self._batchtrie = BatchDB(self._raw_store_db, read_through_deletes=True)
    self._journaldb = JournalDB(self._batchdb)
    self._trie = HashTrie(HexaryTrie(self._batchtrie, state_root, prune=True))
    self._trie_logger = KeyAccessLoggerDB(self._trie, log_missing_keys=False)
    self._trie_cache = CacheDB(self._trie_logger)
    self._journaltrie = JournalDB(self._trie_cache)
    self._account_cache = LRU(2048)
    self._account_stores: Dict[Address, AccountStorageDatabaseAPI] = {}
    self._dirty_accounts: Set[Address] = set()
    self._root_hash_at_last_persist = state_root
    self._accessed_accounts: Set[Address] = set()
    self._accessed_bytecodes: Set[Address] = set()
    # Track whether an account or slot has been accessed during a given transaction:
    self._reset_access_counters()
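# A toy illustration (not py-evm's actual JournalDB/BatchDB classes) of the
# "journaling sequesters writes until persist" idea from the docstring above:
# writes land in an in-memory pending layer and only reach the wrapped store
# when persist() is called, so intermediate state never touches the database.
class TinyJournal:
    def __init__(self, wrapped):
        self._wrapped = wrapped
        self._pending = {}

    def __setitem__(self, key, value):
        # Sequester the write; the wrapped store is untouched.
        self._pending[key] = value

    def __getitem__(self, key):
        # Reads check the pending layer first, then fall through.
        if key in self._pending:
            return self._pending[key]
        return self._wrapped[key]

    def persist(self):
        # Flush all sequestered writes to the underlying store at once.
        self._wrapped.update(self._pending)
        self._pending.clear()


backing = {}
journal = TinyJournal(backing)
journal[b'addr'] = b'account-rlp'
assert b'addr' not in backing  # still sequestered
journal.persist()
assert backing[b'addr'] == b'account-rlp'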
def get_transaction_by_index(
        self,
        block_number: BlockNumber,
        transaction_index: int,
        transaction_class: Type[SignedTransactionAPI]) -> SignedTransactionAPI:
    """
    Returns the transaction at the specified `transaction_index` from the
    block specified by `block_number` from the canonical chain.

    Raises TransactionNotFound if no block with that block_number exists.
    """
    try:
        block_header = self.get_canonical_block_header_by_number(block_number)
    except HeaderNotFound:
        raise TransactionNotFound(f"Block {block_number} is not in the canonical chain")
    transaction_db = HexaryTrie(self.db, root_hash=block_header.transaction_root)
    encoded_index = rlp.encode(transaction_index)
    encoded_transaction = transaction_db[encoded_index]
    if encoded_transaction != b'':
        return rlp.decode(encoded_transaction, sedes=transaction_class)
    else:
        raise TransactionNotFound(
            f"No transaction is at index {transaction_index} of block {block_number}")
def __init__(self,
             db: AtomicDatabaseAPI,
             peer_pool: ETHPeerPool,
             queen_tracker: QueenTrackerAPI,
             event_bus: EndpointAPI) -> None:
    self.logger = get_logger('trinity.sync.beam.BeamDownloader')
    self._db = db
    self._trie_db = HexaryTrie(db)
    self._event_bus = event_bus

    # Track the needed node data that is urgent and important:
    buffer_size = MAX_STATE_FETCH * REQUEST_BUFFER_MULTIPLIER
    self._node_tasks = TaskQueue[Hash32](buffer_size, lambda task: 0)

    # list of events waiting on new data
    self._new_data_events: Set[asyncio.Event] = set()

    self._peer_pool = peer_pool

    # Track node data for upcoming blocks
    self._maybe_useful_nodes = TaskQueue[Hash32](
        buffer_size,
        # Everything is the same priority, for now
        lambda node_hash: 0,
    )

    self._num_urgent_requests_by_peer = Counter()
    self._num_predictive_requests_by_peer = Counter()

    self._queen_tracker = queen_tracker
def _make_trie_root_and_nodes_isometric_on_order(
        items: Tuple[bytes, ...]) -> Tuple[bytes, Dict[bytes, bytes]]:
    kv_store = {}  # type: Dict[bytes, bytes]
    trie = HexaryTrie(kv_store, BLANK_ROOT_HASH)
    for item in items:
        trie[item] = item
    return trie.root_hash, kv_store
def test_trie_using_fixtures(fixture_name, fixture):
    keys_and_values = fixture['in']
    deletes = tuple(k for k, v in keys_and_values if v is None)
    remaining = {k: v for k, v in keys_and_values if k not in deletes}

    for kv_permutation in itertools.islice(itertools.permutations(keys_and_values), 100):
        trie = HexaryTrie(db={})

        for key, value in kv_permutation:
            if value is None:
                del trie[key]
            else:
                trie[key] = value
        for key in deletes:
            del trie[key]

        for key, expected_value in remaining.items():
            assert key in trie
            actual_value = trie[key]
            assert actual_value == expected_value

        for key in deletes:
            assert key not in trie

        expected_root = fixture['root']
        actual_root = trie.root_hash
        assert actual_root == expected_root
def test_trie_using_fixtures(name, updates, expected, deleted, final_root):
    trie = HexaryTrie(db={})

    for key, value in updates:
        if value is None:
            del trie[key]
        else:
            trie[key] = value
            assert_proof(trie, key)
    for key in deleted:
        del trie[key]

    for key, expected_value in expected.items():
        assert key in trie
        actual_value = trie[key]
        assert actual_value == expected_value

    for key in deleted:
        assert key not in trie

    actual_root = trie.root_hash
    assert actual_root == final_root

    for valid_proof_key in expected:
        assert_proof(trie, valid_proof_key)

    for absence_proof_key in deleted:
        assert_proof(trie, absence_proof_key)
def get_transaction_by_index(
        self,
        block_number: BlockNumber,
        transaction_index: int,
        transaction_class: Type['BaseTransaction']) -> 'BaseTransaction':
    """
    Returns the transaction at the specified `transaction_index` from the
    block specified by `block_number` from the canonical chain.

    Raises TransactionNotFound if no block with that block_number exists.
    """
    try:
        block_header = self.get_canonical_block_header_by_number(block_number)
    except HeaderNotFound:
        raise TransactionNotFound(
            "Block {} is not in the canonical chain".format(block_number))
    transaction_db = HexaryTrie(self.db, root_hash=block_header.transaction_root)
    encoded_index = rlp.encode(transaction_index)
    if encoded_index in transaction_db:
        encoded_transaction = transaction_db[encoded_index]
        return rlp.decode(encoded_transaction, sedes=transaction_class)
    else:
        raise TransactionNotFound(
            "No transaction is at index {} of block {}".format(
                transaction_index, block_number))
def persist_block_to_db(self, block):
    '''
    Chain must do follow-up work to persist transactions to db
    '''
    new_canonical_headers = self.persist_header_to_db(block.header)

    # Persist the transaction bodies
    transaction_db = HexaryTrie(self.db, root_hash=BLANK_ROOT_HASH)
    for i, transaction in enumerate(block.transactions):
        index_key = rlp.encode(i, sedes=rlp.sedes.big_endian_int)
        transaction_db[index_key] = rlp.encode(transaction)
    assert transaction_db.root_hash == block.header.transaction_root

    for header in new_canonical_headers:
        for index, transaction_hash in enumerate(self.get_block_transaction_hashes(header)):
            self._add_transaction_to_canonical_chain(transaction_hash, header, index)

    # Persist the uncles list
    self.db.set(
        block.header.uncles_hash,
        rlp.encode(block.uncles, sedes=rlp.sedes.CountableList(type(block.header))),
    )
def test_hexary_trie_missing_traversal_node_with_traverse_from():
    db = {}
    trie = HexaryTrie(db, prune=True)

    key1 = to_bytes(0x0123)
    trie.set(
        key1,
        b'use a value long enough that it must be hashed according to trie spec',
    )

    key2 = to_bytes(0x1234)
    trie.set(key2, b'val2')

    # delete first child of the root
    root_node = trie.root_node
    first_child_hash = root_node.raw[0]
    del db[first_child_hash]

    # Get exception with relevant info about lookup nibbles
    with pytest.raises(MissingTraversalNode) as exc_info:
        trie.traverse_from(root_node, (0, 1, 2, 3))

    exception = exc_info.value
    assert exception.nibbles_traversed == (0,)
    assert encode_hex(first_child_hash) in str(exception)

    # Other keys are still traversable
    node = trie.traverse((1,))
    assert node.value == b'val2'
    assert node.sub_segments == ()
def traverse_via_cache(parent_prefix, parent_node, child_extension):
    if parent_node is None:
        # Can't traverse_from to the root node
        node = traversal_trie.traverse(())
    elif not len(child_extension):
        assert False, "For all but the root node, the child extension must not be empty"
    else:
        logging_db = KeyAccessLogger(db)
        single_access_trie = HexaryTrie(logging_db)
        node = single_access_trie.traverse_from(parent_node, child_extension)
        # Traversing from parent to child should touch at most one node (the child)
        # It might touch 0 nodes, if the child was embedded inside the parent
        assert len(logging_db.read_keys) in {0, 1}

        # Validate that traversal from the root gives you the same result:
        slow_node = traversal_trie.traverse(parent_prefix + child_extension)
        assert node == slow_node

    if node.value:
        found_values.add(node.value)

    for new_child in node.sub_segments:
        # traverse into children
        traverse_via_cache(parent_prefix + child_extension, node, new_child)
def _make_trie_root_and_nodes(items: Tuple[bytes, ...]) -> Tuple[bytes, Dict[bytes, bytes]]:
    kv_store = {}  # type: Dict[bytes, bytes]
    trie = HexaryTrie(MemoryDB(kv_store), BLANK_ROOT_HASH)
    for index, item in enumerate(items):
        index_key = rlp.encode(index, sedes=rlp.sedes.big_endian_int)
        trie[index_key] = item
    return trie.root_hash, kv_store
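# Hypothetical usage of the helper above: compute the transaction-trie
# root for a tuple of already-encoded transaction bodies. The returned
# kv_store maps node hashes to node bodies for the resulting trie.
encoded_txns = (b'tx-one', b'tx-two', b'tx-three')
root_hash, nodes = _make_trie_root_and_nodes(encoded_txns)
assert root_hash != BLANK_ROOT_HASH
assert all(isinstance(node_hash, bytes) for node_hash in nodes)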
def test_squash_changes_does_not_prune_on_missing_trie_node(inserts_and_updates):
    inserts, updates = inserts_and_updates
    node_db = {}
    trie = HexaryTrie(node_db)
    with trie.squash_changes() as trie_batch:
        for key, value in inserts:
            trie_batch[key] = value

    missing_nodes = dict(node_db)
    node_db.clear()

    with trie.squash_changes() as trie_batch:
        for key, value in updates:
            # repeat until change is complete
            change_complete = False
            while not change_complete:
                # Catch any missing nodes during trie change, and fix them up.
                # This is equivalent to Trinity's "Beam Sync".
                previous_db = trie_batch.db.copy()
                try:
                    if value is None:
                        del trie_batch[key]
                    else:
                        trie_batch[key] = value
                except MissingTrieNode as exc:
                    # When an exception is raised, we must never change the database
                    current_db = trie_batch.db.copy()
                    assert current_db == previous_db

                    node_db[exc.missing_node_hash] = missing_nodes.pop(exc.missing_node_hash)
                else:
                    change_complete = True
def test_hexary_trie_root_node_annotation():
    trie = HexaryTrie({})
    trie[b'\x41A'] = b'LONG' * 32
    trie[b'\xffE'] = b'LONG' * 32

    root = trie.root_node
    assert root == trie.traverse(())
def test_hexary_trie_at_root_lookups():
    changes = (
        (b'ab', b'b' * 32),
        (b'ac', b'c' * 32),
        (b'ac', None),
        (b'ad', b'd' * 32),
    )

    # track which key is expected to be present in which root
    expected_by_root = defaultdict(set)
    missing_by_root = defaultdict(set)

    trie = HexaryTrie({})
    for key, val in changes:
        if val is None:
            del trie[key]
            missing_by_root[trie.root_hash].add(key)
        else:
            trie[key] = val
            expected_by_root[trie.root_hash].add((key, val))

    # check that the values are still reachable at the old state roots
    for root_hash, expected_items in expected_by_root.items():
        for key, val in expected_items:
            with trie.at_root(root_hash) as snapshot:
                assert key in snapshot
                assert snapshot[key] == val

    # check that missing values are not reachable at the old state roots
    for root_hash, missing_keys in missing_by_root.items():
        for key in missing_keys:
            with trie.at_root(root_hash) as snapshot:
                assert key not in snapshot
def generate_proof_blob(block_dict, tx_index):
    header = block_header(block_dict)

    mpt = HexaryTrie(db={})
    for tx_dict in block_dict["transactions"]:
        key = rlp.encode(utils.parse_as_int(tx_dict['transactionIndex']))
        mpt.set(key, rlp_transaction(tx_dict))

    if mpt.root_hash != normalize_bytes(block_dict['transactionsRoot']):
        raise ValueError(
            "Tx trie root hash does not match. Calculated: {} Sent: {}".format(
                mpt.root_hash.hex(),
                normalize_bytes(block_dict['transactionsRoot']).hex()))

    mpt_key_nibbles = bytes_to_nibbles(rlp.encode(tx_index))
    mpt_path, stack_indexes, stack = generate_proof(mpt, mpt_key_nibbles)

    proof_blob = rlp.encode([
        1,  # proof_type
        header,
        tx_index,
        bytes(mpt_path),
        bytes(stack_indexes),
        stack,
    ])
    return proof_blob
def __init__(self,
             db: AtomicDatabaseAPI,
             peer_pool: ETHPeerPool,
             queen_tracker: QueenTrackerAPI,
             event_bus: EndpointAPI) -> None:
    self.logger = get_logger('trinity.sync.beam.BeamDownloader')
    self._db = db
    self._trie_db = HexaryTrie(db)
    self._event_bus = event_bus

    # Track the needed node data that is urgent and important:
    buffer_size = MAX_STATE_FETCH * REQUEST_BUFFER_MULTIPLIER
    self._node_tasks = TaskQueue[Hash32](buffer_size, lambda task: 0)

    # list of events waiting on new data
    self._new_data_event: asyncio.Event = asyncio.Event()
    self._preview_events = {}

    self._peer_pool = peer_pool

    # Track node data for upcoming blocks
    self._block_number_lookup = defaultdict(lambda: BlockNumber(0))
    self._maybe_useful_nodes = TaskQueue[Hash32](
        buffer_size,
        # Prefer trie nodes from earliest blocks
        lambda node_hash: self._block_number_lookup[node_hash],
    )

    self._num_urgent_requests_by_peer = Counter()
    self._num_predictive_requests_by_peer = Counter()

    self._queen_tracker = queen_tracker
    self._threadpool = ThreadPoolExecutor()
    asyncio.get_event_loop().set_default_executor(self._threadpool)
def _get_read_trie(self) -> HexaryTrie:
    if self._write_trie is not None:
        return self._write_trie
    else:
        # Creating "HexaryTrie" is a pretty light operation, so not a huge cost
        # to create a new one at every read, but we could
        # cache the read trie, if this becomes a bottleneck.
        return HexaryTrie(self._db, root_hash=self._starting_root_hash)
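# A small check of the claim in the comment above (a sketch, assuming the
# py-trie API and the KeyAccessLogger helper used in the tests on this
# page): constructing a HexaryTrie only binds the db reference and root
# hash, and reads no nodes until the first lookup, so building a
# throwaway read trie per lookup is cheap.
from trie import HexaryTrie

db = {}
seed = HexaryTrie(db)
seed[b'key'] = b'a value long enough to force a hashed node in the trie!!'
root = seed.root_hash

logged = KeyAccessLogger(db)
read_trie = HexaryTrie(logged, root_hash=root)  # no node reads yet
assert len(logged.read_keys) == 0
_ = read_trie[b'key']  # the first lookup triggers the actual reads
assert len(logged.read_keys) > 0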
def test_hexary_trie_batch_save_drops_last_root_data_when_pruning():
    db = {}
    trie = HexaryTrie(db, prune=True)
    trie.set(b'what floats on water?', b'very small rocks')
    old_root_hash = trie.root_hash

    with trie.squash_changes() as memory_trie:
        memory_trie.set(b'what floats on water?', b'a duck')
        verify_ref_count(memory_trie)

    assert trie[b'what floats on water?'] == b'a duck'

    old_trie = HexaryTrie(db, root_hash=old_root_hash)
    with pytest.raises(MissingTraversalNode) as excinfo:
        old_trie.root_node

    assert encode_hex(old_root_hash) in str(excinfo.value)
def test_hexary_trie_squash_all_changes(updates, deleted):
    db = {}
    trie = HexaryTrie(db=db)
    expected = {}
    root_hashes = set()
    with trie.squash_changes() as memory_trie:
        for _index, (key, value) in enumerate(updates):
            if value is None:
                del memory_trie[key]
                expected.pop(key, None)
            else:
                memory_trie[key] = value
                expected[key] = value
            root_hashes.add(memory_trie.root_hash)
        for _index, key in enumerate(deleted):
            del memory_trie[key]
            expected.pop(key, None)
            root_hashes.add(memory_trie.root_hash)

    final_root_hash = trie.root_hash

    # access all of the values in the trie, triggering reads for all the database keys
    # that support the final state
    flagged_usage_db = KeyAccessLogger(db)
    flag_trie = HexaryTrie(flagged_usage_db, root_hash=final_root_hash)
    for key, val in expected.items():
        assert flag_trie[key] == val

    # assert that no unnecessary database values were created
    unread = flagged_usage_db.unread_keys()
    straggler_data = {k: (db[k], decode_node(db[k])) for k in unread}
    assert len(unread) == 0, straggler_data

    # rebuild without squashing, to compare root hash
    verbose_trie = HexaryTrie({})
    for key, value in updates:
        if value is None:
            del verbose_trie[key]
        else:
            verbose_trie[key] = value
    for _index, key in enumerate(deleted):
        del verbose_trie[key]

    assert final_root_hash == verbose_trie.root_hash
def _make_trie_root_and_nodes(items: Tuple[bytes, ...]) -> TrieRootAndData:
    kv_store = {}  # type: Dict[Hash32, bytes]
    trie = HexaryTrie(kv_store, BLANK_ROOT_HASH)
    with trie.squash_changes() as memory_trie:
        for index, item in enumerate(items):
            index_key = rlp.encode(index, sedes=rlp.sedes.big_endian_int)
            memory_trie[index_key] = item
    return trie.root_hash, kv_store
def get_receipts(self, header, receipt_class):
    receipt_db = HexaryTrie(db=self.db, root_hash=header.receipt_root)
    for receipt_idx in itertools.count():
        receipt_key = rlp.encode(receipt_idx)
        if receipt_key in receipt_db:
            receipt_data = receipt_db[receipt_key]
            yield rlp.decode(receipt_data, sedes=receipt_class)
        else:
            break
def _get_write_trie(self) -> HexaryTrie:
    if self._trie_nodes_batch is None:
        self._trie_nodes_batch = BatchDB(self._db, read_through_deletes=True)

    if self._write_trie is None:
        batch_db = self._trie_nodes_batch
        self._write_trie = HexaryTrie(batch_db, root_hash=self._starting_root_hash, prune=True)

    return self._write_trie
def add_receipt(self, block_header: BlockHeader, index_key: int, receipt: Receipt) -> Hash32:
    """
    Adds the given receipt to the provided block header.

    Returns the updated `receipts_root` for the updated block header.
    """
    receipt_db = HexaryTrie(db=self.db, root_hash=block_header.receipt_root)
    receipt_db[index_key] = rlp.encode(receipt)
    return receipt_db.root_hash
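# Hypothetical usage of add_receipt above (chaindb, block_header, and
# receipt are illustrative names, not part of the snippet): fold a receipt
# into the header's receipt trie and carry the updated root forward.
index_key = rlp.encode(0, sedes=rlp.sedes.big_endian_int)
updated_root = chaindb.add_receipt(block_header, index_key, receipt)
block_header = block_header.copy(receipt_root=updated_root)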
def get_block_transactions(self, block_header, transaction_class):
    transaction_db = HexaryTrie(self.db, root_hash=block_header.transaction_root)
    for transaction_idx in itertools.count():
        transaction_key = rlp.encode(transaction_idx)
        if transaction_key in transaction_db:
            transaction_data = transaction_db[transaction_key]
            yield rlp.decode(transaction_data, sedes=transaction_class)
        else:
            break
def __init__(self, db, root_hash=BLANK_ROOT_HASH, read_only=False):
    # Keep a reference to the original db instance to use it as part of _get_account()'s
    # cache key.
    self._unwrapped_db = db
    if read_only:
        self.db = ImmutableDB(db)
    else:
        self.db = db
    self.__trie = HashTrie(HexaryTrie(self.db, root_hash))