def test_batch_db_read_through_should_fail_to_commit_deletes(base_db):
    wrapped = BatchDB(base_db, read_through_deletes=True)
    # A batch that reads through its own deletes must never apply them;
    # asking it to commit with apply_deletes=True is nonsense and is rejected.
    with pytest.raises(ValidationError):
        wrapped.commit(apply_deletes=True)
def test_batch_db_read_through_delete_after_modify(base_db):
    key = b'modify-then-delete'
    base_db[key] = b'original'
    wrapped = BatchDB(base_db, read_through_deletes=True)

    # A pending in-batch write is visible until it is deleted...
    wrapped.set(key, b'new-val')
    assert wrapped[key] == b'new-val'

    # ...after which reads fall through to the underlying database value.
    wrapped.delete(key)
    assert wrapped[key] == b'original'

    # Committing without applying deletes leaves the base value untouched.
    wrapped.commit(apply_deletes=False)
    assert base_db[key] == b'original'
def test_batch_db_read_through_delete(base_db):
    persisted_key = b'read-through-deleted'
    batch_only_key = b'only-in-batch'

    base_db[persisted_key] = b'still-here'
    wrapped = BatchDB(base_db, read_through_deletes=True)
    wrapped.set(batch_only_key, b'will-disappear')

    wrapped.delete(persisted_key)
    wrapped.delete(batch_only_key)

    # Deleting a key that exists underneath reads through to the base value.
    assert persisted_key in wrapped
    assert wrapped[persisted_key] == b'still-here'

    # Deleting a key that only ever lived in the batch removes it entirely.
    assert batch_only_key not in wrapped
    with pytest.raises(KeyError):
        wrapped[batch_only_key]

    wrapped.commit(apply_deletes=False)
    assert base_db[persisted_key] == b'still-here'

    # deleted batch data should never get pushed to the underlying
    assert batch_only_key not in base_db
class AccountDB(BaseAccountDB):

    logger = cast(ExtendedDebugLogger, logging.getLogger('eth.db.account.AccountDB'))

    def __init__(self, db: BaseDB, state_root: Hash32 = BLANK_ROOT_HASH) -> None:
        r"""
        Internal implementation details (subject to rapid change):
        Database entries go through several pipes, like so...

        .. code::

                                                     -> hash-trie -> storage lookups
                                                    /
            db > _batchdb ---------------------------> _journaldb ----------------> code lookups
             \
              -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups

        Journaling sequesters writes at the _journal* attrs ^, until persist is called.

        _batchtrie enables us to prune all trie changes while building
        state, without deleting old trie roots.

        _batchdb and _batchtrie together enable us to make the state root,
        without saving everything to the database.

        _journaldb is a journaling of the keys and values used to store
        code and account storage.

        _trie is a hash-trie, used to generate the state root

        _trie_cache is a cache tied to the state root of the trie. It
        is important that this cache is checked *after* looking for
        the key in _journaltrie, because the cache is only invalidated
        after a state root change.

        _journaltrie is a journaling of the accounts (an address->rlp mapping,
        rather than the nodes stored by the trie). This enables
        a squashing of all account changes before pushing them into the trie.

        .. NOTE:: There is an opportunity to do something similar for storage

        AccountDB synchronizes the snapshot/revert/persist of both of the
        journals.
        """
        self._batchdb = BatchDB(db)
        self._batchtrie = BatchDB(db)
        self._journaldb = JournalDB(self._batchdb)
        # prune=True: intermediate trie nodes are dropped as they become
        # stale while building state; the BatchDB keeps them off disk.
        self._trie = HashTrie(
            HexaryTrie(self._batchtrie, state_root, prune=True))
        self._trie_cache = CacheDB(self._trie)
        self._journaltrie = JournalDB(self._trie_cache)
        # Decoded-Account cache, keyed by address; bounded to 2048 entries.
        self._account_cache = LRU(2048)

    @property
    def state_root(self) -> Hash32:
        return self._trie.root_hash

    @state_root.setter
    def state_root(self, value: Hash32) -> None:
        # The trie cache is only valid for a single state root, so it must
        # be dropped before pointing the trie at a new root.
        self._trie_cache.reset_cache()
        self._trie.root_hash = value

    def has_root(self, state_root: bytes) -> bool:
        return state_root in self._batchtrie

    #
    # Storage
    #
    def get_storage(self, address: Address, slot: int, from_journal: bool = True) -> int:
        """
        Return the integer value at ``slot`` for ``address``, or 0 when the
        slot is unset. ``from_journal=False`` reads pre-journal state.
        """
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(slot, title="Storage Slot")

        account = self._get_account(address, from_journal)
        # Each account has its own storage trie, rooted at account.storage_root.
        storage = HashTrie(HexaryTrie(self._journaldb, account.storage_root))

        slot_as_key = pad32(int_to_big_endian(slot))

        if slot_as_key in storage:
            encoded_value = storage[slot_as_key]
            return rlp.decode(encoded_value, sedes=rlp.sedes.big_endian_int)
        else:
            return 0

    def set_storage(self, address: Address, slot: int, value: int) -> None:
        """Write ``value`` to ``slot``; a zero value deletes the slot."""
        validate_uint256(value, title="Storage Value")
        validate_uint256(slot, title="Storage Slot")
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        storage = HashTrie(HexaryTrie(self._journaldb, account.storage_root))

        slot_as_key = pad32(int_to_big_endian(slot))

        if value:
            encoded_value = rlp.encode(value)
            storage[slot_as_key] = encoded_value
        else:
            # Zero means "unset": remove the key rather than storing a zero.
            del storage[slot_as_key]

        # Persist the new storage root back onto the account record.
        self._set_account(address, account.copy(storage_root=storage.root_hash))

    def delete_storage(self, address: Address) -> None:
        """Clear all storage for ``address`` by resetting its storage root."""
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        self._set_account(address, account.copy(storage_root=BLANK_ROOT_HASH))

    #
    # Balance
    #
    def get_balance(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.balance

    def set_balance(self, address: Address, balance: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(balance, title="Account Balance")

        account = self._get_account(address)
        self._set_account(address, account.copy(balance=balance))

    #
    # Nonce
    #
    def get_nonce(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.nonce

    def set_nonce(self, address: Address, nonce: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(nonce, title="Nonce")

        account = self._get_account(address)
        self._set_account(address, account.copy(nonce=nonce))

    def increment_nonce(self, address: Address) -> None:
        current_nonce = self.get_nonce(address)
        self.set_nonce(address, current_nonce + 1)

    #
    # Code
    #
    def get_code(self, address: Address) -> bytes:
        """Return the contract code for ``address``, or b"" when absent."""
        validate_canonical_address(address, title="Storage Address")

        try:
            return self._journaldb[self.get_code_hash(address)]
        except KeyError:
            # No code stored under this hash (e.g. an EOA): empty bytecode.
            return b""

    def set_code(self, address: Address, code: bytes) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_is_bytes(code, title="Code")

        account = self._get_account(address)

        # Code is stored content-addressed: keyed by its keccak hash.
        code_hash = keccak(code)
        self._journaldb[code_hash] = code
        self._set_account(address, account.copy(code_hash=code_hash))

    def get_code_hash(self, address: Address) -> Hash32:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.code_hash

    def delete_code(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        # EMPTY_SHA3 (hash of b'') marks an account with no code.
        self._set_account(address, account.copy(code_hash=EMPTY_SHA3))

    #
    # Account Methods
    #
    def account_has_code_or_nonce(self, address: Address) -> bool:
        return self.get_nonce(address) != 0 or self.get_code_hash(address) != EMPTY_SHA3

    def delete_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        # Drop any cached decode before removing the trie entry, so a later
        # lookup cannot resurrect the deleted account from the cache.
        if address in self._account_cache:
            del self._account_cache[address]
        del self._journaltrie[address]

    def account_exists(self, address: Address) -> bool:
        validate_canonical_address(address, title="Storage Address")
        return self._journaltrie.get(address, b'') != b''

    def touch_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        # Re-writing the (possibly default) account marks it as touched.
        account = self._get_account(address)
        self._set_account(address, account)

    def account_is_empty(self, address: Address) -> bool:
        return not self.account_has_code_or_nonce(address) and self.get_balance(address) == 0

    #
    # Internal
    #
    def _get_account(self, address: Address, from_journal: bool = True) -> Account:
        """
        Load (and lazily cache) the Account record for ``address``.
        Missing accounts decode to a default ``Account()``.
        """
        if from_journal and address in self._account_cache:
            return self._account_cache[address]

        rlp_account = (self._journaltrie if from_journal else self._trie_cache).get(address, b'')

        if rlp_account:
            account = rlp.decode(rlp_account, sedes=Account)
        else:
            account = Account()
        if from_journal:
            # Only journal-backed reads are cached; the cache mirrors the
            # journal view, not the pre-journal trie view.
            self._account_cache[address] = account
        return account

    def _set_account(self, address: Address, account: Account) -> None:
        # Keep cache and journaled trie in lock-step.
        self._account_cache[address] = account
        rlp_account = rlp.encode(account, sedes=Account)
        self._journaltrie[address] = rlp_account

    #
    # Record and discard API
    #
    def record(self) -> Tuple[UUID, UUID]:
        """Snapshot both journals; returns (db_changeset, trie_changeset)."""
        return (self._journaldb.record(), self._journaltrie.record())

    def discard(self, changeset: Tuple[UUID, UUID]) -> None:
        """Roll both journals back to ``changeset`` and drop the account cache."""
        db_changeset, trie_changeset = changeset
        self._journaldb.discard(db_changeset)
        self._journaltrie.discard(trie_changeset)
        # Cached decodes may now be stale relative to the rolled-back journal.
        self._account_cache.clear()

    def commit(self, changeset: Tuple[UUID, UUID]) -> None:
        """Collapse both journals' changes since ``changeset`` into it."""
        db_changeset, trie_changeset = changeset
        self._journaldb.commit(db_changeset)
        self._journaltrie.commit(trie_changeset)

    def make_state_root(self) -> Hash32:
        """Flush both journals down to the batch layers and return the root."""
        self.logger.debug2("Generating AccountDB trie")
        self._journaldb.persist()
        self._journaltrie.persist()
        return self.state_root

    def persist(self) -> None:
        """Write all pending changes through to the underlying database."""
        self.make_state_root()
        # apply_deletes=False: trie deletes come from pruning while building
        # state; presumably older roots may still reference those nodes, so
        # they must not be removed from the backing db — TODO confirm.
        self._batchtrie.commit(apply_deletes=False)
        self._batchdb.commit(apply_deletes=True)

    def _log_pending_accounts(self) -> None:
        """Debug-log each account with pending (journaled) changes, once."""
        accounts_displayed = set()  # type: Set[bytes]
        queued_changes = self._journaltrie.journal.journal_data.items()
        # mypy bug for ordered dict reversibility: https://github.com/python/typeshed/issues/2078
        for _, accounts in reversed(queued_changes):
            for address in accounts:
                if address in accounts_displayed:
                    continue
                else:
                    accounts_displayed.add(address)
                    account = self._get_account(Address(address))
                    self.logger.debug2(
                        "Account %s: balance %d, nonce %d, storage root %s, code hash %s",
                        encode_hex(address),
                        account.balance,
                        account.nonce,
                        encode_hex(account.storage_root),
                        encode_hex(account.code_hash),
                    )
class AccountStorageDB(AccountStorageDatabaseAPI):
    logger = get_extended_debug_logger("eth.db.storage.AccountStorageDB")

    def __init__(self, db: AtomicDatabaseAPI, storage_root: Hash32, address: Address) -> None:
        """
        Database entries go through several pipes, like so...

        .. code::

            db -> _storage_lookup -> _storage_cache -> _locked_changes -> _journal_storage

        db is the raw database, we can assume it hits disk when written to.
        Keys are stored as node hashes and rlp-encoded node values.

        _storage_lookup is itself a pair of databases: (BatchDB -> HexaryTrie),
        writes to storage lookup *are* immeditaely applied to a trie, generating
        the appropriate trie nodes and and root hash (via the HexaryTrie). The
        writes are *not* persisted to db, until _storage_lookup is explicitly
        instructed to, via :meth:`StorageLookup.commit_to`

        _storage_cache is a cache tied to the state root of the trie. It
        is important that this cache is checked *after* looking for
        the key in _journal_storage, because the cache is only invalidated
        after a state root change. Otherwise, you will see data since the last
        storage root was calculated.

        _locked_changes is a batch database that includes only those values that
        are un-revertable in the EVM. Currently, that means changes that completed
        in a previous transaction.

        Journaling batches writes at the _journal_storage layer, until persist is
        called. It manages all the checkpointing and rollbacks that happen during
        EVM execution.

        In both _storage_cache and _journal_storage, Keys are set/retrieved as the
        big_endian encoding of the slot integer, and the rlp-encoded value.
        """
        self._address = address
        self._storage_lookup = StorageLookup(db, storage_root, address)
        self._storage_cache = CacheDB(self._storage_lookup)
        self._locked_changes = BatchDB(self._storage_cache)
        self._journal_storage = JournalDB(self._locked_changes)

    def get(self, slot: int, from_journal: bool = True) -> int:
        """
        Return the integer value at ``slot``, or 0 when unset.
        ``from_journal=False`` skips revertable (journaled) changes and reads
        only the locked layer.
        """
        key = int_to_big_endian(slot)
        lookup_db = self._journal_storage if from_journal else self._locked_changes
        try:
            encoded_value = lookup_db[key]
        except MissingStorageTrieNode:
            # Must re-raise before the KeyError handler below: it is a
            # KeyError subtype here, but means "data unavailable", not "slot
            # unset" — NOTE(review): subclass relationship presumed; confirm.
            raise
        except KeyError:
            return 0

        if encoded_value == b'':
            # An empty value is the journal's tombstone for a cleared slot.
            return 0
        else:
            return rlp.decode(encoded_value, sedes=rlp.sedes.big_endian_int)

    def set(self, slot: int, value: int) -> None:
        """Write ``value`` to ``slot``; a zero value deletes the slot."""
        key = int_to_big_endian(slot)
        if value:
            self._journal_storage[key] = rlp.encode(value)
        else:
            del self._journal_storage[key]

    def delete(self) -> None:
        """Clear *all* storage for this account."""
        self.logger.debug2(
            "Deleting all storage in account 0x%s, hashed 0x%s",
            self._address.hex(),
            keccak(self._address).hex(),
        )
        self._journal_storage.clear()
        # Cached reads would otherwise survive the wipe.
        self._storage_cache.reset_cache()

    def record(self, checkpoint: JournalDBCheckpoint) -> None:
        self._journal_storage.record(checkpoint)

    def discard(self, checkpoint: JournalDBCheckpoint) -> None:
        """Roll back to ``checkpoint`` (or to the beginning, if unknown)."""
        self.logger.debug2('discard checkpoint %r', checkpoint)
        if self._journal_storage.has_checkpoint(checkpoint):
            self._journal_storage.discard(checkpoint)
        else:
            # if the checkpoint comes before this account started tracking,
            # then simply reset to the beginning
            self._journal_storage.reset()
        self._storage_cache.reset_cache()

    def commit(self, checkpoint: JournalDBCheckpoint) -> None:
        """Collapse journal changes since ``checkpoint`` into it."""
        if self._journal_storage.has_checkpoint(checkpoint):
            self._journal_storage.commit(checkpoint)
        else:
            # if the checkpoint comes before this account started tracking,
            # then flatten all changes, without persisting
            self._journal_storage.flatten()

    def lock_changes(self) -> None:
        # Push journaled writes into _locked_changes, making them
        # un-revertable from the journal's point of view.
        self._journal_storage.persist()

    def make_storage_root(self) -> None:
        """Apply locked changes to the trie, producing a fresh storage root."""
        self.lock_changes()
        self._locked_changes.commit(apply_deletes=True)

    def _validate_flushed(self) -> None:
        """
        Will raise an exception if there are some changes made since the last persist.
        """
        journal_diff = self._journal_storage.diff()
        if len(journal_diff) > 0:
            raise ValidationError(
                f"StorageDB had a dirty journal when it needed to be clean: {journal_diff!r}"
            )

    @property
    def has_changed_root(self) -> bool:
        return self._storage_lookup.has_changed_root

    def get_changed_root(self) -> Hash32:
        return self._storage_lookup.get_changed_root()

    def persist(self, db: DatabaseAPI) -> None:
        """Write the (already-flushed) trie changes through to ``db``."""
        self._validate_flushed()
        if self._storage_lookup.has_changed_root:
            self._storage_lookup.commit_to(db)