def __init__(self, db: AtomicDatabaseAPI, state_root: Hash32 = BLANK_ROOT_HASH) -> None:
    r"""
    Build the stack of database wrappers that serves account and code lookups.

    Internal implementation details (subject to rapid change):
    Database entries go through several pipes, like so...

    .. code::

        db > _batchdb ---------------------------> _journaldb ----------------> code lookups
         \
          -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups

    In this implementation ``db`` is first wrapped in ``_raw_store_db`` (a key
    access logger), and ``_trie_logger`` (another key access logger) sits
    between ``_trie`` and ``_trie_cache``.

    Journaling sequesters writes at the _journal* attrs ^, until persist is called.

    _batchtrie enables us to prune all trie changes while building
    state, without deleting old trie roots.

    _batchdb and _batchtrie together enable us to make the state root,
    without saving everything to the database.

    _journaldb is a journaling of the keys and values used to store
    code and account storage.

    _trie is a hash-trie, used to generate the state root.

    _trie_cache is a cache tied to the state root of the trie. It
    is important that this cache is checked *after* looking for
    the key in _journaltrie, because the cache is only invalidated
    after a state root change.

    _journaltrie is a journaling of the accounts (an address->rlp mapping,
    rather than the nodes stored by the trie). This enables
    a squashing of all account changes before pushing them into the trie.

    .. NOTE:: StorageDB works similarly

    AccountDB synchronizes the snapshot/revert/persist of both of the
    journals.

    :param db: the underlying database that all layers ultimately wrap
    :param state_root: root hash the account trie is opened at
    """
    # Both batch layers share one access-logging view of the raw database.
    raw_store = KeyAccessLoggerAtomicDB(db, log_missing_keys=False)
    self._raw_store_db = raw_store
    self._batchdb = BatchDB(raw_store)
    self._batchtrie = BatchDB(raw_store, read_through_deletes=True)
    self._journaldb = JournalDB(self._batchdb)

    # Account side of the pipeline: trie -> logger -> cache -> journal.
    pruning_trie = HexaryTrie(self._batchtrie, state_root, prune=True)
    self._trie = HashTrie(pruning_trie)
    self._trie_logger = KeyAccessLoggerDB(self._trie, log_missing_keys=False)
    self._trie_cache = CacheDB(self._trie_logger)
    self._journaltrie = JournalDB(self._trie_cache)

    # Bookkeeping that lives alongside the database layers.
    self._account_cache = LRU(2048)
    self._account_stores: Dict[Address, AccountStorageDatabaseAPI] = {}
    self._dirty_accounts: Set[Address] = set()
    self._root_hash_at_last_persist = state_root

    self._accessed_accounts: Set[Address] = set()
    self._accessed_bytecodes: Set[Address] = set()

    # Track whether an account or slot has been accessed during a given transaction:
    self._reset_access_counters()
def __init__(self, db: BaseDB, state_root: Hash32 = BLANK_ROOT_HASH) -> None:
    r"""
    Build the stack of database wrappers that serves account and code lookups.

    Internal implementation details (subject to rapid change):
    Database entries go through several pipes, like so...

    .. code::

                                                                -> hash-trie -> storage lookups
                                                              /
        db > _batchdb ---------------------------> _journaldb ----------------> code lookups
         \
          -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups

    Journaling sequesters writes at the _journal* attrs ^, until persist is called.

    _batchtrie enables us to prune all trie changes while building
    state, without deleting old trie roots.

    _batchdb and _batchtrie together enable us to make the state root,
    without saving everything to the database.

    _journaldb is a journaling of the keys and values used to store
    code and account storage.

    _trie is a hash-trie, used to generate the state root.

    _trie_cache is a cache tied to the state root of the trie. It
    is important that this cache is checked *after* looking for
    the key in _journaltrie, because the cache is only invalidated
    after a state root change.

    _journaltrie is a journaling of the accounts (an address->rlp mapping,
    rather than the nodes stored by the trie). This enables
    a squashing of all account changes before pushing them into the trie.

    .. NOTE:: There is an opportunity to do something similar for storage

    AccountDB synchronizes the snapshot/revert/persist of both of the
    journals.

    :param db: the underlying database that all layers ultimately wrap
    :param state_root: root hash the account trie is opened at
    """
    # Code / storage side: batch the raw db, then journal on top of the batch.
    self._batchdb = BatchDB(db)
    self._batchtrie = BatchDB(db)
    self._journaldb = JournalDB(self._batchdb)

    # Account side: trie -> cache -> journal.
    pruning_trie = HexaryTrie(self._batchtrie, state_root, prune=True)
    self._trie = HashTrie(pruning_trie)
    self._trie_cache = CacheDB(self._trie)
    self._journaltrie = JournalDB(self._trie_cache)

    self._account_cache = LRU(2048)
def test_journal_db_rejects_committing_root():
    """Committing the journal's root changeset id must raise ``ValidationError``."""
    journal_db = JournalDB(MemoryDB({}))
    root_changeset = journal_db.journal.root_changeset_id

    with pytest.raises(ValidationError):
        journal_db.commit(root_changeset)
def __init__(self, db: BaseAtomicDB, storage_root: Hash32, address: Address) -> None:
    """
    Assemble the layered databases backing one account's storage.

    Database entries go through several pipes, like so...

    .. code::

        db -> _storage_lookup -> _storage_cache -> _journal_storage

    db is the raw database, we can assume it hits disk when written to.
    Keys are stored as node hashes and rlp-encoded node values.

    _storage_lookup is itself a pair of databases: (BatchDB -> HexaryTrie),
    writes to storage lookup *are* immediately applied to a trie, generating
    the appropriate trie nodes and root hash (via the HexaryTrie). The
    writes are *not* persisted to db, until _storage_lookup is explicitly instructed to,
    via :meth:`StorageLookup.commit_to`

    _storage_cache is a cache tied to the state root of the trie. It
    is important that this cache is checked *after* looking for
    the key in _journal_storage, because the cache is only invalidated
    after a state root change. Otherwise, you will see data since the last
    storage root was calculated.

    Journaling batches writes at the _journal_storage layer, until persist is called.
    It manages all the checkpointing and rollbacks that happen during EVM execution.

    In both _storage_cache and _journal_storage, Keys are set/retrieved as the
    big_endian encoding of the slot integer, and the rlp-encoded value.

    :param db: the raw database underneath every layer
    :param storage_root: root hash handed to the storage lookup
    :param address: address of the account whose storage this is
    """
    self._address = address

    # Each layer wraps the one created just before it.
    lookup = StorageLookup(db, storage_root, address)
    self._storage_lookup = lookup

    cache = CacheDB(lookup)
    self._storage_cache = cache

    self._journal_storage = JournalDB(cache)
def __init__(self, db: AtomicDatabaseAPI, storage_root: Hash32, address: Address) -> None:
    """
    Assemble the layered databases backing one account's storage.

    Database entries go through several pipes, like so...

    .. code::

        db -> _storage_lookup -> _storage_cache -> _locked_changes -> _journal_storage

    db is the raw database, we can assume it hits disk when written to.
    Keys are stored as node hashes and rlp-encoded node values.

    _storage_lookup is itself a pair of databases: (BatchDB -> HexaryTrie),
    writes to storage lookup *are* immediately applied to a trie, generating
    the appropriate trie nodes and root hash (via the HexaryTrie). The
    writes are *not* persisted to db, until _storage_lookup is explicitly instructed to,
    via :meth:`StorageLookup.commit_to`

    _storage_cache is a cache tied to the state root of the trie. It
    is important that this cache is checked *after* looking for
    the key in _journal_storage, because the cache is only invalidated
    after a state root change. Otherwise, you will see data since the last
    storage root was calculated.

    _locked_changes is a batch database that includes only those values that are
    un-revertable in the EVM. Currently, that means changes that completed in a
    previous transaction.

    Journaling batches writes at the _journal_storage layer, until persist is called.
    It manages all the checkpointing and rollbacks that happen during EVM execution.

    In both _storage_cache and _journal_storage, Keys are set/retrieved as the
    big_endian encoding of the slot integer, and the rlp-encoded value.

    :param db: the raw database underneath every layer
    :param storage_root: root hash handed to the storage lookup
    :param address: address of the account whose storage this is
    """
    self._address = address

    # Each layer wraps the one created just before it.
    lookup = StorageLookup(db, storage_root, address)
    self._storage_lookup = lookup

    cache = CacheDB(lookup)
    self._storage_cache = cache

    locked = JournalDB(cache)
    self._locked_changes = locked
    self._journal_storage = JournalDB(locked)

    self._accessed_slots: Set[int] = set()

    # Track how many times we have cleared the storage. This is journaled
    # in lockstep with other storage changes. That way, we can detect if a revert
    # causes us to revert past the previous storage deletion. The clear count is used
    # as an index to find the base trie from before the revert.
    initial_clear_count = MemoryDB({CLEAR_COUNT_KEY_NAME: to_bytes(0)})
    self._clear_count = JournalDB(initial_clear_count)
def test_journal_db_diff_respects_clear():
    """After ``clear()``, the journal's diff must report no pending items."""
    journal_db = JournalDB(MemoryDB({}))

    journal_db[b'first'] = b'val'
    journal_db.clear()

    pending_items = journal_db.diff().pending_items()
    assert len(pending_items) == 0
def db(request):
    """
    Return the database flavour selected by ``request.param``.

    ``MemoryDB`` is returned bare; ``JournalDB`` and ``BatchDB`` are returned
    wrapping a fresh ``MemoryDB``. Any other param raises.
    """
    db_class = request.param
    base_db = MemoryDB()

    if db_class is JournalDB:
        return JournalDB(base_db)
    if db_class is BatchDB:
        return BatchDB(base_db)
    if db_class is MemoryDB:
        return base_db

    # Reaching here means the parametrization lists a class we don't handle.
    raise Exception("Invariant")
def test_journal_persist_set_KeyError_leaves_changeset_in_place():
    """A persist that raises ``KeyError`` must keep the pending write in the diff."""
    failing_db = MemoryDBSetRaisesKeyError()
    journal_db = JournalDB(failing_db)
    journal_db[b'failing-to-set-key'] = b'val'

    with pytest.raises(KeyError):
        journal_db.persist()

    # The write that could not be persisted is still pending.
    still_pending = journal_db.diff().pending_items()
    assert still_pending == ((b'failing-to-set-key', b'val'), )
def test_journal_persist_set_KeyError():
    """A ``KeyError`` raised by the wrapped db on set must surface from ``persist()``."""
    failing_db = MemoryDBSetRaisesKeyError()

    # make sure test is set up correctly
    with pytest.raises(KeyError):
        failing_db[b'failing-to-set-key'] = b'val'

    journal_db = JournalDB(failing_db)
    journal_db[b'failing-to-set-key'] = b'val'

    with pytest.raises(KeyError):
        journal_db.persist()
def test_journal_persist_set_fail_leaves_checkpoint_in_place(db_class, expected_exception):
    """
    A persist that raises ``expected_exception`` must keep the pending write in the diff.

    ``db_class`` is instantiated with no arguments and wrapped by the journal.
    """
    failing_db = db_class()
    journal_db = JournalDB(failing_db)
    journal_db[b'failing-to-set-key'] = b'val'

    with pytest.raises(expected_exception):
        journal_db.persist()

    # The write that could not be persisted is still pending.
    still_pending = journal_db.diff().pending_items()
    assert still_pending == ((b'failing-to-set-key', b'val'), )
def test_journal_persist_set_fail(db_class, expected_exception):
    """``expected_exception`` raised by the wrapped db on set must surface from ``persist()``."""
    failing_db = db_class()

    # make sure test is set up correctly
    with pytest.raises(expected_exception):
        failing_db[b'failing-to-set-key'] = b'val'

    journal_db = JournalDB(failing_db)
    journal_db[b'failing-to-set-key'] = b'val'

    with pytest.raises(expected_exception):
        journal_db.persist()
def db(request):
    """
    Yield the database flavour selected by ``request.param``.

    ``MemoryDB`` is yielded bare. ``JournalDB``, ``BatchDB`` and ``CacheDB`` are
    yielded wrapping a fresh ``MemoryDB``. For ``AtomicDB`` the yielded object is
    the batch from ``atomic_batch()``, which stays open until the generator
    resumes. Any other param raises.
    """
    db_class = request.param
    base_db = MemoryDB()

    if db_class is MemoryDB:
        yield base_db
    elif db_class is AtomicDB:
        atomic = AtomicDB(base_db)
        with atomic.atomic_batch() as batch:
            yield batch
    elif db_class is JournalDB or db_class is BatchDB or db_class is CacheDB:
        # These wrappers are all constructed the same way around the base db.
        yield db_class(base_db)
    else:
        raise Exception("Invariant")
def test_journal_persist_set_KeyError_then_persist():
    """After a persist fails with ``KeyError``, a later persist can still succeed."""
    backing_data = {b'data-to-delete': b'val'}
    memory_db = MemoryDBSetRaisesKeyError(backing_data)
    journal_db = JournalDB(memory_db)

    journal_db[b'failing-to-set-key'] = b'val'
    with pytest.raises(KeyError):
        journal_db.persist()

    assert b'failing-to-set-key' not in memory_db

    # A persist that fails reinstates all the pending changes as a single changeset.
    # Swap the wrapped db for the plain dict, so the retry is not blocked by
    # the KeyError-raising wrapper:
    journal_db.wrapped_db = backing_data

    # smoke test that persist works after an exception
    del journal_db[b'data-to-delete']
    journal_db.persist()

    assert b'data-to-delete' not in memory_db
    # This key is set on the second attempt
    assert b'failing-to-set-key' in memory_db
def test_journal_persist_set_fail_then_persist(db_class, expected_exception):
    """After a persist fails with ``expected_exception``, a later persist can still succeed."""
    backing_data = {b'data-to-delete': b'val'}
    memory_db = db_class(backing_data)
    journal_db = JournalDB(memory_db)

    journal_db[b'failing-to-set-key'] = b'val'
    with pytest.raises(expected_exception):
        journal_db.persist()

    assert b'failing-to-set-key' not in memory_db

    # A persist that fails reinstates all the pending changes, but without any checkpoints.
    # Swap the wrapped db for the plain dict, so the retry is not blocked by
    # the failing wrapper:
    journal_db._wrapped_db = backing_data

    # smoke test that persist works after an exception
    del journal_db[b'data-to-delete']
    journal_db.persist()

    assert b'data-to-delete' not in memory_db
    # This key is set on the second attempt
    assert b'failing-to-set-key' in memory_db
def test_journal_persist_delete_fail_then_persist():
    """After a persist fails on a delete, a later persist can still succeed."""
    backing = {b'delete-me': b'val'}
    journal_db = JournalDB(backing)

    del journal_db[b'delete-me']

    # Let's artificially remove the key so it fails on delete
    # (this might happen if the wrapped db is a trie)
    backing.clear()

    with pytest.raises(KeyError):
        journal_db.persist()

    # A persist that fails reinstates all the pending changes, but without any checkpoints.
    # Put the value back in the wrapped dict so the delete no longer fails, and try again:
    backing[b'delete-me'] = b'val'

    # smoke test that persist works after an exception
    journal_db[b'new-key'] = b'new-val'
    journal_db.persist()

    assert backing[b'new-key'] == b'new-val'
    assert b'delete-me' not in backing
def journal_db(memory_db):
    """Return a ``JournalDB`` wrapping the given ``memory_db``."""
    wrapped = JournalDB(memory_db)
    return wrapped
def _reset_access_counters(self) -> None:
    """
    Replace the access-tracking journal with a fresh, empty one.

    Account accesses and storage accesses are recorded in the same journal:

    - Accounts just use the address as the key (and an empty value as a flag)
    - Storage uses a concatenation of address and slot converted to bytes
      (and an empty value)
    """
    empty_store = MemoryDB()
    self._journal_accessed_state = JournalDB(empty_store)
def __init__(self):
    """
    Set up two journals, each over its own empty dict.

    ``slow_journal`` is a ``SlowJournalDB`` over ``slow_wrapped`` and
    ``fast_journal`` is a ``JournalDB`` over ``fast_wrapped``.
    """
    super().__init__()

    slow_backing = {}
    self.slow_wrapped = slow_backing
    self.slow_journal = SlowJournalDB(slow_backing)

    fast_backing = {}
    self.fast_wrapped = fast_backing
    self.fast_journal = JournalDB(fast_backing)