Example #1
0
def test_batch_db_read_through_should_fail_to_commit_deletes(base_db):
    """A read-through batch must refuse ``commit(apply_deletes=True)``."""
    read_through_batch = BatchDB(base_db, read_through_deletes=True)

    # A batch that reads through its deletes should never apply them to
    # the underlying database, so asking it to do so is a contradiction
    # that has to be rejected.
    with pytest.raises(ValidationError):
        read_through_batch.commit(apply_deletes=True)
Example #2
0
def test_batch_db_read_through_delete_after_modify(base_db):
    """Deleting a key modified in the batch re-exposes the underlying value."""
    key = b'modify-then-delete'
    base_db[key] = b'original'

    batch = BatchDB(base_db, read_through_deletes=True)

    # The pending write shadows the value held by the underlying database.
    batch.set(key, b'new-val')
    assert batch[key] == b'new-val'

    # Deleting drops the pending write; the read falls through again.
    batch.delete(key)
    assert batch[key] == b'original'

    # Committing without applying deletes leaves the underlying value alone.
    batch.commit(apply_deletes=False)
    assert base_db[key] == b'original'
Example #3
0
def test_batch_db_read_through_delete(base_db):
    """Read-through deletes hide batch-only keys but not underlying ones."""
    underlying_key = b'read-through-deleted'
    batch_only_key = b'only-in-batch'

    base_db[underlying_key] = b'still-here'

    batch = BatchDB(base_db, read_through_deletes=True)
    batch.set(batch_only_key, b'will-disappear')

    batch.delete(underlying_key)
    batch.delete(batch_only_key)

    # The key that lives in the underlying database is still readable.
    assert underlying_key in batch
    assert batch[underlying_key] == b'still-here'

    # The key that only ever existed in the batch is gone.
    assert batch_only_key not in batch
    with pytest.raises(KeyError):
        batch[batch_only_key]

    batch.commit(apply_deletes=False)

    assert base_db[underlying_key] == b'still-here'

    # data deleted inside the batch must never reach the underlying database
    assert batch_only_key not in base_db
Example #4
0
class AccountDB(BaseAccountDB):
    """
    Account state database layered over a raw key/value store.

    Account, code and storage writes go through journals so they can be
    recorded, discarded or committed as changesets. The two batch layers
    wrapping the raw database are only committed by :meth:`persist`.
    """

    logger = cast(ExtendedDebugLogger,
                  logging.getLogger('eth.db.account.AccountDB'))

    def __init__(self,
                 db: BaseDB,
                 state_root: Hash32 = BLANK_ROOT_HASH) -> None:
        r"""
        Internal implementation details (subject to rapid change):
        Database entries go through several pipes, like so...

        .. code::

                                                                    -> hash-trie -> storage lookups
                                                                  /
            db > _batchdb ---------------------------> _journaldb ----------------> code lookups
             \
              -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups

        Journaling sequesters writes at the _journal* attrs ^, until persist is called.

        _batchtrie enables us to prune all trie changes while building
        state,  without deleting old trie roots.

        _batchdb and _batchtrie together enable us to make the state root,
        without saving everything to the database.

        _journaldb is a journaling of the keys and values used to store
        code and account storage.

        _trie is a hash-trie, used to generate the state root

        _trie_cache is a cache tied to the state root of the trie. It
        is important that this cache is checked *after* looking for
        the key in _journaltrie, because the cache is only invalidated
        after a state root change.

        _journaltrie is a journaling of the accounts (an address->rlp mapping,
        rather than the nodes stored by the trie). This enables
        a squashing of all account changes before pushing them into the trie.

        _account_cache is an LRU of decoded Account objects keyed by address.
        It is filled by journal reads and by every account write, and it is
        cleared whenever a changeset is discarded or the state root is
        reassigned.

        .. NOTE:: There is an opportunity to do something similar for storage

        AccountDB synchronizes the snapshot/revert/persist of both of the
        journals.

        :param db: the raw database that both batch layers wrap
        :param state_root: root hash the account trie starts from
        """
        self._batchdb = BatchDB(db)
        self._batchtrie = BatchDB(db)
        self._journaldb = JournalDB(self._batchdb)
        self._trie = HashTrie(
            HexaryTrie(self._batchtrie, state_root, prune=True))
        self._trie_cache = CacheDB(self._trie)
        self._journaltrie = JournalDB(self._trie_cache)
        self._account_cache = LRU(2048)

    @property
    def state_root(self) -> Hash32:
        """Root hash of the account trie."""
        return self._trie.root_hash

    @state_root.setter
    def state_root(self, value: Hash32) -> None:
        # Both caches hold data that was read against the previous root.
        # Dropping only the trie cache would let _get_account() keep
        # serving accounts that were decoded under the old root, so the
        # decoded-account LRU is cleared as well.
        self._trie_cache.reset_cache()
        self._account_cache.clear()
        self._trie.root_hash = value

    def has_root(self, state_root: bytes) -> bool:
        """Return whether ``state_root`` is a key of the trie's batch database."""
        return state_root in self._batchtrie

    #
    # Storage
    #
    def get_storage(self,
                    address: Address,
                    slot: int,
                    from_journal: bool = True) -> int:
        """
        Return the integer stored at ``slot`` of ``address``, or 0 when the
        slot is not present in the account's storage trie.

        :param from_journal: forwarded to :meth:`_get_account`; when False
            the account is read from the trie cache instead of the journal.
        """
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(slot, title="Storage Slot")

        account = self._get_account(address, from_journal)
        storage = HashTrie(HexaryTrie(self._journaldb, account.storage_root))

        slot_as_key = pad32(int_to_big_endian(slot))

        if slot_as_key in storage:
            encoded_value = storage[slot_as_key]
            return rlp.decode(encoded_value, sedes=rlp.sedes.big_endian_int)
        else:
            return 0

    def set_storage(self, address: Address, slot: int, value: int) -> None:
        """
        Write ``value`` at ``slot`` of ``address`` and store the resulting
        storage root on the account. A zero value deletes the slot.
        """
        validate_uint256(value, title="Storage Value")
        validate_uint256(slot, title="Storage Slot")
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        storage = HashTrie(HexaryTrie(self._journaldb, account.storage_root))

        slot_as_key = pad32(int_to_big_endian(slot))

        if value:
            encoded_value = rlp.encode(value)
            storage[slot_as_key] = encoded_value
        else:
            del storage[slot_as_key]

        self._set_account(address,
                          account.copy(storage_root=storage.root_hash))

    def delete_storage(self, address: Address) -> None:
        """Reset the storage root of ``address`` to ``BLANK_ROOT_HASH``."""
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        self._set_account(address, account.copy(storage_root=BLANK_ROOT_HASH))

    #
    # Balance
    #
    def get_balance(self, address: Address) -> int:
        """Return the balance of ``address``."""
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.balance

    def set_balance(self, address: Address, balance: int) -> None:
        """Set the balance of ``address`` to ``balance``."""
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(balance, title="Account Balance")

        account = self._get_account(address)
        self._set_account(address, account.copy(balance=balance))

    #
    # Nonce
    #
    def get_nonce(self, address: Address) -> int:
        """Return the nonce of ``address``."""
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.nonce

    def set_nonce(self, address: Address, nonce: int) -> None:
        """Set the nonce of ``address`` to ``nonce``."""
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(nonce, title="Nonce")

        account = self._get_account(address)
        self._set_account(address, account.copy(nonce=nonce))

    def increment_nonce(self, address: Address) -> None:
        """Increase the nonce of ``address`` by one."""
        current_nonce = self.get_nonce(address)
        self.set_nonce(address, current_nonce + 1)

    #
    # Code
    #
    def get_code(self, address: Address) -> bytes:
        """
        Return the code stored under the account's code hash, or ``b""``
        when the journal has no entry for that hash.
        """
        validate_canonical_address(address, title="Storage Address")

        try:
            return self._journaldb[self.get_code_hash(address)]
        except KeyError:
            return b""

    def set_code(self, address: Address, code: bytes) -> None:
        """Store ``code`` keyed by its keccak hash and point the account at it."""
        validate_canonical_address(address, title="Storage Address")
        validate_is_bytes(code, title="Code")

        account = self._get_account(address)

        code_hash = keccak(code)
        self._journaldb[code_hash] = code
        self._set_account(address, account.copy(code_hash=code_hash))

    def get_code_hash(self, address: Address) -> Hash32:
        """Return the code hash recorded on the account at ``address``."""
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.code_hash

    def delete_code(self, address: Address) -> None:
        """
        Reset the account's code hash to ``EMPTY_SHA3``. The code bytes
        themselves are not removed from the journal.
        """
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        self._set_account(address, account.copy(code_hash=EMPTY_SHA3))

    #
    # Account Methods
    #
    def account_has_code_or_nonce(self, address: Address) -> bool:
        """Return True if the nonce is non-zero or the code hash is not ``EMPTY_SHA3``."""
        return self.get_nonce(address) != 0 or self.get_code_hash(
            address) != EMPTY_SHA3

    def delete_account(self, address: Address) -> None:
        """Remove ``address`` from the account journal and the account cache."""
        validate_canonical_address(address, title="Storage Address")
        if address in self._account_cache:
            del self._account_cache[address]
        del self._journaltrie[address]

    def account_exists(self, address: Address) -> bool:
        """Return True if the account journal holds a non-empty value for ``address``."""
        validate_canonical_address(address, title="Storage Address")
        return self._journaltrie.get(address, b'') != b''

    def touch_account(self, address: Address) -> None:
        """
        Write the account back unchanged so that it is recorded in the
        journal. An account that has no stored value is written as a blank
        ``Account()``.
        """
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        self._set_account(address, account)

    def account_is_empty(self, address: Address) -> bool:
        """Return True if the account has no code, a zero nonce and a zero balance."""
        return not self.account_has_code_or_nonce(
            address) and self.get_balance(address) == 0

    #
    # Internal
    #
    def _get_account(self,
                     address: Address,
                     from_journal: bool = True) -> Account:
        """
        Return the decoded account at ``address``, or a blank ``Account()``
        when no value is stored.

        :param from_journal: when True, consult the account cache and then
            the account journal, and cache the result. When False, read from
            the trie cache and leave the account cache untouched.
        """
        if from_journal and address in self._account_cache:
            return self._account_cache[address]
        rlp_account = (self._journaltrie if from_journal else
                       self._trie_cache).get(address, b'')
        if rlp_account:
            account = rlp.decode(rlp_account, sedes=Account)
        else:
            account = Account()
        if from_journal:
            self._account_cache[address] = account
        return account

    def _set_account(self, address: Address, account: Account) -> None:
        """Cache ``account`` and write its rlp encoding to the account journal."""
        self._account_cache[address] = account
        rlp_account = rlp.encode(account, sedes=Account)
        self._journaltrie[address] = rlp_account

    #
    # Record and discard API
    #
    def record(self) -> Tuple[UUID, UUID]:
        """
        Start a changeset in both journals.

        :return: ``(db_changeset, trie_changeset)``, to be handed back to
            :meth:`discard` or :meth:`commit`.
        """
        return (self._journaldb.record(), self._journaltrie.record())

    def discard(self, changeset: Tuple[UUID, UUID]) -> None:
        """Discard ``changeset`` in both journals and drop all cached accounts."""
        db_changeset, trie_changeset = changeset
        self._journaldb.discard(db_changeset)
        self._journaltrie.discard(trie_changeset)
        # cached accounts may reflect writes that were just thrown away
        self._account_cache.clear()

    def commit(self, changeset: Tuple[UUID, UUID]) -> None:
        """Commit ``changeset`` in both journals."""
        db_changeset, trie_changeset = changeset
        self._journaldb.commit(db_changeset)
        self._journaltrie.commit(trie_changeset)

    def make_state_root(self) -> Hash32:
        """Persist both journals into the layers below them and return the state root."""
        self.logger.debug2("Generating AccountDB trie")
        self._journaldb.persist()
        self._journaltrie.persist()
        return self.state_root

    def persist(self) -> None:
        """
        Generate the state root, then commit both batch layers to the raw
        database. Trie deletes are not applied (``apply_deletes=False``);
        deletes in the code/storage batch are.
        """
        self.make_state_root()
        self._batchtrie.commit(apply_deletes=False)
        self._batchdb.commit(apply_deletes=True)

    def _log_pending_accounts(self) -> None:
        """
        Log every account that has a queued change in the account journal,
        once per address, walking the journal entries in reverse order.
        """
        accounts_displayed = set()  # type: Set[bytes]
        queued_changes = self._journaltrie.journal.journal_data.items()
        # mypy bug for ordered dict reversibility: https://github.com/python/typeshed/issues/2078
        for _, accounts in reversed(queued_changes):
            for address in accounts:
                if address in accounts_displayed:
                    continue
                accounts_displayed.add(address)
                account = self._get_account(Address(address))
                self.logger.debug2(
                    "Account %s: balance %d, nonce %d, storage root %s, code hash %s",
                    encode_hex(address),
                    account.balance,
                    account.nonce,
                    encode_hex(account.storage_root),
                    encode_hex(account.code_hash),
                )
Example #5
0
class AccountStorageDB(AccountStorageDatabaseAPI):
    """Journaled, cached view over the storage of a single account."""

    logger = get_extended_debug_logger("eth.db.storage.AccountStorageDB")

    def __init__(self, db: AtomicDatabaseAPI, storage_root: Hash32, address: Address) -> None:
        """
        Values travel through a chain of layers:

        .. code::

            db -> _storage_lookup -> _storage_cache -> _locked_changes -> _journal_storage

        db is the raw database; we can assume a write to it hits disk.
        Its keys are node hashes and its values are rlp-encoded nodes.

        _storage_lookup is itself a pair of databases (BatchDB -> HexaryTrie).
        Writes to it *are* immediately applied to a trie, producing the
        matching trie nodes and root hash (via the HexaryTrie). They are
        *not* persisted to db until _storage_lookup is explicitly told to
        do so through :meth:`StorageLookup.commit_to`.

        _storage_cache is a cache tied to the state root of the trie. It
        must be consulted *after* looking for the key in _journal_storage,
        because the cache is only invalidated after a state root change.
        Otherwise, you will see data since the last storage root was
        calculated.

        _locked_changes is a batch database holding only the values that
        can no longer be reverted in the EVM. Currently, that means changes
        that completed in a previous transaction.

        _journal_storage batches writes until persist is called, and takes
        care of all the checkpointing and rollbacks that happen during EVM
        execution.

        In both _storage_cache and _journal_storage, keys are the big-endian
        encoding of the slot integer and values are rlp-encoded.
        """
        lookup = StorageLookup(db, storage_root, address)
        cache = CacheDB(lookup)
        locked = BatchDB(cache)

        self._address = address
        self._storage_lookup = lookup
        self._storage_cache = cache
        self._locked_changes = locked
        self._journal_storage = JournalDB(locked)

    def get(self, slot: int, from_journal: bool=True) -> int:
        """
        Return the integer stored at ``slot``.

        0 is returned when the key is missing or its stored value is empty
        bytes. A ``MissingStorageTrieNode`` is deliberately let through
        rather than being treated as a missing key.

        :param from_journal: read from the journal when True, otherwise
            from the locked changes beneath it.
        """
        slot_key = int_to_big_endian(slot)
        if from_journal:
            source = self._journal_storage
        else:
            source = self._locked_changes

        try:
            raw_value = source[slot_key]
        except MissingStorageTrieNode:
            raise
        except KeyError:
            return 0

        if raw_value == b'':
            return 0
        return rlp.decode(raw_value, sedes=rlp.sedes.big_endian_int)

    def set(self, slot: int, value: int) -> None:
        """Journal ``value`` at ``slot``; a falsy value deletes the slot instead."""
        slot_key = int_to_big_endian(slot)
        if not value:
            del self._journal_storage[slot_key]
            return
        self._journal_storage[slot_key] = rlp.encode(value)

    def delete(self) -> None:
        """Clear the journal for this account and reset the storage cache."""
        account_address = self._address
        self.logger.debug2(
            "Deleting all storage in account 0x%s, hashed 0x%s",
            account_address.hex(),
            keccak(account_address).hex(),
        )
        self._journal_storage.clear()
        self._storage_cache.reset_cache()

    def record(self, checkpoint: JournalDBCheckpoint) -> None:
        """Register ``checkpoint`` with the journal."""
        self._journal_storage.record(checkpoint)

    def discard(self, checkpoint: JournalDBCheckpoint) -> None:
        """
        Roll the journal back to ``checkpoint`` and reset the storage cache.

        If the journal does not know the checkpoint, it is reset entirely.
        """
        self.logger.debug2('discard checkpoint %r', checkpoint)
        journal = self._journal_storage
        if not journal.has_checkpoint(checkpoint):
            # the checkpoint predates this account's tracking, so simply
            # go back to the beginning
            journal.reset()
        else:
            journal.discard(checkpoint)
        self._storage_cache.reset_cache()

    def commit(self, checkpoint: JournalDBCheckpoint) -> None:
        """
        Commit ``checkpoint`` in the journal.

        If the journal does not know the checkpoint, all changes are
        flattened instead, without persisting them.
        """
        journal = self._journal_storage
        if not journal.has_checkpoint(checkpoint):
            # the checkpoint predates this account's tracking
            journal.flatten()
            return
        journal.commit(checkpoint)

    def lock_changes(self) -> None:
        """Persist the journal into the locked-changes batch."""
        self._journal_storage.persist()

    def make_storage_root(self) -> None:
        """Lock pending changes, then commit the locked batch with deletes applied."""
        self.lock_changes()
        self._locked_changes.commit(apply_deletes=True)

    def _validate_flushed(self) -> None:
        """
        Raise ``ValidationError`` if the journal holds changes made since
        the last persist.
        """
        journal_diff = self._journal_storage.diff()
        if len(journal_diff) == 0:
            return
        raise ValidationError(
            f"StorageDB had a dirty journal when it needed to be clean: {journal_diff!r}"
        )

    @property
    def has_changed_root(self) -> bool:
        """Whether the storage lookup reports a changed root."""
        lookup = self._storage_lookup
        return lookup.has_changed_root

    def get_changed_root(self) -> Hash32:
        """Return the changed root reported by the storage lookup."""
        lookup = self._storage_lookup
        return lookup.get_changed_root()

    def persist(self, db: DatabaseAPI) -> None:
        """
        Commit the storage lookup to ``db`` if its root has changed.

        :raises ValidationError: if the journal still holds unflushed changes.
        """
        self._validate_flushed()
        lookup = self._storage_lookup
        if lookup.has_changed_root:
            lookup.commit_to(db)