def encode_leaf_node(value):
    """
    Serialize a leaf node.

    :param value: the non-empty ``bytes`` payload carried by the leaf
    :return: ``LEAF_TYPE_PREFIX`` followed by ``value``
    :raises ValidationError: if ``value`` is not ``bytes`` or is empty
    """
    # The type check runs first, so a non-bytes value (including None) is
    # rejected here before the emptiness test below is reached.
    validate_is_bytes(value)

    is_empty = value is None or value == b''
    if is_empty:
        raise ValidationError("Value of leaf node can not be empty")

    return LEAF_TYPE_PREFIX + value
def encode_kv_node(keypath, child_node_hash):
    """
    Serialize a key/value node.

    :param keypath: non-empty ``bytes`` key path; it is run through
        ``encode_from_bin_keypath`` before being written
    :param child_node_hash: ``bytes`` of length 32 identifying the child
    :return: ``KV_TYPE_PREFIX`` + encoded keypath + ``child_node_hash``
    :raises ValidationError: if the keypath is empty, if either argument is
        not ``bytes``, or if the child hash is not 32 long
    """
    # Emptiness is checked before the type check so that a missing keypath
    # gets the dedicated "can not be empty" message.
    keypath_missing = keypath is None or keypath == b''
    if keypath_missing:
        raise ValidationError("Key path can not be empty")

    validate_is_bytes(keypath)
    validate_is_bytes(child_node_hash)
    validate_length(child_node_hash, 32)

    encoded_keypath = encode_from_bin_keypath(keypath)
    return KV_TYPE_PREFIX + encoded_keypath + child_node_hash
def _prune_on_success(self):
    """
    Generator that brackets a single set/delete operation with pruning
    bookkeeping.

    When ``self.is_pruning`` is set, a fresh pending-prune counter is created
    before yielding, and ``self._complete_pruning()`` runs after the wrapped
    code finishes without raising. The pending counter is always reset to
    ``None`` once the generator has started its ``try`` block.

    NOTE(review): presumably wrapped by ``contextlib.contextmanager`` via a
    decorator that is not visible in this block - confirm at the definition.

    :raises ValidationError: if pruning is enabled and another operation's
        pending prune keys are still in place
    """
    if self.is_pruning:
        # A non-None tracker means another set/delete is still in flight.
        if self._pending_prune_keys is not None:
            raise ValidationError(
                "Cannot set/delete simultaneously, run them in serial")
        self._pending_prune_keys = defaultdict(int)

    try:
        yield
        # Only reached when the wrapped body did not raise.
        if self.is_pruning:
            self._complete_pruning()
    finally:
        # Clear the tracker so the next set/delete starts clean
        self._pending_prune_keys = None
def _get(self, root_hash, trie_key):
    """
    Look up the value stored under ``trie_key`` in the trie rooted at
    ``root_hash``.

    Delegates the walk to ``self._traverse`` and then interprets the node it
    stopped on together with whatever part of the key was left over.

    :return: the stored value, or ``BLANK_NODE`` when nothing is stored
    :raises ValidationError: if traversal stops on an extension or branch
        node while key material remains
    """
    node, remaining_key = self._traverse(root_hash, trie_key)
    node_type = get_node_type(node)

    if node_type == NODE_TYPE_BLANK:
        return BLANK_NODE

    if node_type == NODE_TYPE_LEAF:
        # Only an exact match of the leftover key against the leaf's own key
        # refers to this leaf's value; anything else names a missing entry.
        if remaining_key == extract_key(node):
            return node[1]
        return BLANK_NODE

    if node_type == NODE_TYPE_EXTENSION:
        # Leftover key should have been consumed by descending into the
        # extension's child (or produced a blank node on mismatch).
        if len(remaining_key) > 0:
            raise ValidationError(
                "Traverse should never return an extension node with remaining key, "
                f"but returned node {node!r} with remaining key {remaining_key}."
            )
        return BLANK_NODE

    if node_type == NODE_TYPE_BRANCH:
        # Leftover key should have been consumed by descending into one of
        # the branch's children, even an empty one (yielding BLANK_NODE).
        if len(remaining_key) > 0:
            raise ValidationError(
                "Traverse should never return a non-empty branch node with remaining key, "
                f"but returned node {node!r} with remaining key {remaining_key}."
            )
        # The final slot of the branch node is returned as its value.
        return node[-1]

    raise Exception("Invariant: This shouldn't ever happen")
def _prune_key(self, key):
    """
    Drop one reference to ``key``, deleting it from ``self.db`` once no
    references remain.

    :param key: database key whose reference count is decremented
    :raises ValidationError: if the count reaches zero (or below) and the
        key is not present in ``self.db``
    """
    remaining_refs = self.ref_count[key] - 1

    if remaining_refs > 0:
        self.ref_count[key] = remaining_refs
        return

    # Keys that were already in the starting database are not tracked by the
    # ref count, so the count may drop below zero. Whether the key really
    # exists is decided by the underlying db: delete it if present, otherwise
    # report an attempt to prune a node that doesn't exist.
    try:
        del self.db[key]
    except KeyError as exc:
        raise ValidationError(
            "Tried to prune key %r that doesn't exist" % key) from exc

    # Clamp to zero after a successful delete.
    self.ref_count[key] = 0
def update(self, key: bytes, value: bytes, node_updates: Sequence[Hash32]):
    """
    Fold an update made to some key into the branch tracked by this object.

    :param key: keypath of the update being processed
    :param value: value of the update being processed
    :param node_updates: sibling nodes for the update, in root->leaf order;
        must reach at least the depth at which ``key`` first diverges from
        the tracked key
    :raises ValidationError: if ``key`` fails validation, or if
        ``node_updates`` is too short to reach the divergence point
    """
    validate_is_bytes(key)
    validate_length(key, self._key_size)

    # XOR leaves a 1 in every bit position where the two keypaths disagree
    path_diff = to_int(self.key) ^ to_int(key)

    if path_diff == 0:
        # Same keypath: only the tracked value changes, the branch is untouched
        self._value = value
        return

    # Locate the first bit (scanning root->leaf, i.e. MSB->LSB) at which the
    # keypaths differ. That depth is the branch point, and it indexes
    # directly into the root->leaf ordered node lists.
    total_depth = self._branch_size
    for depth in range(total_depth):
        mask = 1 << (total_depth - 1 - depth)
        if path_diff & mask:
            branch_point = depth
            break

    # NOTE: node_updates may be truncated; it only needs to be deep enough
    #       to contain the sibling at the branch point, which can be far
    #       shallower than the full tree depth.
    if len(node_updates) <= branch_point:
        raise ValidationError("Updated node list is not deep enough")

    # Replace the sibling at the depth where our key and the updated key split
    self._branch[branch_point] = node_updates[branch_point]
def validate_is_node(node):
    """
    Recursively check that ``node`` has an accepted trie-node shape.

    Accepted shapes are ``BLANK_NODE``, a two-item node (key, value), or a
    seventeen-item node (sixteen child slots followed by a value).

    :raises ValidationError: if the node has any other length, or if one of
        its parts fails the bytes/length checks
    """
    if node == BLANK_NODE:
        return

    item_count = len(node)

    if item_count == 2:
        key, value = node
        validate_is_bytes(key)
        # The value slot may embed a nested node (a list) or hold raw bytes.
        if isinstance(value, list):
            validate_is_node(value)
        else:
            validate_is_bytes(value)
        return

    if item_count == 17:
        validate_is_bytes(node[16])
        for child in node[:16]:
            if child == BLANK_NODE:
                continue
            if isinstance(child, list):
                # Embedded child node: validate it recursively.
                validate_is_node(child)
                continue
            # Otherwise the slot must be a bytes value of length 32.
            validate_is_bytes(child)
            validate_length(child, 32)
        return

    raise ValidationError("Invalid Node: {0}".format(node))
def __init__(self, key_size: int = 32, default: bytes = BLANK_NODE):
    """
    Maintain a binary trie with a fixed depth (defined by the key size).

    All values are stored at that depth, and the tree has a default value
    that a key is reset to when it is cleared. If this default is anything
    other than a blank node, then all keys "exist" in the database, which
    mimics the behavior of Ethereum on-chain datastores.

    :param key_size: The size (in # of bytes) of the key. All keys must be
        this size, and the size must be between 1 and 32 bytes. For
        performance, it is not advisable to use keys larger than 32 bytes
        (and you should aim for much less than that); if the structure you
        want to use as a key is larger, hash a serialized form of it to
        obtain the key, or add a unique identifier to the structure.
    :param default: The default value used for the database. Initializes
        the root.
    :raises ValidationError: if ``key_size`` is outside ``[1, 32]``
    """
    # Reject depths we cannot support
    if not (1 <= key_size <= 32):
        raise ValidationError(
            "Keysize must be number of bytes in range [1, 32]")

    self._key_size = key_size  # size of every key, in bytes
    self.depth = key_size * 8  # one tree level per key bit
    self._default = default

    # Build the empty tree bottom-up: start from the default leaf and, at
    # each level, store the current node under its hash and form the parent
    # as that hash concatenated with itself.
    self.db = {}
    current = self._default
    for _level in range(self.depth):
        current_hash = keccak(current)
        self.db[current_hash] = current
        current = current_hash + current_hash

    # What remains after the loop is the root node; record it and its hash
    self.root_hash = keccak(current)
    self.db[self.root_hash] = current
def _complete_pruning(self):
    """
    Apply every pending prune recorded in ``self._pending_prune_keys``.

    For each key the recorded number of prunes is subtracted from its
    reference count. Keys whose count drops to zero or below are deleted
    from ``self.db`` and removed from ``self._ref_count``; the rest get
    their reduced count stored back.

    :raises ValidationError: if a key due for deletion is not in ``self.db``
    """
    for key, prune_count in self._pending_prune_keys.items():
        remaining_refs = self._ref_count[key] - prune_count

        if remaining_refs > 0:
            self._ref_count[key] = remaining_refs
            continue

        # Keys that were already in the starting database are not tracked by
        # the ref count, so the count may go negative. The underlying db
        # decides: delete the key if present, otherwise report an attempt to
        # prune a node that doesn't exist.
        try:
            del self.db[key]
        except KeyError as exc:
            raise ValidationError(
                "Tried to prune key %r that doesn't exist" % key) from exc

        # Optimization: drop zero-count entries to keep _ref_count small
        del self._ref_count[key]
def at_root(self, at_root_hash):
    """
    Yield a non-pruning trie of the same type, sharing this trie's database
    but rooted at ``at_root_hash``.

    :param at_root_hash: root hash the yielded snapshot should use
    :raises ValidationError: if this trie is currently pruning
    """
    if self.is_pruning:
        raise ValidationError("Cannot use trie snapshot while pruning")

    trie_class = type(self)
    yield trie_class(self.db, at_root_hash, prune=False)
def validate_is_bytes(value):
    """
    Ensure ``value`` is an instance of ``bytes``.

    :raises ValidationError: if ``value`` is of any other type; the message
        includes the offending type
    """
    if isinstance(value, bytes):
        return

    actual_type = type(value)
    raise ValidationError(
        "Value is not of type `bytes`: got '{0}'".format(actual_type))
def validate_is_bin_node(node):
    """
    Ensure ``node`` is either ``BLANK_HASH`` or starts with a known binary
    trie node type.

    :param node: the node to check; its first element is compared against
        ``BINARY_TRIE_NODE_TYPES``
    :raises ValidationError: if the node is empty, or its first element is
        not one of ``BINARY_TRIE_NODE_TYPES``
    """
    if node == BLANK_HASH:
        return

    # An empty node has no leading type element. Check the length before
    # indexing so it is reported as an invalid node instead of escaping as
    # an IndexError from ``node[0]``.
    if len(node) > 0 and node[0] in BINARY_TRIE_NODE_TYPES:
        return

    raise ValidationError("Invalid Node: {0}".format(node))
def validate_length(value, length):
    """
    Ensure ``len(value)`` equals ``length``.

    :param value: any object supporting ``len()``
    :param length: the exact length required
    :raises ValidationError: if the lengths differ; the message reports both
    """
    actual_length = len(value)
    if actual_length == length:
        return

    raise ValidationError("Value is of length {0}. Must be {1}".format(
        actual_length, length))