def test_ephemeral_db_basics():
    center_id = 0
    keys = tuple(b'key/%d' % idx for idx in range(5))
    key_a, key_b, key_c, key_d, key_e = tuple(sorted(
        keys,
        key=lambda key: compute_distance(center_id, content_key_to_node_id(key)),
    ))
    db = EphemeralDB(
        capacity=20,
        distance_fn=lambda key: compute_distance(center_id, content_key_to_node_id(key)),
    )
    assert db.has_capacity

    # a value larger than the total capacity overflows the database and is
    # immediately evicted.
    db.set(key_e, b'0' * 21)
    assert db.has_capacity
    assert not db.has(key_e)

    # now fill the database to capacity
    db.set(key_a, b'0' * 10)
    db.set(key_c, b'0' * 10)
    assert not db.has_capacity
    assert db.capacity == 0
    assert db.has(key_a)
    assert db.has(key_c)

    # reinsertion of either key should not change things
    db.set(key_a, b'0' * 10)
    db.set(key_c, b'0' * 10)
    assert not db.has_capacity
    assert db.capacity == 0
    assert db.has(key_a)
    assert db.has(key_c)

    # a key that is further away than everything stored is immediately
    # evicted (no change)
    db.set(key_d, b'0')
    assert not db.has(key_d)
    assert not db.has_capacity
    assert db.capacity == 0
    assert db.has(key_a)
    assert db.has(key_c)

    # inserting a closer key evicts the furthest key
    db.set(key_b, b'0')
    assert not db.has(key_c)  # should be evicted
    assert db.has(key_a)
    assert db.has(key_b)

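# A minimal sketch of the two helpers the test above leans on, assuming
# the standard Kademlia XOR metric over integer node ids and a
# sha256-based key-to-id mapping (both are assumptions; the real
# implementations may differ):
import hashlib


def compute_distance(left_id: int, right_id: int) -> int:
    # XOR distance: a smaller value means "closer" in the keyspace.
    return left_id ^ right_id


def content_key_to_node_id(key: bytes) -> int:
    # Hash the content key into the same keyspace as node ids.
    return int.from_bytes(hashlib.sha256(key).digest(), 'big')
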
def __init__(
        self,
        key: Key,
        neighbors_left: Optional[Sequence[Key]] = None,
        neighbors_right: Optional[Sequence[Key]] = None,
) -> None:
    self.key = key
    self.membership_vector = content_key_to_node_id(int_to_big_endian(key))

    if neighbors_left is None:
        neighbors_left = []
    else:
        neighbors_left = list(neighbors_left)
    if neighbors_right is None:
        neighbors_right = []
    else:
        neighbors_right = list(neighbors_right)

    if not all(neighbor_key < key for neighbor_key in neighbors_left):
        raise ValidationError(
            f"Invalid left neighbors for key={hex(key)}: {neighbors_left}"
        )
    if not all(neighbor_key > key for neighbor_key in neighbors_right):
        raise ValidationError(
            f"Invalid right neighbors for key={hex(key)}: {neighbors_right}"
        )

    self.neighbors = (neighbors_left, neighbors_right)

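# A hedged usage sketch (the enclosing class is called SGNode here purely
# for illustration; Key is assumed to be an integer type):
#
#   node = SGNode(key=5, neighbors_left=[2, 3], neighbors_right=[8, 13])
#
# A left neighbor that is not strictly less than the node's own key is
# rejected:
#
#   SGNode(key=5, neighbors_left=[7])  # raises ValidationError
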
async def _handle_locate_requests(self) -> None:
    def get_endpoint(node_id: NodeID) -> Endpoint:
        try:
            return self.endpoint_db.get_endpoint(node_id)
        except KeyError:
            if node_id == self.client.local_node_id:
                return self.client.external_endpoint
            else:
                raise

    async with self.client.message_dispatcher.subscribe(Locate) as subscription:
        while self.manager.is_running:
            request = await subscription.receive()
            payload = request.payload
            content_id = content_key_to_node_id(payload.key)
            # TODO: ping the node to ensure it is available (unless it is the sending node).
            # TODO: verify content is actually available
            # TODO: check distance of key and store conditionally
            location_ids = self.content_manager.get_index(content_id)
            locations = tuple(
                Node(node_id, get_endpoint(node_id))
                for node_id in location_ids
            )
            await self.client.send_locations(
                request.node,
                request_id=payload.request_id,
                locations=locations,
            )

def __init__(
        self,
        center_id: NodeID,
        durable_db: DurableDatabaseAPI,
        config: Optional[StorageConfig] = None,
) -> None:
    if config is None:
        config = StorageConfig()
    self.config = config
    self.center_id = center_id

    # A database not subject to storage limits; we neither discard data
    # from it nor add data to it.
    self.durable_db = durable_db
    self.rebuild_durable_index()

    # A database that is dynamically populated by data we learn about over
    # the network.  Total size is capped.  Eviction is based on the
    # kademlia distance metric, preferring keys that are near our center.
    self.ephemeral_db = EphemeralDB(
        capacity=self.config.ephemeral_storage_size,
        distance_fn=lambda key: compute_distance(
            center_id,
            content_key_to_node_id(key),
        ),
    )
    self.ephemeral_index = EphemeralIndex(
        center_id=self.center_id,
        capacity=self.config.ephemeral_index_size,
    )

    # A database that holds recently seen content and evicts based on an
    # LRU policy.
    self.cache_db = CacheDB(capacity=self.config.cache_storage_size)
    self.cache_index = CacheIndex(capacity=self.config.cache_index_size)

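# A minimal sketch of the StorageConfig referenced above, assuming it is
# a simple dataclass of size limits (the field names match their usage in
# __init__; the defaults are illustrative, not the project's real values):
from dataclasses import dataclass


@dataclass
class StorageConfig:
    ephemeral_storage_size: int = 100 * 1024 * 1024  # total bytes of ephemeral content
    ephemeral_index_size: int = 100000               # max ephemeral index entries
    cache_storage_size: int = 10 * 1024 * 1024       # total bytes of cached content
    cache_index_size: int = 10000                    # max cache index entries
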
def _check_interest_in_ephemeral_content(self, key: bytes) -> bool:
    if self.content_manager.durable_db.has(key):
        return False
    elif self.content_manager.ephemeral_db.has(key):
        return False
    elif self.content_manager.ephemeral_db.has_capacity:
        return True

    content_id = content_key_to_node_id(key)
    content_distance = compute_distance(content_id, self.client.local_node_id)

    # The content is interesting only if it is closer to us than the
    # furthest key currently held in the ephemeral database.
    furthest_key = max(
        self.content_manager.ephemeral_db.keys(),
        key=lambda ephemeral_key: compute_distance(
            self.client.local_node_id,
            content_key_to_node_id(ephemeral_key),
        ),
    )
    furthest_content_id = content_key_to_node_id(furthest_key)
    furthest_distance = compute_distance(furthest_content_id, self.client.local_node_id)
    return content_distance < furthest_distance

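# A worked example of the rule above, using the XOR metric sketched
# earlier (numbers are illustrative): with local_node_id = 0b0000 and a
# full ephemeral db whose furthest-away key hashes to 0b1000 (distance 8),
# new content hashing to 0b0010 (distance 2) is interesting, while
# content hashing to 0b1100 (distance 12) is not.
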
def test_ephemeral_db_fuzz(capacity):
    center_id = 0
    keys = tuple(b'key/%d' % idx for idx in range(50))
    values = tuple('0' * idx for idx in range(1, 51))
    items = list(zip(keys, values))
    lookup = dict(items)
    sorted_items = list(sorted(
        items,
        key=lambda item: compute_distance(center_id, content_key_to_node_id(item[0])),
    ))
    cumulative_sizes = tuple(
        accumulate(operator.add, (len(item[1]) for item in sorted_items))
    )
    cutoff_index = bisect.bisect_left(cumulative_sizes, capacity)
    remaining_capacity = capacity - cumulative_sizes[cutoff_index - 1]

    sorted_keys = tuple(key for key, value in sorted_items)
    expected_keys = sorted_keys[:cutoff_index]
    expected_evicted_keys = sorted_keys[cutoff_index:]

    random.shuffle(items)

    db = EphemeralDB(
        capacity=capacity,
        distance_fn=lambda key: compute_distance(center_id, content_key_to_node_id(key)),
    )
    for key, value in items:
        db.set(key, value)

    for key in expected_keys:
        assert db.has(key)
    for key in expected_evicted_keys:
        # keys beyond the cutoff may survive eviction if they are small
        # enough to fit in the database's leftover capacity.
        if len(lookup[key]) <= remaining_capacity:
            continue
        assert not db.has(key)

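# Note: accumulate here is the toolz variant, whose signature is
# accumulate(binop, seq); e.g. list(accumulate(operator.add, [1, 2, 3]))
# == [1, 3, 6], so cumulative_sizes holds running totals of value sizes.
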
def has_key(self, key: bytes) -> bool:
    if self.durable_db.has(key):
        return True
    elif self.ephemeral_db.has(key):
        return True
    elif self.cache_db.has(key):
        return True

    content_id = content_key_to_node_id(key)
    if self.ephemeral_index.has(content_id):
        return True
    elif self.cache_index.has(content_id):
        return True
    else:
        return False

async def insert(self, key: Key) -> None:
    async def do_insert(location: Node) -> None:
        with trio.move_on_after(LINK_TIMEOUT):
            await self.client.graph_insert(location, key=key)

    content_key = graph_key_to_content_key(key)
    content_id = content_key_to_node_id(content_key)

    locations = await self.locations(content_key)
    nodes_near_content_id = await self.iterative_lookup(content_id)

    async with trio.open_nursery() as nursery:
        for location in locations:
            nursery.start_soon(do_insert, location)
        for node in nodes_near_content_id:
            nursery.start_soon(do_insert, node)

async def announce(self, key: bytes, who: Node) -> None:
    self.logger.debug("Starting announce for: %s", encode_hex(key))
    content_id = content_key_to_node_id(key)
    found_nodes = await self.iterative_lookup(content_id)

    async def do_advertise(node: Node) -> None:
        with trio.move_on_after(ADVERTISE_TIMEOUT):
            await self.client.advertise(node, key=key, who=who)

    for batch in partition_all(KADEMLIA_ANNOUNCE_CONCURRENCY, found_nodes):
        async with trio.open_nursery() as nursery:
            for node in batch:
                nursery.start_soon(do_advertise, node)

    self.logger.debug(
        "Finished announce to %d peers for: %s",
        len(found_nodes),
        encode_hex(key),
    )

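# Note: partition_all (from toolz) splits found_nodes into fixed-size
# batches, so at most KADEMLIA_ANNOUNCE_CONCURRENCY advertise calls run
# at once; e.g. list(partition_all(2, 'abc')) == [('a', 'b'), ('c',)].
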
async def locations(self, key: bytes) -> Tuple[Node, ...]:
    content_id = content_key_to_node_id(key)
    send_channel, receive_channel = trio.open_memory_channel[Node](0)

    async def do_get_locations(
            node: Node,
            send_channel: trio.abc.SendChannel[Node],
    ) -> None:
        async with send_channel:
            try:
                with trio.fail_after(LOCATE_TIMEOUT):
                    locations = await self.locate(node, key=key)
            except trio.TooSlowError:
                self.logger.debug(
                    "Timeout getting locations: node=%s key=%r",
                    node,
                    key,
                )
            else:
                for location in locations:
                    if location.node_id == self.client.local_node_id:
                        continue
                    await send_channel.send(location)

    nodes_to_ask = await self.iterative_lookup(content_id)

    async with trio.open_nursery() as nursery:
        async with send_channel:
            for node in nodes_to_ask:
                nursery.start_soon(do_get_locations, node, send_channel.clone())
        async with receive_channel:
            locations = tuple(set(
                [location async for location in receive_channel]
            ))

    self.logger.debug("Found %d locations for %r:%d", len(locations), key, content_id)
    return locations

def ingest_content(self, content: ContentBundle) -> None:
    if content.data is not None:
        self._ingest_content_data(content.key, content.data)
    content_id = content_key_to_node_id(content.key)
    self._ingest_content_index(content_id, content.node_id)

def rebuild_durable_index(self) -> None:
    self.durable_index = {
        content_key_to_node_id(key): (self.center_id,)
        for key in self.durable_db.keys()
    }