Example #1
def test_ephemeral_db_basics():
    center_id = 0
    keys = tuple(b'key/%d' % idx for idx in range(5))
    key_a, key_b, key_c, key_d, key_e = tuple(
        sorted(
            keys,
            key=lambda key: compute_distance(center_id,
                                             content_key_to_node_id(key)),
        ))

    db = EphemeralDB(
        capacity=20,
        distance_fn=lambda key: compute_distance(center_id,
                                                 content_key_to_node_id(key)),
    )
    assert db.has_capacity
    # the first value is larger than the total capacity and is immediately evicted
    db.set(key_e, b'0' * 21)
    assert db.has_capacity
    assert not db.has(key_e)

    # now fill the database to capacity
    db.set(key_a, b'0' * 10)
    db.set(key_c, b'0' * 10)
    assert not db.has_capacity
    assert db.capacity == 0
    assert db.has(key_a)
    assert db.has(key_c)

    # reinsertion of either key should not change things
    db.set(key_a, b'0' * 10)
    db.set(key_c, b'0' * 10)
    assert not db.has_capacity
    assert db.capacity == 0
    assert db.has(key_a)
    assert db.has(key_c)

    # a key that is further away than anything stored is immediately evicted (no change)
    db.set(key_d, b'0')
    assert not db.has(key_d)
    assert not db.has_capacity
    assert db.capacity == 0
    assert db.has(key_a)
    assert db.has(key_c)

    # inserting a closer key will evict the furthest key
    db.set(key_b, b'0')
    assert not db.has(key_c)  # should be evicted
    assert db.has(key_a)
    assert db.has(key_b)
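
The eviction behaviour exercised by this test can be summarised in a minimal, self-contained sketch. The class below is an assumption inferred from the assertions above (set/has/keys, capacity tracked as the remaining byte budget, furthest-from-center keys evicted first); it is not the real EphemeralDB.

# Hedged sketch: a byte-budgeted store that evicts the keys furthest from the
# center first.  Names and semantics are inferred from the test above.
from typing import Callable, Dict, KeysView


class SketchEphemeralDB:
    def __init__(self, capacity: int, distance_fn: Callable[[bytes], int]) -> None:
        self.capacity = capacity  # remaining byte budget
        self.distance_fn = distance_fn
        self._data: Dict[bytes, bytes] = {}

    @property
    def has_capacity(self) -> bool:
        return self.capacity > 0

    def has(self, key: bytes) -> bool:
        return key in self._data

    def keys(self) -> KeysView[bytes]:
        return self._data.keys()

    def set(self, key: bytes, value: bytes) -> None:
        if key in self._data:
            self.capacity += len(self._data.pop(key))
        self._data[key] = value
        self.capacity -= len(value)
        # Evict furthest-from-center keys until we are back within budget; an
        # oversized or too-distant value ends up evicting itself immediately.
        while self.capacity < 0:
            furthest = max(self._data, key=self.distance_fn)
            self.capacity += len(self._data.pop(furthest))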
Example #2
    def __init__(
        self,
        key: Key,
        neighbors_left: Sequence[Key] = None,
        neighbors_right: Sequence[Key] = None,
    ) -> None:
        self.key = key
        self.membership_vector = content_key_to_node_id(int_to_big_endian(key))

        if neighbors_left is None:
            neighbors_left = []
        else:
            neighbors_left = list(neighbors_left)

        if neighbors_right is None:
            neighbors_right = []
        else:
            neighbors_right = list(neighbors_right)

        if not all(neighbor_key < key for neighbor_key in neighbors_left):
            raise ValidationError(
                "Invalid left neighbors for key={hex(key)}: {neighbors_left}")
        if not all(neighbor_key > key for neighbor_key in neighbors_right):
            raise ValidationError(
                "Invalid right neighbors for key={hex(key)}: {neighbors_right}"
            )

        self.neighbors = (neighbors_left, neighbors_right)
Example #3
    async def _handle_locate_requests(self) -> None:
        def get_endpoint(node_id: NodeID) -> Endpoint:
            try:
                return self.endpoint_db.get_endpoint(node_id)
            except KeyError:
                if node_id == self.client.local_node_id:
                    return self.client.external_endpoint
                else:
                    raise

        async with self.client.message_dispatcher.subscribe(
                Locate) as subscription:
            while self.manager.is_running:
                request = await subscription.receive()
                payload = request.payload
                content_id = content_key_to_node_id(payload.key)
                # TODO: ping the node to ensure it is available (unless it is the sending node).
                # TODO: verify content is actually available
                # TODO: check distance of key and store conditionally
                location_ids = self.content_manager.get_index(content_id)

                locations = tuple(
                    Node(node_id, get_endpoint(node_id))
                    for node_id in location_ids)
                await self.client.send_locations(
                    request.node,
                    request_id=payload.request_id,
                    locations=locations,
                )
Example #4
    def __init__(
        self,
        center_id: NodeID,
        durable_db: DurableDatabaseAPI,
        config: StorageConfig = None,
    ) -> None:
        if config is None:
            config = StorageConfig()
        self.config = config
        self.center_id = center_id

        # A database not subject to storage limits that will not have data
        # discarded or added to it.
        self.durable_db = durable_db
        self.rebuild_durable_index()

        # A database that will be dynamically populated by data we learn about
        # over the network.  Total size is capped.  Eviction of keys is based
        # on the kademlia distance metric, preferring keys that are near our
        # center.
        self.ephemeral_db = EphemeralDB(
            capacity=self.config.ephemeral_storage_size,
            distance_fn=lambda key: compute_distance(
                center_id, content_key_to_node_id(key)),
        )
        self.ephemeral_index = EphemeralIndex(
            center_id=self.center_id,
            capacity=self.config.ephemeral_index_size,
        )

        # A database that holds recently seen content and evicts based on an
        # LRU policy.
        self.cache_db = CacheDB(capacity=self.config.cache_storage_size)
        self.cache_index = CacheIndex(capacity=self.config.cache_index_size)
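
The eviction comments above lean on the Kademlia distance metric. Below is a sketch of what compute_distance is assumed to be (plain XOR of two integer IDs); the codebase's actual implementation is not shown in these examples.

# Assumed: Kademlia distance is the XOR of two integer node IDs, so a smaller
# result means the content ID is "closer" to center_id and survives eviction longer.
def compute_distance(left_id: int, right_id: int) -> int:
    return left_id ^ right_id


assert compute_distance(0b1100, 0b1010) == 0b0110
assert compute_distance(7, 7) == 0  # an ID is at distance zero from itself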
Example #5
    def _check_interest_in_ephemeral_content(self, key: bytes) -> bool:
        if self.content_manager.durable_db.has(key):
            return False
        elif self.content_manager.ephemeral_db.has(key):
            return False
        elif self.content_manager.ephemeral_db.has_capacity:
            return True

        content_id = content_key_to_node_id(key)
        content_distance = compute_distance(content_id,
                                            self.client.local_node_id)
        furthest_key = max(
            self.content_manager.ephemeral_db.keys(),
            key=lambda db_key: compute_distance(self.client.local_node_id,
                                                content_key_to_node_id(db_key)),
        )
        furthest_content_id = content_key_to_node_id(furthest_key)
        furthest_distance = compute_distance(furthest_content_id,
                                             self.client.local_node_id)
        return content_distance < furthest_distance
Example #6
def test_ephemeral_db_fuzz(capacity):
    center_id = 0
    keys = tuple(b'key/%d' % idx for idx in range(50))
    values = tuple('0' * idx for idx in range(1, 51))
    items = list(zip(keys, values))
    lookup = dict(items)
    sorted_items = list(
        sorted(
            items,
            key=lambda item: compute_distance(center_id,
                                              content_key_to_node_id(item[0])),
        ))
    cumulative_sizes = tuple(
        accumulate(operator.add, (len(item[1]) for item in sorted_items)))
    cutoff_index = bisect.bisect_left(cumulative_sizes, capacity)
    remaining_capacity = capacity - cumulative_sizes[cutoff_index - 1]

    sorted_keys = tuple(key for key, value in sorted_items)
    expected_keys = sorted_keys[:cutoff_index]
    expected_evicted_keys = sorted_keys[cutoff_index:]

    random.shuffle(items)
    db = EphemeralDB(
        capacity=capacity,
        distance_fn=lambda key: compute_distance(center_id,
                                                 content_key_to_node_id(key)),
    )

    for key, value in items:
        db.set(key, value)

    for key in expected_keys:
        assert db.has(key)
    for key in expected_evicted_keys:
        # we can end up with extra keys that don't get evicted if the database
        # has leftover capacity.
        if len(lookup[key]) <= remaining_capacity:
            continue
        assert not db.has(key)
Example #7
    def has_key(self, key: bytes) -> bool:
        if self.durable_db.has(key):
            return True
        elif self.ephemeral_db.has(key):
            return True
        elif self.cache_db.has(key):
            return True

        content_id = content_key_to_node_id(key)

        if self.ephemeral_index.has(content_id):
            return True
        elif self.cache_index.has(content_id):
            return True
        else:
            return False
Example #8
    async def insert(self, key: Key) -> None:
        async def do_insert(location: Node) -> None:
            with trio.move_on_after(LINK_TIMEOUT):
                await self.client.graph_insert(location, key=key)

        content_key = graph_key_to_content_key(key)
        content_id = content_key_to_node_id(content_key)

        locations = await self.locations(content_key)
        nodes_near_content_id = await self.iterative_lookup(content_id)

        async with trio.open_nursery() as nursery:
            for location in locations:
                nursery.start_soon(do_insert, location)
            for node in nodes_near_content_id:
                nursery.start_soon(do_insert, node)
Example #9
    async def announce(self, key: bytes, who: Node) -> None:
        self.logger.debug("Starting announce for: %s", encode_hex(key))
        content_id = content_key_to_node_id(key)
        found_nodes = await self.iterative_lookup(content_id)

        async def do_advertise(node: Node) -> None:
            with trio.move_on_after(ADVERTISE_TIMEOUT):
                await self.client.advertise(node, key=key, who=who)

        for batch in partition_all(KADEMLIA_ANNOUNCE_CONCURRENCY, found_nodes):
            async with trio.open_nursery() as nursery:
                for node in batch:
                    nursery.start_soon(do_advertise, node)
        self.logger.debug(
            "Finished announce to %d peers for: %s",
            len(found_nodes),
            encode_hex(key),
        )
Example #10
    async def locations(self, key: bytes) -> Tuple[Node, ...]:
        content_id = content_key_to_node_id(key)

        send_channel, receive_channel = trio.open_memory_channel[Node](0)

        async def do_get_locations(
                node: Node, send_channel: trio.abc.SendChannel[Node]) -> None:
            async with send_channel:
                try:
                    with trio.fail_after(LOCATE_TIMEOUT):
                        locations = await self.locate(node, key=key)
                except trio.TooSlowError:
                    self.logger.debug(
                        "Timeout getting locations: node=%s  key=%r",
                        node,
                        key,
                    )
                else:
                    for location in locations:
                        if location.node_id == self.client.local_node_id:
                            continue
                        await send_channel.send(location)

        nodes_to_ask = await self.iterative_lookup(content_id)
        async with trio.open_nursery() as nursery:
            async with send_channel:
                for node in nodes_to_ask:
                    nursery.start_soon(do_get_locations, node,
                                       send_channel.clone())

            async with receive_channel:
                locations = tuple(
                    set([location async for location in receive_channel]))

        self.logger.debug("Found %d locations for %r:%d", len(locations), key,
                          content_id)
        return locations
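
The channel handling above follows trio's clone-per-worker fan-in pattern. Here is a stripped-down, runnable sketch of the same idea; the worker and its payload are made up purely for illustration.

import trio


async def fan_out_and_collect(items):
    send_channel, receive_channel = trio.open_memory_channel(0)

    async def worker(item, channel):
        # Each worker owns a clone; closing it signals that this worker is done.
        async with channel:
            await channel.send(item * 2)

    async with trio.open_nursery() as nursery:
        async with send_channel:
            for item in items:
                nursery.start_soon(worker, item, send_channel.clone())
        # Iteration ends once the original channel and every clone are closed.
        async with receive_channel:
            return [result async for result in receive_channel]


print(trio.run(fan_out_and_collect, [1, 2, 3]))  # e.g. [2, 4, 6], in some order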
Example #11
    def ingest_content(self, content: ContentBundle) -> None:
        if content.data is not None:
            self._ingest_content_data(content.key, content.data)
        content_id = content_key_to_node_id(content.key)
        self._ingest_content_index(content_id, content.node_id)
Example #12
    def rebuild_durable_index(self) -> None:
        self.durable_index = {
            content_key_to_node_id(key): (self.center_id, )
            for key in self.durable_db.keys()
        }