def test_ids_basic():
    # basic functionality tests
    for i in range(100):
        id1, id2 = DHTID.generate(), DHTID.generate()
        assert DHTID.MIN <= id1 < DHTID.MAX and DHTID.MIN <= id2 < DHTID.MAX
        assert DHTID.xor_distance(id1, id1) == DHTID.xor_distance(id2, id2) == 0
        assert DHTID.xor_distance(id1, id2) > 0 or (id1 == id2)
        assert DHTID.from_bytes(bytes(id1)) == id1 and DHTID.from_bytes(id2.to_bytes()) == id2

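# Hedged follow-up sketch: the protocol docstrings below refer to keys "digested by
# DHTID.generate(source=some_dict_key)". Assuming DHTID.generate hashes its `source`
# argument deterministically (an assumption, not asserted by the test above), the same
# source should always map to the same DHTID. Test name and inputs are illustrative only.
def test_ids_from_source_sketch():
    for source in ['some_key', b'raw_bytes_key']:
        assert DHTID.generate(source=source) == DHTID.generate(source=source)
        assert DHTID.MIN <= DHTID.generate(source=source) < DHTID.MAX
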
async def rpc_ping(self, request: dht_pb2.PingRequest, context: grpc.ServicerContext):
    """ Some node wants us to add it to our routing table. """
    response = dht_pb2.PingResponse(peer=self.node_info, sender_endpoint=context.peer(),
                                    dht_time=get_dht_time(), available=False)

    if request.peer and request.peer.node_id and request.peer.rpc_port:
        sender_id = DHTID.from_bytes(request.peer.node_id)
        if request.peer.endpoint != dht_pb2.NodeInfo.endpoint.DESCRIPTOR.default_value:
            sender_endpoint = request.peer.endpoint  # if peer has a preferred endpoint, use it
        else:
            sender_endpoint = replace_port(context.peer(), new_port=request.peer.rpc_port)

        response.sender_endpoint = sender_endpoint
        if request.validate:
            response.available = await self.call_ping(response.sender_endpoint, validate=False) == sender_id

        asyncio.create_task(self.update_routing_table(
            sender_id, sender_endpoint, responded=response.available or not request.validate))

    return response

async def rpc_find(self, request: dht_pb2.FindRequest, context: grpc.ServicerContext) -> dht_pb2.FindResponse:
    """
    Someone wants to find keys in the DHT. For all keys that we have locally, return the value and expiration.
    Also return :bucket_size: nearest neighbors from our routing table for each key (whether or not we found the value).
    """
    if request.peer:  # if requested, add peer to the routing table
        asyncio.create_task(self.rpc_ping(dht_pb2.PingRequest(peer=request.peer), context))

    response = dht_pb2.FindResponse(results=[], peer=self.node_info)
    for key_id in map(DHTID.from_bytes, request.keys):
        maybe_item = self.storage.get(key_id)
        cached_item = self.cache.get(key_id)
        if cached_item is not None and (maybe_item is None or cached_item.expiration_time > maybe_item.expiration_time):
            maybe_item = cached_item

        if maybe_item is None:  # value not found
            item = dht_pb2.FindResult(type=dht_pb2.NOT_FOUND)
        elif isinstance(maybe_item.value, DictionaryDHTValue):
            item = dht_pb2.FindResult(type=dht_pb2.FOUND_DICTIONARY, value=self.serializer.dumps(maybe_item.value),
                                      expiration_time=maybe_item.expiration_time)
        else:  # found regular value
            item = dht_pb2.FindResult(type=dht_pb2.FOUND_REGULAR, value=maybe_item.value,
                                      expiration_time=maybe_item.expiration_time)

        for node_id, endpoint in self.routing_table.get_nearest_neighbors(
                key_id, k=self.bucket_size, exclude=DHTID.from_bytes(request.peer.node_id)):
            item.nearest_node_ids.append(node_id.to_bytes())
            item.nearest_endpoints.append(endpoint)
        response.results.append(item)
    return response

async def rpc_store(self, request: dht_pb2.StoreRequest, context: grpc.ServicerContext) -> dht_pb2.StoreResponse:
    """ Some node wants us to store this (key, value) pair """
    if request.peer:  # if requested, add peer to the routing table
        asyncio.create_task(self.rpc_ping(dht_pb2.PingRequest(peer=request.peer), context))
    assert len(request.keys) == len(request.values) == len(request.expiration_time) == len(request.in_cache)
    response = dht_pb2.StoreResponse(store_ok=[], peer=self.node_info)
    for key, tag, value_bytes, expiration_time, in_cache in zip(
            request.keys, request.subkeys, request.values, request.expiration_time, request.in_cache):
        key_id = DHTID.from_bytes(key)
        storage = self.cache if in_cache else self.storage
        if tag == self.IS_DICTIONARY:  # store an entire dictionary with several subkeys
            value_dictionary = self.serializer.loads(value_bytes)
            assert isinstance(value_dictionary, DictionaryDHTValue)
            if not self._validate_dictionary(key, value_dictionary):
                response.store_ok.append(False)
                continue
            response.store_ok.append(all(
                storage.store_subkey(key_id, subkey, item.value, item.expiration_time)
                for subkey, item in value_dictionary.items()))
            continue

        if not self._validate_record(key, tag, value_bytes, expiration_time):
            response.store_ok.append(False)
            continue

        if tag == self.IS_REGULAR_VALUE:  # store a normal value without subkeys
            response.store_ok.append(storage.store(key_id, value_bytes, expiration_time))
        else:  # add a new entry to an existing dictionary value or create a new dictionary with one sub-key
            subkey = self.serializer.loads(tag)
            response.store_ok.append(storage.store_subkey(key_id, subkey, value_bytes, expiration_time))
    return response

async def call_find(self, peer: Endpoint, keys: Collection[DHTID]) -> Optional[Dict[
        DHTID, Tuple[Optional[ValueWithExpiration[Union[BinaryDHTValue, DictionaryDHTValue]]], Dict[DHTID, Endpoint]]]]:
    """
    Request keys from a peer. For each key, look for its (value, expiration time) locally and
    k additional peers that are most likely to have this key (ranked by XOR distance)

    :returns: A dict key => Tuple[optional value with expiration time, nearest neighbors]
        value with expiration time: the value stored by the recipient with that key and its expiration time,
            or None if the peer doesn't have this value
        nearest neighbors: a dictionary[node_id : endpoint] of nearest neighbors from the peer's routing table
        If the peer didn't respond, returns None
    """
    keys = list(keys)
    find_request = dht_pb2.FindRequest(keys=list(map(DHTID.to_bytes, keys)), peer=self.node_info)
    try:
        async with self.rpc_semaphore:
            response = await self._get_dht_stub(peer).rpc_find(find_request, timeout=self.wait_timeout)
        if response.peer and response.peer.node_id:
            peer_id = DHTID.from_bytes(response.peer.node_id)
            asyncio.create_task(self.update_routing_table(peer_id, peer, responded=True))
        assert len(keys) == len(response.results), "DHTProtocol: response is not aligned with keys"

        output = {}  # unpack data depending on its type
        for key, result in zip(keys, response.results):
            nearest = dict(zip(map(DHTID.from_bytes, result.nearest_node_ids), result.nearest_endpoints))

            if result.type == dht_pb2.NOT_FOUND:
                output[key] = None, nearest
            elif result.type == dht_pb2.FOUND_REGULAR:
                output[key] = ValueWithExpiration(result.value, result.expiration_time), nearest
            elif result.type == dht_pb2.FOUND_DICTIONARY:
                deserialized_dictionary = self.serializer.loads(result.value)
                output[key] = ValueWithExpiration(deserialized_dictionary, result.expiration_time), nearest
            else:
                logger.error(f"Unknown result type: {result.type}")

        return output
    except grpc.aio.AioRpcError as error:
        logger.debug(f"DHTProtocol failed to find at {peer}: {error.code()}")
        asyncio.create_task(self.update_routing_table(self.routing_table.get(endpoint=peer), peer, responded=False))

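# Hedged usage sketch for the return format documented in call_find above: each requested key maps to
# (ValueWithExpiration or None, dict of nearest node_id -> endpoint). `protocol`, the helper name and
# the demo key are illustrative placeholders, not part of the protocol implementation.
async def find_and_unpack_sketch(protocol, peer: Endpoint):
    key_id = DHTID.generate(source='demo_key')  # hypothetical key
    results = await protocol.call_find(peer, [key_id])
    if results is None:
        return None  # peer did not respond
    maybe_value, nearest = results[key_id]
    if maybe_value is not None:
        logger.info(f"found value {maybe_value.value}, expires at {maybe_value.expiration_time}")
    return maybe_value, nearest
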
async def call_store(self, peer: Endpoint, keys: Sequence[DHTID], values: Sequence[BinaryDHTValue],
                     expiration_time: Union[DHTExpiration, Sequence[DHTExpiration]],
                     in_cache: Optional[Union[bool, Sequence[bool]]] = None) -> Sequence[bool]:
    """
    Ask a recipient to store several (key, value : expiration_time) items or update their older value

    :param peer: request this peer to store the data
    :param keys: a list of N keys digested by DHTID.generate(source=some_dict_key)
    :param values: a list of N serialized values (bytes) for each respective key
    :param expiration_time: a list of N expiration timestamps for each respective key-value pair (see get_dht_time())
    :param in_cache: a list of booleans, True = store i-th key in cache, False = store i-th key locally
    :note: the difference between storing normally and in cache is that normal storage is kept (best-effort)
        until its expiration time, whereas cached storage can be evicted early due to limited cache size
    :return: list of [True / False], True = stored, False = failed (found newer value or no response)
        if the peer did not respond (e.g. due to timeout or congestion), returns all-False
    """
    if isinstance(expiration_time, DHTExpiration):
        expiration_time = [expiration_time] * len(keys)
    in_cache = in_cache if in_cache is not None else [False] * len(keys)  # default value (None)
    in_cache = [in_cache] * len(keys) if isinstance(in_cache, bool) else in_cache  # single bool
    keys, values, expiration_time, in_cache = map(list, [keys, values, expiration_time, in_cache])
    assert len(keys) == len(values) == len(expiration_time) == len(in_cache), "Data is not aligned"
    store_request = dht_pb2.StoreRequest(keys=list(map(DHTID.to_bytes, keys)), values=values,
                                         expiration_time=expiration_time, in_cache=in_cache, peer=self.node_info)
    try:
        async with self.rpc_semaphore:
            response = await self._get(peer).rpc_store(store_request, timeout=self.wait_timeout)
        if response.peer and response.peer.node_id:
            peer_id = DHTID.from_bytes(response.peer.node_id)
            asyncio.create_task(self.update_routing_table(peer_id, peer, responded=True))
        return response.store_ok
    except grpc.experimental.aio.AioRpcError as error:
        logger.warning(f"DHTProtocol failed to store at {peer}: {error.code()}")
        asyncio.create_task(self.update_routing_table(self.routing_table.get(endpoint=peer), peer, responded=False))
        return [False] * len(keys)

async def rpc_ping(self, peer_info: dht_pb2.NodeInfo, context: grpc.ServicerContext):
    """ Some node wants us to add it to our routing table. """
    if peer_info.node_id and peer_info.rpc_port:
        sender_id = DHTID.from_bytes(peer_info.node_id)
        rpc_endpoint = replace_port(context.peer(), new_port=peer_info.rpc_port)
        asyncio.create_task(self.update_routing_table(sender_id, rpc_endpoint))
    return self.node_info

async def call_store(self, peer: Endpoint, keys: Sequence[DHTID],
                     values: Sequence[Union[BinaryDHTValue, DictionaryDHTValue]],
                     expiration_time: Union[DHTExpiration, Sequence[DHTExpiration]],
                     subkeys: Optional[Union[Subkey, Sequence[Optional[Subkey]]]] = None,
                     in_cache: Optional[Union[bool, Sequence[bool]]] = None) -> Optional[List[bool]]:
    """
    Ask a recipient to store several (key, value : expiration_time) items or update their older value

    :param peer: request this peer to store the data
    :param keys: a list of N keys digested by DHTID.generate(source=some_dict_key)
    :param values: a list of N serialized values (bytes) for each respective key
    :param expiration_time: a list of N expiration timestamps for each respective key-value pair (see get_dht_time())
    :param subkeys: a list of N optional sub-keys. If None, stores the value normally. If a subkey is not None:
        1) if local storage doesn't have :key:, create a new dictionary {subkey: (value, expiration_time)}
        2) if local storage already has a dictionary under :key:, try to add (subkey, value, exp_time) to that dictionary
        3) if local storage associates :key: with a normal value with smaller expiration, clear :key: and perform (1)
        4) finally, if local storage currently associates :key: with a normal value with larger expiration, do nothing
    :param in_cache: a list of booleans, True = store i-th key in cache, False = store i-th key locally
    :note: the difference between storing normally and in cache is that normal storage is kept (best-effort)
        until its expiration time, whereas cached storage can be evicted early due to limited cache size
    :return: list of [True / False], True = stored, False = failed (found newer value or no response)
        if the peer did not respond (e.g. due to timeout or congestion), returns None
    """
    if isinstance(expiration_time, DHTExpiration):
        expiration_time = [expiration_time] * len(keys)
    if subkeys is None:
        subkeys = [None] * len(keys)

    in_cache = in_cache if in_cache is not None else [False] * len(keys)  # default value (None)
    in_cache = [in_cache] * len(keys) if isinstance(in_cache, bool) else in_cache  # single bool
    keys, subkeys, values, expiration_time, in_cache = map(list, [keys, subkeys, values, expiration_time, in_cache])
    for i in range(len(keys)):
        if subkeys[i] is None:  # add default sub-key if not specified
            subkeys[i] = self.IS_DICTIONARY if isinstance(values[i], DictionaryDHTValue) else self.IS_REGULAR_VALUE
        else:
            subkeys[i] = self.serializer.dumps(subkeys[i])
        if isinstance(values[i], DictionaryDHTValue):
            assert subkeys[i] == self.IS_DICTIONARY, "Please don't specify a subkey when storing an entire dictionary"
            values[i] = self.serializer.dumps(values[i])

    assert len(keys) == len(values) == len(expiration_time) == len(in_cache), "Data is not aligned"
    store_request = dht_pb2.StoreRequest(keys=list(map(DHTID.to_bytes, keys)), subkeys=subkeys, values=values,
                                         expiration_time=expiration_time, in_cache=in_cache, peer=self.node_info)
    try:
        async with self.rpc_semaphore:
            response = await self._get_dht_stub(peer).rpc_store(store_request, timeout=self.wait_timeout)
        if response.peer and response.peer.node_id:
            peer_id = DHTID.from_bytes(response.peer.node_id)
            asyncio.create_task(self.update_routing_table(peer_id, peer, responded=True))
        return response.store_ok
    except grpc.aio.AioRpcError as error:
        logger.debug(f"DHTProtocol failed to store at {peer}: {error.code()}")
        asyncio.create_task(self.update_routing_table(self.routing_table.get(endpoint=peer), peer, responded=False))
        return None

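# Hedged usage sketch of the subkey semantics documented in call_store above: a regular store versus
# adding a single (subkey, value) entry to a dictionary value. `protocol`, the helper name, the key,
# the subkey and the 60-second expiration offset are illustrative placeholders; values are assumed
# to be serialized to bytes by the caller.
async def store_examples_sketch(protocol, peer: Endpoint):
    key_id = DHTID.generate(source='demo_key')
    expiration = get_dht_time() + 60
    # 1) store a regular value (no subkey)
    ok_regular = await protocol.call_store(peer, keys=[key_id], values=[b'value'],
                                           expiration_time=[expiration])
    # 2) store one entry of a dictionary value under an explicit subkey
    ok_subkey = await protocol.call_store(peer, keys=[key_id], values=[b'partial_value'],
                                          expiration_time=[expiration], subkeys=['worker_0'])
    return ok_regular, ok_subkey
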
async def call_ping(self, peer: Endpoint, validate: bool = False, strict: bool = True) -> Optional[DHTID]:
    """
    Get peer's node id and add it to the routing table. If the peer doesn't respond, return None

    :param peer: string network address, e.g. 123.123.123.123:1337 or [2a21:6c8:b192:2105]:8888
    :param validate: if True, validates that this node's endpoint is reachable by the peer
    :param strict: if strict=True, validation will raise an exception on failure, otherwise it will only warn
    :note: if DHTProtocol was created with listen=True, also request the peer to add you to its routing table
    :return: node's DHTID, if the peer responded and decided to send its node_id
    """
    try:
        async with self.rpc_semaphore:
            ping_request = dht_pb2.PingRequest(peer=self.node_info, validate=validate)
            time_requested = get_dht_time()
            response = await self._get_dht_stub(peer).rpc_ping(ping_request, timeout=self.wait_timeout)
            time_responded = get_dht_time()
    except grpc.aio.AioRpcError as error:
        logger.debug(f"DHTProtocol failed to ping {peer}: {error.code()}")
        response = None
    responded = bool(response and response.peer and response.peer.node_id)

    if responded and validate:
        try:
            if self.server is not None and not response.available:
                raise ValidationError(f"Peer {peer} couldn't access this node at {response.sender_endpoint}. "
                                      f"Make sure that this port is open for incoming requests.")

            if response.dht_time != dht_pb2.PingResponse.dht_time.DESCRIPTOR.default_value:
                if response.dht_time < time_requested - MAX_DHT_TIME_DISCREPANCY_SECONDS or \
                        response.dht_time > time_responded + MAX_DHT_TIME_DISCREPANCY_SECONDS:
                    raise ValidationError(f"local time must be within {MAX_DHT_TIME_DISCREPANCY_SECONDS} seconds "
                                          f"of others (local: {time_requested:.5f}, peer: {response.dht_time:.5f})")
        except ValidationError as e:
            if strict:
                raise
            else:
                logger.warning(repr(e))

    peer_id = DHTID.from_bytes(response.peer.node_id) if responded else None
    asyncio.create_task(self.update_routing_table(peer_id, peer, responded=responded))
    return peer_id

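# Hedged usage sketch for call_ping with endpoint validation, as described in the docstring above.
# `protocol`, the helper name and the bootstrap endpoint are placeholders; with strict=False a failed
# validation only logs a warning instead of raising ValidationError.
async def ping_bootstrap_sketch(protocol, bootstrap_endpoint: Endpoint):
    peer_id = await protocol.call_ping(bootstrap_endpoint, validate=True, strict=False)
    if peer_id is None:
        logger.warning(f"Bootstrap peer {bootstrap_endpoint} did not respond")
    return peer_id
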
async def call_find(self, peer: Endpoint, keys: Collection[DHTID]) -> \
        Optional[Dict[DHTID, Tuple[Optional[BinaryDHTValue], Optional[DHTExpiration], Dict[DHTID, Endpoint]]]]:
    """
    Request keys from a peer. For each key, look for its (value, expiration time) locally and
    k additional peers that are most likely to have this key (ranked by XOR distance)

    :returns: A dict key => Tuple[optional value, optional expiration time, nearest neighbors]
        value: value stored by the recipient with that key, or None if peer doesn't have this value
        expiration time: expiration time of the returned value, None if no value was found
        neighbors: a dictionary[node_id : endpoint] containing nearest neighbors from peer's routing table
        If peer didn't respond, returns None
    """
    keys = list(keys)
    find_request = dht_pb2.FindRequest(keys=list(map(DHTID.to_bytes, keys)), peer=self.node_info)
    try:
        async with self.rpc_semaphore:
            response = await self._get(peer).rpc_find(find_request, timeout=self.wait_timeout)
        if response.peer and response.peer.node_id:
            peer_id = DHTID.from_bytes(response.peer.node_id)
            asyncio.create_task(self.update_routing_table(peer_id, peer, responded=True))
        assert len(response.values) == len(response.expiration_time) == len(response.nearest) == len(keys), \
            "DHTProtocol: response is not aligned with keys and/or expiration times"

        output = {}  # unpack data without special NOT_FOUND_* values
        for key, value, expiration_time, nearest in zip(
                keys, response.values, response.expiration_time, response.nearest):
            value = value if value != _NOT_FOUND_VALUE else None
            expiration_time = expiration_time if expiration_time != _NOT_FOUND_EXPIRATION else None
            nearest = dict(zip(map(DHTID.from_bytes, nearest.node_ids), nearest.endpoints))
            output[key] = (value, expiration_time, nearest)
        return output
    except grpc.experimental.aio.AioRpcError as error:
        logger.warning(f"DHTProtocol failed to find at {peer}: {error.code()}")
        asyncio.create_task(self.update_routing_table(self.routing_table.get(endpoint=peer), peer, responded=False))

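# Hedged usage sketch for the older 3-tuple return format documented in this call_find variant:
# each key maps to (optional value, optional expiration time, nearest neighbors). `protocol`, the
# helper name and the demo key are illustrative placeholders.
async def find_and_unpack_legacy_sketch(protocol, peer: Endpoint):
    key_id = DHTID.generate(source='demo_key')  # hypothetical key
    results = await protocol.call_find(peer, [key_id])
    if results is None:
        return None  # peer did not respond
    value, expiration_time, nearest = results[key_id]
    return value, expiration_time, nearest
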
async def rpc_store(self, request: dht_pb2.StoreRequest, context: grpc.ServicerContext) -> dht_pb2.StoreResponse:
    """ Some node wants us to store this (key, value) pair """
    if request.peer:  # if requested, add peer to the routing table
        asyncio.create_task(self.rpc_ping(request.peer, context))
    assert len(request.keys) == len(request.values) == len(request.expiration_time) == len(request.in_cache)
    response = dht_pb2.StoreResponse(store_ok=[], peer=self.node_info)
    for key_bytes, value_bytes, expiration_time, in_cache in zip(
            request.keys, request.values, request.expiration_time, request.in_cache):
        local_memory = self.cache if in_cache else self.storage
        response.store_ok.append(local_memory.store(DHTID.from_bytes(key_bytes), value_bytes, expiration_time))
    return response

async def call_ping(self, peer: Endpoint) -> Optional[DHTID]:
    """
    Get peer's node id and add it to the routing table. If the peer doesn't respond, return None

    :param peer: string network address, e.g. 123.123.123.123:1337 or [2a21:6c8:b192:2105]:8888
    :note: if DHTProtocol was created with listen=True, also request the peer to add you to its routing table
    :return: node's DHTID, if the peer responded and decided to send its node_id
    """
    try:
        async with self.rpc_semaphore:
            peer_info = await self._get(peer).rpc_ping(self.node_info, timeout=self.wait_timeout)
    except grpc.experimental.aio.AioRpcError as error:
        logger.warning(f"DHTProtocol failed to ping {peer}: {error.code()}")
        peer_info = None
    responded = bool(peer_info and peer_info.node_id)
    peer_id = DHTID.from_bytes(peer_info.node_id) if responded else None
    asyncio.create_task(self.update_routing_table(peer_id, peer, responded=responded))
    return peer_id

async def rpc_find(self, request: dht_pb2.FindRequest, context: grpc.ServicerContext) -> dht_pb2.FindResponse:
    """
    Someone wants to find keys in the DHT. For all keys that we have locally, return the value and expiration.
    Also return :bucket_size: nearest neighbors from our routing table for each key (whether or not we found the value).
    """
    if request.peer:  # if requested, add peer to the routing table
        asyncio.create_task(self.rpc_ping(request.peer, context))

    response = dht_pb2.FindResponse(values=[], expiration_time=[], nearest=[], peer=self.node_info)
    for key_id in map(DHTID.from_bytes, request.keys):
        maybe_value, maybe_expiration_time = self.storage.get(key_id)
        cached_value, cached_expiration_time = self.cache.get(key_id)
        if (cached_expiration_time or -float('inf')) > (maybe_expiration_time or -float('inf')):
            maybe_value, maybe_expiration_time = cached_value, cached_expiration_time

        nearest_neighbors = self.routing_table.get_nearest_neighbors(
            key_id, k=self.bucket_size, exclude=DHTID.from_bytes(request.peer.node_id))
        if nearest_neighbors:
            peer_ids, endpoints = zip(*nearest_neighbors)
        else:
            peer_ids, endpoints = [], []

        response.values.append(maybe_value if maybe_value is not None else _NOT_FOUND_VALUE)
        response.expiration_time.append(maybe_expiration_time if maybe_expiration_time else _NOT_FOUND_EXPIRATION)
        response.nearest.append(dht_pb2.Peers(node_ids=list(map(DHTID.to_bytes, peer_ids)), endpoints=endpoints))
    return response