def test_invalid_key_length(self):
    """Keys of 47, 49 and 0 bytes must be rejected with ValueError.

    Checked both when building a Distance and when calling a Distance that
    was built from a 48-byte key.
    """
    malformed_keys = (b'1' * 47, b'1' * 49, b'')
    # a Distance cannot be constructed from a malformed key
    for malformed in malformed_keys:
        self.assertRaises(ValueError, Distance, malformed)
    # a Distance built from a 48-byte key refuses to measure a malformed key
    for malformed in malformed_keys:
        self.assertRaises(ValueError, Distance(b'0' * 48), malformed)
def midpoint_id_in_bucket_range(self, bucket_index: int) -> bytes:
    """Return the id derived from the middle of a bucket's range.

    The midpoint of ``range_min``..``range_max`` is encoded as
    ``constants.hash_length`` big-endian bytes, passed through
    ``Distance(self._parent_node_id)`` and the integer result is encoded
    the same way.

    @param bucket_index: index into self.buckets
    @return: constants.hash_length big-endian bytes
    """
    bucket = self.buckets[bucket_index]
    half_width = int((bucket.range_max - bucket.range_min) // 2)
    # NOTE(review): presumably Distance is the XOR metric, which would map the
    # midpoint from distance space back to an id — confirm against Distance
    relative_to_parent = Distance(self._parent_node_id)
    midpoint = int(bucket.range_min + half_width)
    midpoint_key = midpoint.to_bytes(constants.hash_length, 'big')
    return relative_to_parent(midpoint_key).to_bytes(constants.hash_length, 'big')
def random_id_in_bucket_range(self, bucket_index: int) -> bytes:
    """Return an id derived from a random point of a bucket's range.

    A value is drawn with ``random.randrange(range_min, range_max)`` (the
    upper bound is excluded), encoded as ``constants.hash_length`` big-endian
    bytes, passed through ``Distance(self._parent_node_id)`` and the integer
    result is encoded the same way.

    @param bucket_index: index into self.buckets
    @return: constants.hash_length big-endian bytes
    """
    bucket = self.buckets[bucket_index]
    drawn = int(random.randrange(bucket.range_min, bucket.range_max))
    relative_to_parent = Distance(self._parent_node_id)
    drawn_key = drawn.to_bytes(constants.hash_length, 'big')
    return relative_to_parent(drawn_key).to_bytes(constants.hash_length, 'big')
def __init__(
        self, loop: asyncio.AbstractEventLoop, peer_manager: 'PeerManager',
        routing_table: 'TreeRoutingTable', protocol: 'KademliaProtocol', key: bytes,
        bottom_out_limit: typing.Optional[int] = 2,
        max_results: typing.Optional[int] = constants.K,
        exclude: typing.Optional[typing.List[typing.Tuple[str, int]]] = None,
        shortlist: typing.Optional[typing.List['KademliaPeer']] = None):
    """
    Set up the state of an iterative lookup for `key` and seed it.

    @param loop: event loop used for the result queue and for probe tasks
    @param key: lookup target, must be constants.HASH_LENGTH bytes long
    @param exclude: (address, udp port) pairs; stored as an empty list when not given
    @param shortlist: optional starting peers, forwarded to get_shortlist()
    @raise ValueError: if the key does not have the required length
    """
    key_length = len(key)
    if key_length != constants.HASH_LENGTH:
        raise ValueError("invalid key length: %i" % key_length)

    # collaborators and search parameters
    self.loop = loop
    self.peer_manager = peer_manager
    self.routing_table = routing_table
    self.protocol = protocol
    self.key = key
    self.bottom_out_limit = bottom_out_limit
    self.max_results = max_results
    self.exclude = exclude if exclude else []

    # peers known so far, and the ones already probed
    self.active: typing.Set['KademliaPeer'] = set()
    self.contacted: typing.Set['KademliaPeer'] = set()

    # distance bookkeeping relative to the key
    self.distance = Distance(key)
    self.closest_peer: typing.Optional['KademliaPeer'] = None
    self.prev_closest_peer: typing.Optional['KademliaPeer'] = None

    # iteration plumbing
    self.iteration_queue = asyncio.Queue(loop=self.loop)
    self.running_probes: typing.Set[asyncio.Task] = set()
    self.iteration_count = 0
    self.bottom_out_count = 0
    self.running = False
    self.tasks: typing.List[asyncio.Task] = []
    self.delayed_calls: typing.List[asyncio.Handle] = []

    for seed in get_shortlist(routing_table, key, shortlist):
        if not seed.node_id:
            # seed nodes
            self._schedule_probe(seed)
            continue
        self._add_active(seed)
async def test_announce_blobs(self):
    """
    Announce 92 blobs across a 100 peer test network, then check placement and routing.

    Two things are asserted after the announcer has run:
    - for every stored blob, at least 80% of the constants.K nodes closest
      to the blob hash hold peers for it
    - a peer joined through a chain of five newly added peers can find
      this node as the only peer for blob1
    """
    blob1 = binascii.hexlify(b'1' * 48).decode()
    blob2 = binascii.hexlify(b'2' * 48).decode()
    async with self._test_network_context(peer_count=100):
        # 2 hand made blobs + 90 generated ones = the 92 asserted below
        await self.storage.add_blobs((blob1, 1024, 0, True), (blob2, 1024, 0, True), finished=True)
        await self.storage.add_blobs(
            *((constants.generate_id(value).hex(), 1024, 0, True) for value in range(1000, 1090)),
            finished=True)
        # make every blob due for announcement
        await self.storage.db.execute("update blob set next_announce_time=0, should_announce=1")
        to_announce = await self.storage.get_blobs_to_announce()
        self.assertEqual(92, len(to_announce))
        self.blob_announcer.start(batch_size=10)  # so it covers batching logic
        # takes 60 seconds to start
        # NOTE(review): this comment used to say "we advance 120 to ensure it processed all batches",
        # but the call below advances 60.0 — confirm which value is intended
        ongoing_announcements = asyncio.ensure_future(self.blob_announcer.wait())
        await self.instant_advance(60.0)
        await ongoing_announcements
        to_announce = await self.storage.get_blobs_to_announce()
        self.assertEqual(0, len(to_announce))
        self.blob_announcer.stop()

        # as routing table pollution will cause some peers to be hard to reach, we add a tolerance for CI
        tolerance = 0.8  # at least 80% of the announcements are within the top K
        for blob in await self.storage.get_all_blob_hashes():
            distance = Distance(bytes.fromhex(blob))
            # order every node by its distance to the blob hash, closest first
            candidates = list(self.nodes.values())
            candidates.sort(key=lambda sorting_node: distance(sorting_node.protocol.node_id))
            has_it = 0
            for index, node in enumerate(candidates[:constants.K], start=1):
                if node.protocol.data_store.get_peers_for_blob(bytes.fromhex(blob)):
                    has_it += 1
                else:
                    logging.warning("blob %s wasnt found between the best K (%s)",
                                    blob[:8], node.protocol.node_id.hex()[:8])
            self.assertGreaterEqual(has_it, int(tolerance * constants.K))

        # test that we can route from a poorly connected peer all the way to the announced blob
        current = len(self.nodes)
        await self.chain_peer(constants.generate_id(current + 1), '1.2.3.10')
        await self.chain_peer(constants.generate_id(current + 2), '1.2.3.11')
        await self.chain_peer(constants.generate_id(current + 3), '1.2.3.12')
        await self.chain_peer(constants.generate_id(current + 4), '1.2.3.13')
        last = await self.chain_peer(constants.generate_id(current + 5), '1.2.3.14')

        search_q, peer_q = asyncio.Queue(loop=self.loop), asyncio.Queue(loop=self.loop)
        search_q.put_nowait(blob1)

        # search from the last chained peer and wait up to one second for the first result
        _, task = last.accumulate_peers(search_q, peer_q)
        found_peers = await asyncio.wait_for(peer_q.get(), 1.0)
        task.cancel()

        # the only peer found for blob1 must be this test's own node
        self.assertEqual(1, len(found_peers))
        self.assertEqual(self.node.protocol.node_id, found_peers[0].node_id)
        self.assertEqual(self.node.protocol.external_ip, found_peers[0].address)
        self.assertEqual(self.node.protocol.peer_port, found_peers[0].tcp_port)
def should_split(self, bucket_index: int, to_add: bytes) -> bool:
    """
    Decide whether the bucket at `bucket_index` should be split to fit `to_add`.

    Buckets below self._split_buckets_under_index always split. Otherwise the
    bucket splits only when `to_add` is closer to the parent node id than the
    k-th closest known peer, or than the farthest known peer when fewer than
    constants.k peers are known.

    @param bucket_index: index of the bucket that would be split
    @param to_add: node id of the peer being added
    @raise IndexError: if get_peers() returns no peers at all
    """
    # https://stackoverflow.com/questions/32129978/highly-unbalanced-kademlia-routing-table/32187456#32187456
    if bucket_index < self._split_buckets_under_index:
        return True
    known_peers = self.get_peers()
    distance_to_parent = Distance(self._parent_node_id)
    known_peers.sort(key=lambda known: distance_to_parent(known.node_id))
    # k-th closest peer, or the last (farthest) one when fewer than k are known
    reference_peer = known_peers[min(len(known_peers), constants.k) - 1]
    return distance_to_parent(to_add) < distance_to_parent(reference_peer.node_id)
async def peer_search(self, node_id: bytes, count=constants.k, max_results=constants.k*2,
                      bottom_out_limit=20, shortlist: typing.Optional[typing.List['KademliaPeer']] = None
                      ) -> typing.List['KademliaPeer']:
    """
    Run an iterative node find for `node_id` and return the closest peers found.

    Every batch yielded by the finder is collected, then the peers are
    ordered by Distance(node_id) of their node ids and cut to `count`.

    @param node_id: id to search for
    @param count: maximum number of peers returned
    @param max_results: forwarded to get_iterative_node_finder()
    @param bottom_out_limit: forwarded to get_iterative_node_finder()
    @param shortlist: forwarded to get_iterative_node_finder()
    """
    found: typing.List['KademliaPeer'] = []
    finder = self.get_iterative_node_finder(
        node_id, shortlist=shortlist, bottom_out_limit=bottom_out_limit, max_results=max_results
    )
    async for batch in finder:
        found += batch
    distance_to_target = Distance(node_id)
    closest_first = sorted(found, key=lambda candidate: distance_to_target(candidate.node_id))
    return closest_first[:count]
def get_peers(self, count=-1, exclude_contact=None, sort_distance_to=None) -> typing.List['KademliaPeer']:
    """
    Returns a list containing up to the first count number of contacts

    @param count: The amount of contacts to return (if 0 or less, all
                  contacts are requested); the result never holds more
                  than constants.k contacts
    @type  count: int
    @param exclude_contact: A node_id to exclude; a contact with this
                            node_id is dropped before sorting and counting
    @param sort_distance_to: node_id to sort the contacts by distance to,
                             defaulting to this bucket's own node_id when
                             falsy. If False (the object, not merely a falsy
                             value) the contacts are not sorted

    @return: A new list with up to the first count contacts. If no
             contacts are present an empty list is returned
    @rtype: list
    """
    peers = [peer for peer in self.peers if peer.node_id != exclude_contact]

    # a non-positive count asks for everything, but never more than k is returned
    if count <= 0:
        count = len(peers)
    count = min(count, constants.k)

    if not peers:
        return peers

    if sort_distance_to is not False:
        # build the metric once instead of once per peer inside the sort key
        distance = Distance(sort_distance_to or self._node_id)
        peers.sort(key=lambda peer: distance(peer.node_id))

    return peers[:count]
def __init__(self, peer_manager: 'PeerManager', range_min: int, range_max: int, node_id: bytes):
    """
    Create an empty bucket covering the given range.

    @param peer_manager: stored as self._peer_manager
    @param range_min: The lower boundary for the range in the n-bit ID space covered by this k-bucket
    @param range_max: The upper boundary for the range in the ID space covered by this k-bucket
    @param node_id: id stored as self._node_id and used to build self._distance_to_self
    """
    self._peer_manager = peer_manager
    self._node_id = node_id
    self._distance_to_self = Distance(node_id)
    # boundaries of the covered range
    self.range_min, self.range_max = range_min, range_max
    # the bucket starts out empty and never accessed
    self.peers: typing.List['KademliaPeer'] = []
    self.last_accessed = 0
def find_close_peers(
        self, key: bytes, count: typing.Optional[int] = None,
        sender_node_id: typing.Optional[bytes] = None) -> typing.List['KademliaPeer']:
    """
    Return known peers ordered by Distance(key) of their node ids, closest first.

    The parent node id is always left out, and so is `sender_node_id` when given.

    @param key: key the peers are ordered by
    @param count: maximum number of peers returned, constants.k when falsy
    @param sender_node_id: optional extra node id to leave out
    @return: a list of peers, empty when nothing remains after filtering
    """
    if sender_node_id:
        excluded_ids = (self._parent_node_id, sender_node_id)
    else:
        excluded_ids = (self._parent_node_id,)
    limit = count or constants.k
    distance_to_key = Distance(key)
    candidates = [known for known in self.get_peers() if known.node_id not in excluded_ids]
    candidates.sort(key=lambda candidate: distance_to_key(candidate.node_id))
    # slicing already clamps to the list length and gives [] for an empty list
    return candidates[:limit]
async def peer_search(
        self, node_id: bytes, count=constants.K, max_results=constants.K * 2,
        shortlist: typing.Optional[typing.List['KademliaPeer']] = None
) -> typing.List['KademliaPeer']:
    """
    Run an iterative node find for `node_id` and return the closest peers found.

    The finder is wrapped in aclosing() for the duration of the iteration.
    Every yielded batch is collected, then the peers are ordered by
    Distance(node_id) of their node ids and cut to `count`.

    @param node_id: id to search for
    @param count: maximum number of peers returned
    @param max_results: forwarded to get_iterative_node_finder()
    @param shortlist: forwarded to get_iterative_node_finder()
    """
    found: typing.List['KademliaPeer'] = []
    finder = self.get_iterative_node_finder(node_id, shortlist=shortlist, max_results=max_results)
    async with aclosing(finder) as node_finder:
        async for batch in node_finder:
            found += batch
    distance_to_target = Distance(node_id)
    closest_first = sorted(found, key=lambda candidate: distance_to_target(candidate.node_id))
    return closest_first[:count]
class IterativeFinder:
    """
    Base class for an iterative lookup of `key`, consumed with `async for`.

    Subclasses implement send_probe() and check_result_ready(); results are
    handed to the consumer through self.iteration_queue, and a falsy item
    (None) ends the iteration.
    """

    def __init__(
            self, loop: asyncio.AbstractEventLoop, peer_manager: 'PeerManager',
            routing_table: 'TreeRoutingTable', protocol: 'KademliaProtocol', key: bytes,
            bottom_out_limit: typing.Optional[int] = 2,
            max_results: typing.Optional[int] = constants.K,
            exclude: typing.Optional[typing.List[typing.Tuple[str, int]]] = None,
            shortlist: typing.Optional[typing.List['KademliaPeer']] = None):
        """
        Set up the lookup state and seed it from get_shortlist().

        @param loop: event loop used for the result queue and for probe tasks
        @param key: lookup target, must be constants.HASH_LENGTH bytes long
        @param exclude: (address, udp port) pairs that are never probed
        @param shortlist: optional starting peers, forwarded to get_shortlist()
        @raise ValueError: if the key does not have the required length
        """
        if len(key) != constants.HASH_LENGTH:
            raise ValueError("invalid key length: %i" % len(key))
        self.loop = loop
        self.peer_manager = peer_manager
        self.routing_table = routing_table
        self.protocol = protocol

        self.key = key
        self.bottom_out_limit = bottom_out_limit
        self.max_results = max_results
        self.exclude = exclude or []

        # peers known so far, and the subset that has already been probed
        self.active: typing.Set['KademliaPeer'] = set()
        self.contacted: typing.Set['KademliaPeer'] = set()
        self.distance = Distance(key)

        self.closest_peer: typing.Optional['KademliaPeer'] = None
        self.prev_closest_peer: typing.Optional['KademliaPeer'] = None

        self.iteration_queue = asyncio.Queue(loop=self.loop)

        self.running_probes: typing.Set[asyncio.Task] = set()
        self.iteration_count = 0
        self.bottom_out_count = 0
        self.running = False
        self.tasks: typing.List[asyncio.Task] = []
        self.delayed_calls: typing.List[asyncio.Handle] = []
        for peer in get_shortlist(routing_table, key, shortlist):
            if peer.node_id:
                self._add_active(peer)
            else:
                # seed nodes
                self._schedule_probe(peer)

    async def send_probe(self, peer: 'KademliaPeer') -> FindResponse:
        """
        Send the rpc request to the peer and return an object with the FindResponse interface
        """
        raise NotImplementedError()

    def search_exhausted(self):
        """
        This method ends the iterator because there are no more peers to contact.
        Override to provide last time results.
        """
        self.iteration_queue.put_nowait(None)

    def check_result_ready(self, response: FindResponse):
        """
        Called after adding peers from an rpc result to the shortlist.
        This method is responsible for putting a result for the generator into the Queue
        """
        raise NotImplementedError()

    def get_initial_result(self) -> typing.List['KademliaPeer']:  #pylint: disable=no-self-use
        """
        Get an initial or cached result to be put into the Queue. Used for findValue requests where the blob
        has peers in the local data store of blobs announced to us
        """
        return []

    def _is_closer(self, peer: 'KademliaPeer') -> bool:
        """Return True when there is no closest peer yet or `peer` is closer to the key than it."""
        return not self.closest_peer or self.distance.is_closer(
            peer.node_id, self.closest_peer.node_id)

    def _add_active(self, peer):
        """
        Add `peer` to the active set and update the closest peer bookkeeping.

        Peers already active, peers without a node id and our own node id are ignored.
        """
        if peer not in self.active and peer.node_id and peer.node_id != self.protocol.node_id:
            self.active.add(peer)
            if self._is_closer(peer):
                self.prev_closest_peer = self.closest_peer
                self.closest_peer = peer

    async def _handle_probe_result(self, peer: 'KademliaPeer', response: FindResponse):
        """
        Record the peer that answered and every contact it returned, then let
        check_result_ready() decide whether a result can be queued.
        """
        self._add_active(peer)
        for contact_triple in response.get_close_triples():
            node_id, address, udp_port = contact_triple
            try:
                self._add_active(make_kademlia_peer(node_id, address, udp_port))
            except ValueError:
                log.warning(
                    "misbehaving peer %s:%i returned peer with reserved ip %s:%i",
                    peer.address, peer.udp_port, address, udp_port)
        self.check_result_ready(response)

    async def _send_probe(self, peer: 'KademliaPeer'):
        """
        Probe one peer and process its response.

        A timeout or ValueError removes the peer from the active set, a
        TransportNotConnected closes the whole search, and a RemoteException
        is ignored.
        """
        try:
            response = await self.send_probe(peer)
        except asyncio.TimeoutError:
            self.active.discard(peer)
            return
        except ValueError as err:
            log.warning(str(err))
            self.active.discard(peer)
            return
        except TransportNotConnected:
            return self.aclose()
        except RemoteException:
            return
        return await self._handle_probe_result(peer, response)

    async def _search_round(self):
        """
        Send up to constants.ALPHA probes to the closest active peers that were not contacted yet
        """
        added = 0
        to_probe = list(self.active - self.contacted)
        # order by each peer's own distance to the key; the previous sort key,
        # self.distance(self.key), was the same value for every peer and so
        # left the candidates in arbitrary set order
        to_probe.sort(key=lambda peer: self.distance(peer.node_id))
        for peer in to_probe:
            if added >= constants.ALPHA:
                break
            origin_address = (peer.address, peer.udp_port)
            if origin_address in self.exclude:
                continue
            # never probe ourselves, whether matched by node id or by address
            if peer.node_id == self.protocol.node_id:
                continue
            if origin_address == (self.protocol.external_ip, self.protocol.udp_port):
                continue
            self._schedule_probe(peer)
            added += 1
        log.debug("running %d probes for key %s", len(self.running_probes), self.key.hex()[:8])
        if not added and not self.running_probes:
            log.debug("search for %s exhausted", self.key.hex()[:8])
            self.search_exhausted()

    def _schedule_probe(self, peer: 'KademliaPeer'):
        """
        Mark `peer` as contacted and start a task probing it.

        When the task finishes it is dropped from running_probes together
        with any other finished probe; if none are left running, the next
        search round is started right away.
        """
        self.contacted.add(peer)

        t = self.loop.create_task(self._send_probe(peer))

        def callback(_):
            self.running_probes.difference_update({
                probe for probe in self.running_probes if probe.done() or probe == t
            })
            if not self.running_probes:
                self.tasks.append(self.loop.create_task(self._search_task(0.0)))

        t.add_done_callback(callback)
        self.running_probes.add(t)

    async def _search_task(self, delay: typing.Optional[float] = constants.ITERATIVE_LOOKUP_DELAY):
        """
        Run one search round and schedule the next one `delay` seconds later.

        Nothing happens when the search is not running. Cancellation,
        StopAsyncIteration and TransportNotConnected are not propagated; they
        schedule aclose() instead if the search is still running.
        """
        try:
            if self.running:
                await self._search_round()
            # checked again because the round may have stopped the search
            if self.running:
                self.delayed_calls.append(self.loop.call_later(delay, self._search))
        except (asyncio.CancelledError, StopAsyncIteration, TransportNotConnected):
            if self.running:
                self.loop.call_soon(self.aclose)

    def _search(self):
        """Start a search task with the default delay and keep track of it."""
        self.tasks.append(self.loop.create_task(self._search_task()))

    def __aiter__(self):
        """
        Start the search and return self as the async iterator.

        @raise Exception: if the search is already running
        """
        if self.running:
            raise Exception("already running")
        self.running = True
        self._search()
        return self

    async def __anext__(self) -> typing.List['KademliaPeer']:
        """
        Return the next result.

        The first call prefers get_initial_result() and only waits on the
        queue when that is empty. A falsy item ends the iteration. Ending or
        cancelling the iteration schedules aclose() before re-raising.
        """
        try:
            if self.iteration_count == 0:
                result = self.get_initial_result() or await self.iteration_queue.get()
            else:
                result = await self.iteration_queue.get()
            if not result:
                raise StopAsyncIteration
            self.iteration_count += 1
            return result
        except (asyncio.CancelledError, StopAsyncIteration):
            self.loop.call_soon(self.aclose)
            raise

    def aclose(self):
        """
        Stop the search: queue None so a waiting __anext__ finishes, then
        cancel and forget every search task, running probe and delayed call.
        """
        self.running = False
        self.iteration_queue.put_nowait(None)
        for task in chain(self.tasks, self.running_probes, self.delayed_calls):
            task.cancel()
        self.tasks.clear()
        self.running_probes.clear()
        self.delayed_calls.clear()