def __init__(self, node, shortlist, key, rpc, exclude=None):
    """
    Prepare the bookkeeping for a single iterative lookup of C{key}.

    @param node: the local node the lookup is run on behalf of
    @param shortlist: contacts to start the search from; the first entry
                      (if there is one) seeds C{closest_node}
    @param key: the key being searched for
    @param rpc: the rpc method name (findValue or findNode)
    @param exclude: optional iterable, stored as a set (empty when omitted)
    """
    self.exclude = set(exclude) if exclude else set()
    self.node = node
    # Fired once with the outcome of the lookup
    self.finished_deferred = defer.Deferred()

    # Everything in this class is measured relative to the search key, so
    # keep one Distance bound to it
    self.distance = Distance(key)
    self.key = key
    # Name of the rpc method to invoke on contacts (findValue or findNode)
    self.rpc = rpc

    # Contacts still to be considered (starts as the k closest known
    # contacts to the key from the routing table)
    self.shortlist = shortlist
    # Closest known and active node found so far, and the one before it
    self.closest_node = shortlist[0] if shortlist else None
    self.prev_closest_node = None

    # Queries currently in flight; len() is the number of active probes
    self.active_probes = []
    # Contacts that have already been queried, including those that
    # never replied
    self.already_contacted = []
    # Remote nodes that are known to be active (Contact objects)
    self.active_contacts = []

    # Only one searchIteration call may run at a time
    self._search_iteration_semaphore = defer.DeferredSemaphore(1)
    self._iteration_count = 0
    self.find_value_result = {}
    self.pending_iteration_calls = []
def findCloseNodes(self, key, count=None, sender_node_id=None):
    """
    Return the known contacts closest to the given key, nearest first.

    @param key: the n-bit key (i.e. the node or value ID) to search for
    @type key: str

    @param count: the maximum number of contacts to return; a falsy value
                  (the default) means C{constants.k}
    @type count: int

    @param sender_node_id: Used during RPC, this is the sender's Node ID.
                           Whatever ID is passed here is left out of the
                           returned contacts, unless it equals C{key}.
    @type sender_node_id: str

    @return: A list of node contacts (C{kademlia.contact.Contact instances})
             sorted by distance to C{key}. It holds C{k} (or C{count}, if
             specified) contacts if at all possible; fewer are returned only
             when that is everything this node knows of.
    @rtype: list
    """
    # Our own ID and the requester's ID are normally filtered out ...
    excluded_ids = [self._parentNodeID]
    if sender_node_id:
        excluded_ids.append(sender_node_id)
    # ... but the key that is being looked up is allowed back in (one
    # occurrence is dropped, exactly like list.remove does)
    try:
        excluded_ids.remove(key)
    except ValueError:
        pass

    limit = count or constants.k
    to_key = Distance(key)
    candidates = [
        contact for contact in self.get_contacts()
        if contact.id not in excluded_ids
    ]
    candidates.sort(key=lambda contact: to_key(contact.id))
    return candidates[:min(limit, len(candidates))]
def _shouldSplit(self, bucketIndex, toAdd):
    """
    Decide whether the bucket at C{bucketIndex} should be split for C{toAdd}.

    Splitting is always allowed for the bucket that covers our own node ID.
    For any other bucket it is allowed only if C{toAdd} is closer to us than
    the k-th closest contact we know (or than the furthest known contact
    when fewer than k are known).

    @param bucketIndex: index into C{self._buckets}
    @param toAdd: the ID that is about to be added
    @rtype: bool
    """
    # https://stackoverflow.com/questions/32129978/highly-unbalanced-kademlia-routing-table/32187456#32187456
    if self._buckets[bucketIndex].keyInRange(self._parentNodeID):
        return True

    known = self.get_contacts()
    to_self = Distance(self._parentNodeID)
    known.sort(key=lambda contact: to_self(contact.id))
    # Index k-1 when at least k contacts are known, otherwise the last one
    reference = known[min(len(known), constants.k) - 1]
    return to_self(toAdd) < to_self(reference.id)
def testDistance(self):
    """ Test to see if distance method returns correct result

    0xAA ^ 0x55 == 0xFF for every byte, so two 48-byte keys built from those
    bytes are expected to be at the largest distance a 48-byte key allows.

    The fixtures use byte literals and plain integer arithmetic instead of
    C{long}/C{str.encode('hex')} so the test runs unchanged on Python 2
    and Python 3.
    """
    key_length = 48
    # testList holds 3-tuples of (variable1, variable2, result)
    basicTestList = [
        (b'\xaa' * key_length, b'\x55' * key_length, (1 << (8 * key_length)) - 1),
    ]

    for key1, key2, expected in basicTestList:
        result = Distance(key1)(key2)
        # assertEqual replaces the failIf alias, which newer unittest
        # versions no longer provide
        self.assertEqual(
            result, expected,
            'Result of _distance() should be %s but %s returned' % (expected, result))
def getContacts(self, count=-1, excludeContact=None, sort_distance_to=None):
    """ Returns a list containing up to the first count number of contacts

    @param count: The amount of contacts to return (if 0 or less, return
                  all contacts). The result is never longer than
                  C{constants.k}, whatever count is given.
    @type count: int
    @param excludeContact: A node id to exclude; a contact whose id equals
                           this value is discarded before returning.
    @type excludeContact: str

    @param sort_distance_to: Sort by distance to this id, defaulting to
                             C{self._node_id} when falsy. If False, the
                             contacts are not sorted.

    @return: Up to the first count number of contacts in a list.
             If no contacts are present an empty list is returned.
             Asking for more contacts than are stored is not an error;
             the list is simply shorter.
    @rtype: list
    """
    contacts = [
        contact for contact in self._contacts
        if contact.id != excludeContact
    ]
    if not contacts:
        return contacts

    # A non-positive count means "all of them"; never exceed k
    limit = len(contacts) if count <= 0 else count
    limit = min(limit, constants.k)

    if sort_distance_to is not False:
        # Build the distance helper once rather than once per contact
        distance = Distance(sort_distance_to or self._node_id)
        contacts.sort(key=lambda c: distance(c.id))

    return contacts[:limit]
def test_find_node(self):
    """
    Ask every node's routing table for the k contacts closest to the last
    node's id and check the count, the ordering, and whether the target
    itself shows up exactly when that node has it among its contacts.
    """
    target_id = self.nodes[-1].node_id
    distance_to_target = Distance(target_id)

    for node in self.nodes:
        closest = node._routingTable.findCloseNodes(target_id, constants.k)
        self.assertEqual(len(closest), constants.k)

        closest_ids = [contact.id for contact in closest]
        # results must already be ordered nearest-first
        self.assertListEqual(
            closest_ids, sorted(closest_ids, key=distance_to_target))

        known_ids = [contact.id for contact in node.contacts]
        if target_id in known_ids:
            self.assertEqual(closest_ids[0], target_id)
        else:
            self.assertNotIn(target_id, closest_ids)
def test_find_node(self):
    """
    Ask every node's routing table for the k contacts closest to the last
    node's id and verify the result.

    For each node: exactly k contacts come back, they are ordered by
    distance to the target, and the target is the first result if (and
    only if) the node has it among its contacts.

    The ids are compared as-is. Hex-encoding them and decoding them again
    added nothing to the checks and only works on Python 2.
    """
    last_node_id = self.nodes[-1].node_id
    to_last_node = Distance(last_node_id)

    for n in self.nodes:
        find_close_nodes_result = n._routingTable.findCloseNodes(
            last_node_id, constants.k)
        # assertEqual/assertNotIn report the offending values on failure,
        # which assertTrue(a == b) does not
        self.assertEqual(len(find_close_nodes_result), constants.k)

        found_ids = [c.id for c in find_close_nodes_result]
        self.assertListEqual(
            found_ids, sorted(found_ids, key=to_last_node))

        if last_node_id in [c.id for c in n.contacts]:
            self.assertEqual(found_ids[0], last_node_id)
        else:
            self.assertNotIn(last_node_id, found_ids)
def test_distance(self):
    """
    The distance between an all-0xAA key and an all-0x55 key (48 bytes
    each) must equal the integer value of 48 bytes of 0xFF.
    """
    key_length = 48
    measure = Distance(b'\xaa' * key_length)
    actual = measure(b'\x55' * key_length)
    all_ones = int(hexlify(b'\xff' * key_length), 16)
    self.assertEqual(actual, all_ones)
class _IterativeFind:
    """
    Drives one iterative lookup (findNode or findValue) for a single key.

    Contacts from the shortlist are probed in batches of at most
    C{constants.alpha}; contacts that reply are recorded as active and the
    contacts they return are added to the shortlist. The outcome is
    delivered through C{finished_deferred}:

     - findNode: a list of at most C{constants.k} active contacts, sorted
       by distance to the key
     - findValue: the C{find_value_result} dict
    """
    # TODO: use polymorphism to search for a value or node
    #       instead of using a find_value flag

    def __init__(self, node, shortlist, key, rpc, exclude=None):
        """
        @param node: the local node the lookup is run on behalf of
        @param shortlist: contacts to start the search from
        @param key: the key being searched for
        @param rpc: the rpc method name (findValue or findNode)
        @param exclude: optional iterable of (host, port) pairs that must
                        not be reported in a findValue result
        """
        self.exclude = set(exclude or [])
        self.node = node
        self.finished_deferred = defer.Deferred()
        # all distance operations in this class only care about the distance
        # to self.key, so this makes it easier to calculate those
        self.distance = Distance(key)
        # The closest known and active node yet found
        self.closest_node = None if not shortlist else shortlist[0]
        self.prev_closest_node = None
        # Shortlist of contact objects (the k closest known contacts to the key from the routing table)
        self.shortlist = shortlist
        # The search key
        self.key = key
        # The rpc method name (findValue or findNode)
        self.rpc = rpc
        # List of active queries; len() indicates number of active probes
        self.active_probes = []
        # List of contacts that have already been queried, includes contacts that didn't reply
        self.already_contacted = []
        # A list of found and known-to-be-active remote nodes (Contact objects)
        self.active_contacts = []
        # Ensure only one searchIteration call is running at a time
        self._search_iteration_semaphore = defer.DeferredSemaphore(1)
        self._iteration_count = 0
        self.find_value_result = {}
        # Cancel functions for the scheduled (delayed) iterations
        self.pending_iteration_calls = []

    @property
    def is_find_node_request(self):
        """True when this lookup uses the findNode rpc."""
        return self.rpc == "findNode"

    @property
    def is_find_value_request(self):
        """True when this lookup uses the findValue rpc."""
        return self.rpc == "findValue"

    def is_closer(self, contact):
        """Return whether contact is closer to the key than the current closest node."""
        if not self.closest_node:
            return True
        return self.distance.is_closer(contact.id, self.closest_node.id)

    def getContactTriples(self, result):
        """
        Extract the contact triples from an rpc response.

        For findValue the triples live under the b'contacts' key of the
        response; for findNode the response itself is the list of triples.
        The second element of every triple is decoded in place.

        @raise ValueError: if an entry is not a list/tuple of length 3
        """
        if self.is_find_value_request:
            contact_triples = result[b'contacts']
        else:
            contact_triples = result
        for contact_tup in contact_triples:
            if not isinstance(contact_tup, (list, tuple)) or len(contact_tup) != 3:
                raise ValueError("invalid contact triple")
            contact_tup[1] = contact_tup[1].decode()  # ips are strings
        return contact_triples

    def sortByDistance(self, contact_list):
        """Sort the list of contacts in order by distance from key"""
        contact_list.sort(key=lambda c: self.distance(c.id))

    def _closest_active_contacts(self):
        """Sort the active contacts by distance (in place) and return at most k of them."""
        self.sortByDistance(self.active_contacts)
        return self.active_contacts[:constants.k]

    def extendShortlist(self, contact, result):
        """
        Process the response of a probed contact.

        The contact is recorded as active and the lookup either completes
        (value found, or should_stop() says so) or the contacts named in the
        response are added to the shortlist for later iterations.

        @param contact: the contact that answered
        @param result: its response to the rpc
        @return: the id of the contact that answered
        @raise ValueError: if the contact is ignored, or the response holds
                           an invalid contact triple
        """
        # The "raw response" tuple contains the response message and the originating address info
        originAddress = (contact.address, contact.port)
        if self.finished_deferred.called:
            return contact.id
        if self.node.contact_manager.is_ignored(originAddress):
            raise ValueError("contact is ignored")
        if contact.id == self.node.node_id:
            return contact.id

        if contact not in self.active_contacts:
            self.active_contacts.append(contact)
        if contact not in self.shortlist:
            self.shortlist.append(contact)

        # Now extend the (unverified) shortlist with the returned contacts
        # TODO: some validation on the result (for guarding against attacks)
        # If we are looking for a value, first see if this result is the value
        # we are looking for before treating it as a list of contact triples
        if self.is_find_value_request and self.key in result:
            # We have found the value
            for peer in result[self.key]:
                node_id, host, port = expand_peer(peer)
                if (host, port) not in self.exclude:
                    self.find_value_result.setdefault(self.key, []).append((node_id, host, port))
            if self.find_value_result:
                self.finished_deferred.callback(self.find_value_result)
        else:
            if self.is_find_value_request:
                # We are looking for a value, and the remote node didn't have it
                # - mark it as the closest "empty" node, if it is
                # TODO: store to this peer after finding the value as per the kademlia spec
                if b'closestNodeNoValue' in self.find_value_result:
                    if self.is_closer(contact):
                        self.find_value_result[b'closestNodeNoValue'] = contact
                else:
                    self.find_value_result[b'closestNodeNoValue'] = contact

            contactTriples = self.getContactTriples(result)
            # Computed once; nothing in the loop below changes already_contacted
            contacted_addresses = [(c.address, c.port) for c in self.already_contacted]
            for contactTriple in contactTriples:
                address = (contactTriple[1], contactTriple[2])
                if address in contacted_addresses:
                    continue
                if self.node.contact_manager.is_ignored(address):
                    continue
                found_contact = self.node.contact_manager.make_contact(
                    contactTriple[0], contactTriple[1], contactTriple[2], self.node._protocol)
                if found_contact not in self.shortlist:
                    self.shortlist.append(found_contact)

            if not self.finished_deferred.called and self.should_stop():
                self.finished_deferred.callback(self._closest_active_contacts())

        return contact.id

    @defer.inlineCallbacks
    def probeContact(self, contact):
        """
        Call the rpc on contact with the search key and feed the response
        to extendShortlist.

        The resulting deferred fires with the contact's id, also when the
        probe fails with one of the exceptions caught below.
        """
        fn = getattr(contact, self.rpc)
        try:
            response = yield fn(self.key)
            result = self.extendShortlist(contact, response)
            defer.returnValue(result)
        except (TimeoutError, defer.CancelledError, ValueError, IndexError):
            defer.returnValue(contact.id)

    def should_stop(self):
        """Return whether a findNode lookup has nothing more to gain from further probing."""
        if self.is_find_value_request:
            # search stops when it finds a value, let it run
            return False
        if self.prev_closest_node and self.closest_node and self.distance.is_closer(
                self.prev_closest_node.id, self.closest_node.id):
            # we're getting further away
            return True
        if len(self.active_contacts) >= constants.k:
            # we have enough results
            return True
        return False

    # Send parallel, asynchronous FIND_NODE RPCs to the shortlist of contacts
    def _searchIteration(self):
        """
        Run one round of the lookup: probe up to alpha not-yet-contacted
        shortlist contacts, or finish the lookup when no progress is possible.
        """
        # Sort the discovered active nodes from closest to furthest
        if self.active_contacts:
            self.sortByDistance(self.active_contacts)
            self.prev_closest_node = self.closest_node
            self.closest_node = self.active_contacts[0]

        # Sort the current shortList before contacting other nodes
        self.sortByDistance(self.shortlist)
        probes = []
        already_contacted_addresses = {(c.address, c.port) for c in self.already_contacted}
        to_remove = []
        for contact in self.shortlist:
            if self.node.contact_manager.is_ignored((contact.address, contact.port)):
                to_remove.append(contact)  # a contact became bad during iteration
                continue
            if (contact.address, contact.port) not in already_contacted_addresses:
                self.already_contacted.append(contact)
                to_remove.append(contact)
                probe = self.probeContact(contact)
                probes.append(probe)
                self.active_probes.append(probe)
            if len(probes) == constants.alpha:
                break
        for contact in to_remove:
            # these contacts will be re-added to the shortlist when they reply successfully
            self.shortlist.remove(contact)

        # run the probes
        if probes:
            # Schedule the next iteration if there are any active
            # calls (Kademlia uses loose parallelism)
            self.searchIteration()

            d = defer.DeferredList(probes, consumeErrors=True)

            def _remove_probes(results):
                for probe in probes:
                    self.active_probes.remove(probe)
                return results

            d.addCallback(_remove_probes)

        elif not self.finished_deferred.called and (not self.active_probes or self.should_stop()):
            # If no probes were sent, there will not be any improvement, so we're done.
            # The parentheses matter: without them a true should_stop() would fire
            # finished_deferred a second time after the lookup had already finished.
            if self.is_find_value_request:
                self.finished_deferred.callback(self.find_value_result)
            else:
                self.finished_deferred.callback(self._closest_active_contacts())
        elif not self.finished_deferred.called:
            # Force the next iteration
            self.searchIteration()

    def searchIteration(self, delay=constants.iterativeLookupDelay):
        """
        Schedule _searchIteration to run after delay, serialized through the
        iteration semaphore. Scheduled calls are cancelled once the lookup
        finishes.
        """
        def _cancel_pending_iterations(result):
            while self.pending_iteration_calls:
                canceller = self.pending_iteration_calls.pop()
                canceller()
            return result

        self.finished_deferred.addBoth(_cancel_pending_iterations)
        self._iteration_count += 1
        call, cancel = self.node.reactor_callLater(delay, self._search_iteration_semaphore.run, self._searchIteration)
        self.pending_iteration_calls.append(cancel)