class SpiderCrawl(object): """ Crawl the network and look for given 160-bit keys. """ def __init__(self, protocol, node, peers, ksize, alpha): """ Create a new C{SpiderCrawl}er. Args: protocol: A :class:`~kademlia.protocol.KademliaProtocol` instance. node: A :class:`~kademlia.node.Node` representing the key we're looking for peers: A list of :class:`~kademlia.node.Node` instances that provide the entry point for the network ksize: The value for k based on the paper alpha: The value for alpha based on the paper """ self.protocol = protocol self.ksize = ksize self.alpha = alpha self.node = node self.nearest = NodeHeap(self.node, self.ksize) self.lastIDsCrawled = [] self.log = Logger(system=self) self.log.info("creating spider with peers: %s" % peers) self.nearest.push(peers) def onError(self, err): self.log.error(repr(err)) return err def _find(self, rpcmethod): """ Get either a value or list of nodes. Args: rpcmethod: The protocol's callfindValue or callFindNode. The process: 1. calls find_* to current ALPHA nearest not already queried nodes, adding results to current nearest list of k nodes. 2. current nearest list needs to keep track of who has been queried already sort by nearest, keep KSIZE 3. if list is same as last time, next call should be to everyone not yet queried 4. repeat, unless nearest list has all been queried, then ur done """ self.log.info("crawling with nearest: %s" % str(tuple(self.nearest))) count = self.alpha if self.nearest.getIDs() == self.lastIDsCrawled: self.log.info("last iteration same as current - checking all in list now") count = len(self.nearest) self.lastIDsCrawled = self.nearest.getIDs() ds = {} for peer in self.nearest.getUncontacted()[:count]: ds[peer.id] = rpcmethod(peer, self.node) self.nearest.markContacted(peer) d = deferredDict(ds) d.addCallback(self._nodesFound) d.addErrback(self.onError) return d
class Server(object): """ High level view of a node instance. This is the object that should be created to start listening as an active node on the network. """ def __init__(self, ksize=20, alpha=3, id=None, storage=None): """ Create a server instance. This will start listening on the given port. Args: ksize (int): The k parameter from the paper alpha (int): The alpha parameter from the paper id: The id for this node on the network. storage: An instance that implements :interface:`~kademlia.storage.IStorage` """ self.ksize = ksize self.alpha = alpha self.log = Logger(system=self) self.storage = storage or ForgetfulStorage() self.node = Node(id or digest(random.getrandbits(255))) self.protocol = KademliaProtocol(self.node, self.storage, ksize) self.refreshLoop = LoopingCall(self.refreshTable).start(3600) def onError(self, err): self.log.error(repr(err)) return err def listen(self, port): """ Start listening on the given port. This is the same as calling:: reactor.listenUDP(port, server.protocol) """ return reactor.listenUDP(port, self.protocol) def refreshTable(self): """ Refresh buckets that haven't had any lookups in the last hour (per section 2.3 of the paper). """ ds = [] for id in self.protocol.getRefreshIDs(): node = Node(id) nearest = self.protocol.router.findNeighbors(node, self.alpha) spider = NodeSpiderCrawl(self.protocol, node, nearest) ds.append(spider.find()) def republishKeys(_): ds = [] # Republish keys older than one hour for key, value in self.storage.iteritemsOlderThan(3600): ds.append(self.set(key, value)) return defer.gatherResults(ds) d = defer.gatherResults(ds) d.addCallback(republishKeys) d.addErrback(self.onError) return d def bootstrappableNeighbors(self): """ Get a :class:`list` of (ip, port) :class:`tuple` pairs suitable for use as an argument to the bootstrap method. The server should have been bootstrapped already - this is just a utility for getting some neighbors and then storing them if this server is going down for a while. When it comes back up, the list of nodes can be used to bootstrap. """ neighbors = self.protocol.router.findNeighbors(self.node) return [ tuple(n)[-2:] for n in neighbors ] def bootstrap(self, addrs): """ Bootstrap the server by connecting to other known nodes in the network. Args: addrs: A `list` of (ip, port) `tuple` pairs. Note that only IP addresses are acceptable - hostnames will cause an error. """ # if the transport hasn't been initialized yet, wait a second if self.protocol.transport is None: return task.deferLater(reactor, 1, self.bootstrap, addrs) def initTable(results): nodes = [] for addr, result in results.items(): if result[0]: nodes.append(Node(result[1], addr[0], addr[1])) spider = NodeSpiderCrawl(self.protocol, self.node, nodes, self.ksize, self.alpha) return spider.find() ds = {} for addr in addrs: ds[addr] = self.protocol.ping(addr, self.node.id) d = deferredDict(ds) d.addCallback(initTable) d.addErrback(self.onError) return d def inetVisibleIP(self): """ Get the internet visible IP's of this node as other nodes see it. Returns: A `list` of IP's. If no one can be contacted, then the `list` will be empty. """ def handle(results): ips = [ result[1][0] for result in results if result[0] ] self.log.debug("other nodes think our ip is %s" % str(ips)) return ips ds = [] for neighbor in self.bootstrappableNeighbors(): ds.append(self.protocol.stun(neighbor)) d = defer.gatherResults(ds) d.addCallback(handle) d.addErrback(self.onError) return d def get(self, key): """ Get a key if the network has it. Returns: :class:`None` if not found, the value otherwise. """ dkey = digest(key) # if this node has it, return it if self.storage.get(dkey) is not None: return defer.succeed(self.storage.get(dkey)) node = Node(dkey) nearest = self.protocol.router.findNeighbors(node) if len(nearest) == 0: self.log.warning("There are no known neighbors to get key %s" % key) return defer.succeed(None) spider = ValueSpiderCrawl(self.protocol, node, nearest, self.ksize, self.alpha) return spider.find() def set(self, key, value): """ Set the given key to the given value in the network. """ self.log.debug("setting '%s' = '%s' on network" % (key, value)) dkey = digest(key) node = Node(dkey) def store(nodes): self.log.info("setting '%s' on %s" % (key, list(map(str, nodes)))) # if this node is close too, then store here as well if self.node.distanceTo(node) < max([n.distanceTo(node) for n in nodes]): self.storage[dkey] = value ds = [self.protocol.callStore(n, dkey, value) for n in nodes] d = defer.DeferredList(ds) d.addCallback(self._anyRespondSuccess) d.addErrback(self.onError) return d nearest = self.protocol.router.findNeighbors(node) if len(nearest) == 0: self.log.warning("There are no known neighbors to set key %s" % key) return defer.succeed(False) spider = NodeSpiderCrawl(self.protocol, node, nearest, self.ksize, self.alpha) d = spider.find() d.addCallback(store) d.addErrback(self.onError) return d def _anyRespondSuccess(self, responses): """ Given the result of a DeferredList of calls to peers, ensure that at least one of them was contacted and responded with a Truthy result. """ for deferSuccess, result in responses: peerReached, peerResponse = result if deferSuccess and peerReached and peerResponse: return True return False def saveState(self, fname): """ Save the state of this node (the alpha/ksize/id/immediate neighbors) to a cache file with the given fname. """ data = { 'ksize': self.ksize, 'alpha': self.alpha, 'id': self.node.id, 'neighbors': self.bootstrappableNeighbors() } if len(data['neighbors']) == 0: self.log.warning("No known neighbors, so not writing to cache.") return with open(fname, 'wb') as f: pickle.dump(data, f) @classmethod def loadState(self, fname): """ Load the state of this node (the alpha/ksize/id/immediate neighbors) from a cache file with the given fname. """ with open(fname, 'rb') as f: if sys.version_info[0] == 2: # python 2 data = pickle.load(f) else: # python 3 data = pickle.load(f, encoding='latin1') if not isinstance(data['id'], bytes): # first py3 unpickle data['id'] = data['id'].encode("latin-1") # fix bytes s = Server(data['ksize'], data['alpha'], data['id']) if len(data['neighbors']) > 0: s.bootstrap(data['neighbors']) return s def saveStateRegularly(self, fname, frequency=600): """ Save the state of node with a given regularity to the given filename. Args: fname: File name to save retularly to frequencey: Frequency in seconds that the state should be saved. By default, 10 minutes. """ loop = LoopingCall(self.saveState, fname) loop.start(frequency) return loop
class KademliaProtocol(RPCProtocol): def __init__(self, sourceNode, storage, ksize): RPCProtocol.__init__(self) self.router = RoutingTable(self, ksize, sourceNode) self.storage = storage self.sourceNode = sourceNode self.log = Logger(system=self) def getRefreshIDs(self): """ Get ids to search for to keep old buckets up to date. """ ids = [] for bucket in self.router.getLonelyBuckets(): ids.append(random.randint(*bucket.range)) return ids def rpc_stun(self, sender): return sender def rpc_ping(self, sender, nodeid): source = Node(nodeid, sender[0], sender[1]) self.welcomeIfNewNode(source) return self.sourceNode.id def rpc_store(self, sender, nodeid, key, value): source = Node(nodeid, sender[0], sender[1]) self.welcomeIfNewNode(source) self.log.debug("got a store request from %s, storing value" % str(sender)) self.storage[key] = value return True def rpc_find_node(self, sender, nodeid, key): self.log.info("finding neighbors of %i in local table" % long(binascii.hexlify(nodeid), 16)) source = Node(nodeid, sender[0], sender[1]) self.welcomeIfNewNode(source) node = Node(key) return list(map(tuple, self.router.findNeighbors(node, exclude=source))) def rpc_find_value(self, sender, nodeid, key): source = Node(nodeid, sender[0], sender[1]) self.welcomeIfNewNode(source) value = self.storage.get(key, None) if value is None: return self.rpc_find_node(sender, nodeid, key) return {"value": value} def callFindNode(self, nodeToAsk, nodeToFind): address = (nodeToAsk.ip, nodeToAsk.port) d = self.find_node(address, self.sourceNode.id, nodeToFind.id) d.addCallback(self.handleCallResponse, nodeToAsk) d.addErrback(self.onError) return d def callFindValue(self, nodeToAsk, nodeToFind): address = (nodeToAsk.ip, nodeToAsk.port) d = self.find_value(address, self.sourceNode.id, nodeToFind.id) d.addCallback(self.handleCallResponse, nodeToAsk) d.addErrback(self.onError) return d def callPing(self, nodeToAsk): address = (nodeToAsk.ip, nodeToAsk.port) d = self.ping(address, self.sourceNode.id) d.addCallback(self.handleCallResponse, nodeToAsk) d.addErrback(self.onError) return d def callStore(self, nodeToAsk, key, value): address = (nodeToAsk.ip, nodeToAsk.port) d = self.store(address, self.sourceNode.id, key, value) d.addCallback(self.handleCallResponse, nodeToAsk) d.addErrback(self.onError) return d def onError(self, err): self.log.error(repr(err)) return err def welcomeIfNewNode(self, node): """ Given a new node, send it all the keys/values it should be storing, then add it to the routing table. @param node: A new node that just joined (or that we just found out about). Process: For each key in storage, get k closest nodes. If newnode is closer than the furtherst in that list, and the node for this server is closer than the closest in that list, then store the key/value on the new node (per section 2.5 of the paper) """ if self.router.isNewNode(node): ds = [] for key, value in self.storage.iteritems(): keynode = Node(digest(key)) neighbors = self.router.findNeighbors(keynode) if len(neighbors) > 0: newNodeClose = node.distanceTo(keynode) < neighbors[-1].distanceTo(keynode) thisNodeClosest = self.sourceNode.distanceTo(keynode) < neighbors[0].distanceTo(keynode) if len(neighbors) == 0 or (newNodeClose and thisNodeClosest): ds.append(self.callStore(node, key, value)) self.router.addContact(node) return defer.gatherResults(ds) def handleCallResponse(self, result, node): """ If we get a response, add the node to the routing table. If we get no response, make sure it's removed from the routing table. """ if result[0]: self.log.info("got response from %s, adding to router" % node) self.welcomeIfNewNode(node) else: self.log.debug("no response from %s, removing from router" % node) self.router.removeContact(node) return result