class RoutingManager(object):
    """Keep the routing table populated and react to network events.

    The manager owns a ``RoutingTable`` plus three candidate queues
    (replacement, query-received and found nodes). ``do_maintenance``
    produces the periodic maintenance traffic; the ``on_*`` handlers update
    the table when queries, responses and timeouts are reported.

    NOTE(review): this file contains a second ``class RoutingManager``
    definition further down; at import time the later definition is the one
    that stays bound to the name.
    """

    def __init__(self, my_node, msg_f, bootstrapper):
        """Create the routing table and the maintenance state.

        my_node -- the local node (its ``id`` and ``distance`` are used).
        msg_f -- factory used to build outgoing ping/find_node queries.
        bootstrapper -- consulted for hardcoded addresses and told about
            reachable nodes.
        """
        self.my_node = my_node
        self.msg_f = msg_f
        self.bootstrapper = bootstrapper
        self.table = RoutingTable(my_node, NODES_PER_BUCKET)
        # maintenance variables
        self._next_stale_maintenance_index = 0
        self._maintenance_mode = FILL_BUCKETS
        self._replacement_queue = _ReplacementQueue(self.table)
        self._query_received_queue = _QueryReceivedQueue(self.table)
        self._found_nodes_queue = _FoundNodesQueue(self.table)
        # Tasks are rotated by do_maintenance, so the order changes over time.
        self._maintenance_tasks = [
            self._ping_a_staled_rnode,
            self._ping_a_query_received_node,
            self._ping_a_found_node,
            self._ping_a_replacement_node,
        ]
        self._num_pending_filling_lookups = NUM_FILLING_LOOKUPS
        self._num_timeouts_in_a_row = 0

    def _get_maintenance_lookup(self, lookup_target=None, nodes=None):
        """Return a ``(lookup_target, nodes)`` pair for a maintenance lookup.

        A random identifier is used when no target is given, and the closest
        rnodes to that target are used when no nodes are given.
        """
        if not lookup_target:
            lookup_target = identifier.RandomId()
        if not nodes:
            log_distance = lookup_target.distance(self.my_node.id).log
            nodes = self.get_closest_rnodes(log_distance, 0, True)
        return lookup_target, nodes

    def do_maintenance(self):
        """Run one maintenance step.

        Returns a tuple ``(maintenance_delay, queries_to_send,
        maintenance_lookup)``. ``maintenance_lookup`` is None unless a
        bucket-filling lookup is still pending.
        """
        queries_to_send = []
        maintenance_lookup = None
        if self._maintenance_mode == FILL_BUCKETS:
            #TODO: kill
            if self._num_pending_filling_lookups:
                self._num_pending_filling_lookups -= 1
                maintenance_lookup = self._get_maintenance_lookup()
            else:
                self._maintenance_mode = NORMAL_MODE
        elif self._maintenance_mode == NORMAL_MODE:
            # Try the maintenance tasks until one of them actually does work
            # or all of them have been tried (whatever happens first). The
            # loop runs over a range because the task list is rotated inside.
            for _ in range(len(self._maintenance_tasks)):
                task = self._maintenance_tasks.pop(0)
                self._maintenance_tasks.append(task)
                node_ = task()
                if node_:
                    queries_to_send.append(
                        self._get_maintenance_query(node_))
                    # This task did do some work. We are done here!
                    break
            # NOTE(review): this check is assumed to belong to the
            # NORMAL_MODE branch -- confirm against the intended layout.
            if self.table.num_rnodes < MIN_RNODES:
                # Ping more found nodes when routing table has few nodes
                node_ = self._ping_a_found_node()
                if node_:
                    queries_to_send.append(
                        self._get_maintenance_query(node_, do_fill_up=True))
        # The delay depends on the mode in effect *after* this step.
        maintenance_delay = _MAINTENANCE_DELAY[self._maintenance_mode]
        return (maintenance_delay, queries_to_send, maintenance_lookup)

    def _ping_a_staled_rnode(self):
        """Return the next stale rnode found while cycling over the buckets.

        Scanning resumes where the previous call stopped. Returns None when
        a whole cycle finds no rnode last seen more than QUARANTINE_PERIOD
        seconds ago.
        """
        starting_index = self._next_stale_maintenance_index
        result = None
        while not result:
            # Find a non-empty bucket
            sbucket = self.table.get_sbucket(
                self._next_stale_maintenance_index)
            m_bucket = sbucket.main
            # NOTE(review): the modulus is NUM_BUCKETS - 1, so the index
            # never reaches NUM_BUCKETS - 1 -- confirm this is intended.
            self._next_stale_maintenance_index = (
                self._next_stale_maintenance_index + 1) % (NUM_BUCKETS - 1)
            if m_bucket:
                rnode = m_bucket.get_stalest_rnode()
                if time.time() > rnode.last_seen + QUARANTINE_PERIOD:
                    result = rnode
            if self._next_stale_maintenance_index == starting_index:
                # No node to be pinged in the whole table.
                break
        return result

    def _ping_a_found_node(self):
        """Pop and return the next candidate from the found-nodes queue."""
        node_ = self._found_nodes_queue.pop(0)
        if node_:
            logger.debug('pinging node found: %r', node_)
        return node_

    def _ping_a_query_received_node(self):
        """Pop and return the next candidate that sent us a query."""
        return self._query_received_queue.pop(0)

    def _ping_a_replacement_node(self):
        """Pop and return the next candidate from the replacement queue."""
        return self._replacement_queue.pop(0)

    def _get_maintenance_query(self, node_, do_fill_up=False):
        """Build the maintenance query to be sent to ``node_``.

        With ``do_fill_up`` (or with 50% chance otherwise) the query is a
        find_node aimed at a bucket that still has room, or a ping when no
        such bucket is reported. In the remaining cases it is a find_node
        with the local id as target.
        """
        if do_fill_up or random.choice((False, True)):
            # 50% chance to send a find_node to fill up a non-full bucket
            target_log_distance = self.table.find_next_bucket_with_room_index(
                node_=node_)
            # NOTE(review): truthiness test -- an index of 0 would be
            # treated like "no bucket with room"; confirm that is intended.
            if target_log_distance:
                target = self.my_node.id.generate_close_id(target_log_distance)
                msg = self.msg_f.outgoing_find_node_query(node_, target, None)
            else:
                # Every bucket is full. We send a ping instead.
                msg = self.msg_f.outgoing_ping_query(node_)
        else:
            # 50% chance to send find_node with my id as target
            msg = self.msg_f.outgoing_find_node_query(
                node_, self.my_node.id, None)
        return msg

    def on_query_received(self, node_):
        """Update routing state after receiving a query from ``node_``.

        Always returns None: nodes that could be added to the main bucket
        are queued to be pinged later by the maintenance tasks instead of
        being queried right away.
        """
        self._num_timeouts_in_a_row = 0
        log_distance = self.my_node.distance(node_).log
        if (log_distance > MAX_LOG_DISTANCE_TO_ADD_HARDCODED
                and self.bootstrapper.is_hardcoded(node_.addr)):
            return
        try:
            sbucket = self.table.get_sbucket(log_distance)
        except IndexError:
            return  # Got a query from myself. Just ignore it.
        m_bucket = sbucket.main
        r_bucket = sbucket.replacement
        if node_.ip in m_bucket.ips_in_table:
            rnode = m_bucket.get_rnode(node_)
            if rnode:
                # node in routing table: update rnode
                self._update_rnode_on_query_received(rnode)
            # This IP is in the table. Stop here to avoid multiple entries
            # with the same IP
            return
        # Now, consider adding this node to the routing table
        if m_bucket.there_is_room():
            # There is room in the bucket: queue it
            self._query_received_queue.add(node_, log_distance)
            return
        # No room in the main routing table: the node takes the place of the
        # worst replacement rnode, but only when that rnode has timed out
        # more than MAX_NUM_TIMEOUTS times in a row.
        worst_rnode = self._worst_rnode(r_bucket.rnodes)
        if (worst_rnode
                and worst_rnode.timeouts_in_a_row() > MAX_NUM_TIMEOUTS):
            r_bucket.remove(worst_rnode)
            rnode = node_.get_rnode(log_distance)
            r_bucket.add(rnode)
            self._update_rnode_on_query_received(rnode)
        return

    def on_response_received(self, node_, rtt, nodes):
        """Update routing state after receiving a response from ``node_``.

        node_ -- the node that responded.
        rtt -- round trip time measured for the response.
        nodes -- nodes carried by the response; when non-empty they are
            queued as candidates for later pings.

        Always returns None.
        """
        self._num_timeouts_in_a_row = 0
        if nodes:
            logger.debug('nodes found: %r', nodes)
            self._found_nodes_queue.add(nodes)
        logger.debug('on response received %f', rtt)
        log_distance = self.my_node.distance(node_).log
        if (log_distance > MAX_LOG_DISTANCE_TO_ADD_HARDCODED
                and self.bootstrapper.is_hardcoded(node_.addr)):
            return
        try:
            sbucket = self.table.get_sbucket(log_distance)
        except IndexError:
            return  # Got a response from myself. Just ignore it.
        m_bucket = sbucket.main
        r_bucket = sbucket.replacement
        if node_.ip in m_bucket.ips_in_table:
            rnode = m_bucket.get_rnode(node_)
            if rnode:
                # node in routing table: update rnode
                self._update_rnode_on_response_received(rnode, rtt)
                self.bootstrapper.report_reachable(
                    rnode.addr, time.time() - rnode.creation_ts)
            # This IP is in the table. Stop here to avoid multiple entries
            # with the same IP
            return
        # Now, consider adding this node to the routing table
        rnode = r_bucket.get_rnode(node_)
        if rnode:
            # node in replacement table
            # let's see whether there is room in the main
            self._update_rnode_on_response_received(rnode, rtt)
            #TODO: leave this for the maintenance task
            if m_bucket.there_is_room():
                m_bucket.add(rnode)
                self.table.num_rnodes += 1
                # NOTE(review): the response is registered a second time on
                # promotion. Kept as is because the second call also
                # re-evaluates in_quarantine against the refreshed
                # last_action_ts; confirm whether that is intended.
                self._update_rnode_on_response_received(rnode, rtt)
                r_bucket.remove(rnode)
            return
        # The node is nowhere
        # Add to main table (if the bucket is not full)
        #TODO: check whether in replacement_mode
        if m_bucket.there_is_room():
            rnode = node_.get_rnode(log_distance)
            m_bucket.add(rnode)
            self.table.num_rnodes += 1
            self._update_rnode_on_response_received(rnode, rtt)
            return
        # The main bucket is full
        # Let's see whether this node's latency is good
        current_time = time.time()
        rnode_to_be_replaced = None
        # Slowest rnodes first, so the slowest replaceable one is picked.
        m_bucket.rnodes.sort(key=attrgetter('rtt'), reverse=True)
        for rnode in m_bucket.rnodes:
            rnode_age = current_time - rnode.bucket_insertion_ts
            # A rnode can only be replaced when the candidate's RTT is
            # shorter by a factor that shrinks linearly with the time the
            # rnode has spent in the bucket. After 30 mins (1800 secs) the
            # candidate's RTT must be below 75% of the rnode's RTT; after
            # one hour, below 50% (ie. two times faster). After two hours
            # (7200 secs) a rnode cannot be replaced by this method.
            if rtt < rnode.rtt * (1 - (rnode_age / 7200.0)):
                rnode_to_be_replaced = rnode
                break
        if rnode_to_be_replaced:
            # One rnode out, one rnode in: table.num_rnodes is unchanged.
            m_bucket.remove(rnode_to_be_replaced)
            rnode = node_.get_rnode(log_distance)
            m_bucket.add(rnode)
            self._update_rnode_on_response_received(rnode, rtt)
            return
        # Get the worst node in replacement bucket and see whether
        # it's bad enough to be replaced by node_
        worst_rnode = self._worst_rnode(r_bucket.rnodes)
        if (worst_rnode
                and worst_rnode.timeouts_in_a_row() > MAX_NUM_TIMEOUTS):
            # This node is better candidate than worst_rnode
            r_bucket.remove(worst_rnode)
            rnode = node_.get_rnode(log_distance)
            r_bucket.add(rnode)
            self._update_rnode_on_response_received(rnode, rtt)
        return

    def on_error_received(self, node_addr):
        """Do nothing: error messages do not affect the routing table."""
        return

    def on_timeout(self, node_):
        """Update routing state after a query to ``node_`` timed out.

        A node found in the main bucket is removed from it and moved to the
        replacement bucket (taking the place of the worst replacement rnode
        when there is no room), and the current replacement rnodes are
        queued to be pinged. A node found only in the replacement bucket
        just gets the timeout registered. Each timeout is registered once.

        Always returns an empty list.
        """
        if not node_.id:
            # this is an overlay bootstrap node (no id). Ignore.
            return []
        self._num_timeouts_in_a_row += 1
        if self._num_timeouts_in_a_row > MAX_TIMEOUTS_IN_A_ROW:
            # stop, do not expel nodes from routing table
            return []
        log_distance = self.my_node.distance(node_).log
        try:
            sbucket = self.table.get_sbucket(log_distance)
        except IndexError:
            return []  # Got a timeout from myself, WTF? Just ignore.
        m_bucket = sbucket.main
        r_bucket = sbucket.replacement
        rnode = m_bucket.get_rnode(node_)
        if rnode:
            # node in routing table: kick it out
            self._update_rnode_on_timeout(rnode)
            m_bucket.remove(rnode)
            self.table.num_rnodes -= 1
            for r_rnode in r_bucket.sorted_by_rtt():
                self._replacement_queue.add(r_rnode)
            if r_bucket.there_is_room():
                r_bucket.add(rnode)
            else:
                worst_rnode = self._worst_rnode(r_bucket.rnodes)
                if worst_rnode:
                    # Replace worst node in replacement table
                    r_bucket.remove(worst_rnode)
                    r_bucket.add(rnode)
            # The timeout is already registered. Falling through would find
            # the rnode just moved to the replacement bucket and count the
            # same timeout twice.
            return []
        # Node is not in main table
        rnode = r_bucket.get_rnode(node_)
        if rnode:
            # Node in replacement table: just update rnode
            self._update_rnode_on_timeout(rnode)
        return []

    def get_closest_rnodes(self, log_distance, num_nodes, exclude_myself):
        """Return the rnodes closest to ``log_distance`` from the table.

        A falsy ``num_nodes`` means "as many as NODES_PER_BUCKET holds for
        that log distance".
        """
        if not num_nodes:
            num_nodes = NODES_PER_BUCKET[log_distance]
        return self.table.get_closest_rnodes(log_distance, num_nodes,
                                             exclude_myself)

    def get_main_rnodes(self):
        """Return the table's main rnodes."""
        return self.table.get_main_rnodes()

    def print_stats(self):
        """Delegate to the table's ``print_stats``."""
        self.table.print_stats()

    def print_table(self):
        """Delegate to the table's ``print_table``."""
        self.table.print_table()

    def _update_rnode_on_query_received(self, rnode):
        """Register a query from node.

        You should call this method when receiving a query from this node.
        """
        # One clock reading, so every field describes the same instant.
        current_time = time.time()
        rnode.last_action_ts = current_time
        rnode.msgs_since_timeout += 1
        rnode.num_queries += 1
        rnode.add_event(current_time, node.QUERY)
        rnode.last_seen = current_time

    def _update_rnode_on_response_received(self, rnode, rtt):
        """Register a reply from rnode.

        You should call this method when receiving a response from this
        rnode.
        """
        rnode.rtt = rtt
        current_time = time.time()
        if rnode.in_quarantine:
            # Must be evaluated before last_action_ts is refreshed below.
            rnode.in_quarantine = (
                rnode.last_action_ts < current_time - QUARANTINE_PERIOD)
        rnode.last_action_ts = current_time
        rnode.num_responses += 1
        rnode.add_event(current_time, node.RESPONSE)
        rnode.last_seen = current_time

    def _update_rnode_on_timeout(self, rnode):
        """Register a timeout for this rnode.

        You should call this method when getting a timeout for this node.
        """
        current_time = time.time()
        rnode.last_action_ts = current_time
        rnode.msgs_since_timeout = 0
        rnode.num_timeouts += 1
        rnode.add_event(current_time, node.TIMEOUT)

    def _worst_rnode(self, rnodes):
        """Return the rnode with most timeouts in a row (None if empty).

        When several rnodes tie, the last one in ``rnodes`` is returned.
        """
        max_num_timeouts = -1
        worst_rnode_so_far = None
        for rnode in rnodes:
            num_timeouts = rnode.timeouts_in_a_row()
            # '>=' (not '>') so that later rnodes win ties.
            if num_timeouts >= max_num_timeouts:
                max_num_timeouts = num_timeouts
                worst_rnode_so_far = rnode
        return worst_rnode_so_far
class RoutingManager(object):
    """Keep the routing table populated and react to network events.

    The manager owns a ``RoutingTable`` plus three candidate queues
    (replacement, query-received and found nodes). ``do_maintenance``
    produces the periodic maintenance traffic; the ``on_*`` handlers update
    the table when queries, responses and timeouts are reported.

    NOTE(review): this file contains an earlier ``class RoutingManager``
    definition as well; this later definition is the one that stays bound
    to the name at import time.
    """

    def __init__(self, my_node, msg_f, bootstrapper):
        """Create the routing table and the maintenance state.

        my_node -- the local node (its ``id`` and ``distance`` are used).
        msg_f -- factory used to build outgoing ping/find_node queries.
        bootstrapper -- consulted for hardcoded addresses and told about
            reachable nodes.
        """
        self.my_node = my_node
        self.msg_f = msg_f
        self.bootstrapper = bootstrapper
        self.table = RoutingTable(my_node, NODES_PER_BUCKET)
        # maintenance variables
        self._next_stale_maintenance_index = 0
        self._maintenance_mode = FILL_BUCKETS
        self._replacement_queue = _ReplacementQueue(self.table)
        self._query_received_queue = _QueryReceivedQueue(self.table)
        self._found_nodes_queue = _FoundNodesQueue(self.table)
        # Tasks are rotated by do_maintenance, so the order changes over time.
        self._maintenance_tasks = [
            self._ping_a_staled_rnode,
            self._ping_a_query_received_node,
            self._ping_a_found_node,
            self._ping_a_replacement_node,
        ]
        self._num_pending_filling_lookups = NUM_FILLING_LOOKUPS
        self._num_timeouts_in_a_row = 0

    def _get_maintenance_lookup(self, lookup_target=None, nodes=None):
        """Return a ``(lookup_target, nodes)`` pair for a maintenance lookup.

        A random identifier is used when no target is given, and the closest
        rnodes to that target are used when no nodes are given.
        """
        if not lookup_target:
            lookup_target = identifier.RandomId()
        if not nodes:
            log_distance = lookup_target.distance(self.my_node.id).log
            nodes = self.get_closest_rnodes(log_distance, 0, True)
        return lookup_target, nodes

    def do_maintenance(self):
        """Run one maintenance step.

        Returns a tuple ``(maintenance_delay, queries_to_send,
        maintenance_lookup)``. ``maintenance_lookup`` is None unless a
        bucket-filling lookup is still pending.
        """
        queries_to_send = []
        maintenance_lookup = None
        if self._maintenance_mode == FILL_BUCKETS:
            #TODO: kill
            if self._num_pending_filling_lookups:
                self._num_pending_filling_lookups -= 1
                maintenance_lookup = self._get_maintenance_lookup()
            else:
                self._maintenance_mode = NORMAL_MODE
        elif self._maintenance_mode == NORMAL_MODE:
            # Try the maintenance tasks until one of them actually does work
            # or all of them have been tried (whatever happens first). The
            # loop runs over a range because the task list is rotated inside.
            for _ in range(len(self._maintenance_tasks)):
                task = self._maintenance_tasks.pop(0)
                self._maintenance_tasks.append(task)
                node_ = task()
                if node_:
                    queries_to_send.append(
                        self._get_maintenance_query(node_))
                    # This task did do some work. We are done here!
                    break
            # NOTE(review): this check is assumed to belong to the
            # NORMAL_MODE branch -- confirm against the intended layout.
            if self.table.num_rnodes < MIN_RNODES:
                # Ping more found nodes when routing table has few nodes
                node_ = self._ping_a_found_node()
                if node_:
                    queries_to_send.append(
                        self._get_maintenance_query(node_, do_fill_up=True))
        # The delay depends on the mode in effect *after* this step.
        maintenance_delay = _MAINTENANCE_DELAY[self._maintenance_mode]
        return (maintenance_delay, queries_to_send, maintenance_lookup)

    def _ping_a_staled_rnode(self):
        """Return the next stale rnode found while cycling over the buckets.

        Scanning resumes where the previous call stopped. Returns None when
        a whole cycle finds no rnode last seen more than QUARANTINE_PERIOD
        seconds ago.
        """
        starting_index = self._next_stale_maintenance_index
        result = None
        while not result:
            # Find a non-empty bucket
            sbucket = self.table.get_sbucket(
                self._next_stale_maintenance_index)
            m_bucket = sbucket.main
            # NOTE(review): the modulus is NUM_BUCKETS - 1, so the index
            # never reaches NUM_BUCKETS - 1 -- confirm this is intended.
            self._next_stale_maintenance_index = (
                self._next_stale_maintenance_index + 1) % (NUM_BUCKETS - 1)
            if m_bucket:
                rnode = m_bucket.get_stalest_rnode()
                if time.time() > rnode.last_seen + QUARANTINE_PERIOD:
                    result = rnode
            if self._next_stale_maintenance_index == starting_index:
                # No node to be pinged in the whole table.
                break
        return result

    def _ping_a_found_node(self):
        """Pop and return the next candidate from the found-nodes queue."""
        node_ = self._found_nodes_queue.pop(0)
        if node_:
            logger.debug('pinging node found: %r', node_)
        return node_

    def _ping_a_query_received_node(self):
        """Pop and return the next candidate that sent us a query."""
        return self._query_received_queue.pop(0)

    def _ping_a_replacement_node(self):
        """Pop and return the next candidate from the replacement queue."""
        return self._replacement_queue.pop(0)

    def _get_maintenance_query(self, node_, do_fill_up=False):
        """Build the maintenance query to be sent to ``node_``.

        With ``do_fill_up`` (or with 50% chance otherwise) the query is a
        find_node aimed at a bucket that still has room, or a ping when no
        such bucket is reported. In the remaining cases it is a find_node
        with the local id as target.
        """
        if do_fill_up or random.choice((False, True)):
            # 50% chance to send a find_node to fill up a non-full bucket
            target_log_distance = self.table.find_next_bucket_with_room_index(
                node_=node_)
            # NOTE(review): truthiness test -- an index of 0 would be
            # treated like "no bucket with room"; confirm that is intended.
            if target_log_distance:
                target = self.my_node.id.generate_close_id(target_log_distance)
                msg = self.msg_f.outgoing_find_node_query(node_, target, None)
            else:
                # Every bucket is full. We send a ping instead.
                msg = self.msg_f.outgoing_ping_query(node_)
        else:
            # 50% chance to send find_node with my id as target
            msg = self.msg_f.outgoing_find_node_query(
                node_, self.my_node.id, None)
        return msg

    def on_query_received(self, node_):
        """Update routing state after receiving a query from ``node_``.

        Always returns None: nodes that could be added to the main bucket
        are queued to be pinged later by the maintenance tasks instead of
        being queried right away.
        """
        self._num_timeouts_in_a_row = 0
        log_distance = self.my_node.distance(node_).log
        if (log_distance > MAX_LOG_DISTANCE_TO_ADD_HARDCODED
                and self.bootstrapper.is_hardcoded(node_.addr)):
            return
        try:
            sbucket = self.table.get_sbucket(log_distance)
        except IndexError:
            return  # Got a query from myself. Just ignore it.
        m_bucket = sbucket.main
        r_bucket = sbucket.replacement
        if node_.ip in m_bucket.ips_in_table:
            rnode = m_bucket.get_rnode(node_)
            if rnode:
                # node in routing table: update rnode
                self._update_rnode_on_query_received(rnode)
            # This IP is in the table. Stop here to avoid multiple entries
            # with the same IP
            return
        # Now, consider adding this node to the routing table
        if m_bucket.there_is_room():
            # There is room in the bucket: queue it
            self._query_received_queue.add(node_, log_distance)
            return
        # No room in the main routing table: the node takes the place of the
        # worst replacement rnode, but only when that rnode has timed out
        # more than MAX_NUM_TIMEOUTS times in a row.
        worst_rnode = self._worst_rnode(r_bucket.rnodes)
        if (worst_rnode
                and worst_rnode.timeouts_in_a_row() > MAX_NUM_TIMEOUTS):
            r_bucket.remove(worst_rnode)
            rnode = node_.get_rnode(log_distance)
            r_bucket.add(rnode)
            self._update_rnode_on_query_received(rnode)
        return

    def on_response_received(self, node_, rtt, nodes):
        """Update routing state after receiving a response from ``node_``.

        node_ -- the node that responded.
        rtt -- round trip time measured for the response.
        nodes -- nodes carried by the response; when non-empty they are
            queued as candidates for later pings.

        Always returns None.
        """
        self._num_timeouts_in_a_row = 0
        if nodes:
            logger.debug('nodes found: %r', nodes)
            self._found_nodes_queue.add(nodes)
        logger.debug('on response received %f', rtt)
        log_distance = self.my_node.distance(node_).log
        if (log_distance > MAX_LOG_DISTANCE_TO_ADD_HARDCODED
                and self.bootstrapper.is_hardcoded(node_.addr)):
            return
        try:
            sbucket = self.table.get_sbucket(log_distance)
        except IndexError:
            return  # Got a response from myself. Just ignore it.
        m_bucket = sbucket.main
        r_bucket = sbucket.replacement
        if node_.ip in m_bucket.ips_in_table:
            rnode = m_bucket.get_rnode(node_)
            if rnode:
                # node in routing table: update rnode
                self._update_rnode_on_response_received(rnode, rtt)
                self.bootstrapper.report_reachable(
                    rnode.addr, time.time() - rnode.creation_ts)
            # This IP is in the table. Stop here to avoid multiple entries
            # with the same IP
            return
        # Now, consider adding this node to the routing table
        rnode = r_bucket.get_rnode(node_)
        if rnode:
            # node in replacement table
            # let's see whether there is room in the main
            self._update_rnode_on_response_received(rnode, rtt)
            #TODO: leave this for the maintenance task
            if m_bucket.there_is_room():
                m_bucket.add(rnode)
                self.table.num_rnodes += 1
                # NOTE(review): the response is registered a second time on
                # promotion. Kept as is because the second call also
                # re-evaluates in_quarantine against the refreshed
                # last_action_ts; confirm whether that is intended.
                self._update_rnode_on_response_received(rnode, rtt)
                r_bucket.remove(rnode)
            return
        # The node is nowhere
        # Add to main table (if the bucket is not full)
        #TODO: check whether in replacement_mode
        if m_bucket.there_is_room():
            rnode = node_.get_rnode(log_distance)
            m_bucket.add(rnode)
            self.table.num_rnodes += 1
            self._update_rnode_on_response_received(rnode, rtt)
            return
        # The main bucket is full
        # Let's see whether this node's latency is good
        current_time = time.time()
        rnode_to_be_replaced = None
        # Slowest rnodes first, so the slowest replaceable one is picked.
        m_bucket.rnodes.sort(key=attrgetter('rtt'), reverse=True)
        for rnode in m_bucket.rnodes:
            rnode_age = current_time - rnode.bucket_insertion_ts
            # A rnode can only be replaced when the candidate's RTT is
            # shorter by a factor that shrinks linearly with the time the
            # rnode has spent in the bucket. After 30 mins (1800 secs) the
            # candidate's RTT must be below 75% of the rnode's RTT; after
            # one hour, below 50% (ie. two times faster). After two hours
            # (7200 secs) a rnode cannot be replaced by this method.
            if rtt < rnode.rtt * (1 - (rnode_age / 7200.0)):
                rnode_to_be_replaced = rnode
                break
        if rnode_to_be_replaced:
            # One rnode out, one rnode in: table.num_rnodes is unchanged.
            m_bucket.remove(rnode_to_be_replaced)
            rnode = node_.get_rnode(log_distance)
            m_bucket.add(rnode)
            self._update_rnode_on_response_received(rnode, rtt)
            return
        # Get the worst node in replacement bucket and see whether
        # it's bad enough to be replaced by node_
        worst_rnode = self._worst_rnode(r_bucket.rnodes)
        if (worst_rnode
                and worst_rnode.timeouts_in_a_row() > MAX_NUM_TIMEOUTS):
            # This node is better candidate than worst_rnode
            r_bucket.remove(worst_rnode)
            rnode = node_.get_rnode(log_distance)
            r_bucket.add(rnode)
            self._update_rnode_on_response_received(rnode, rtt)
        return

    def on_error_received(self, node_addr):
        """Do nothing: error messages do not affect the routing table."""
        return

    def on_timeout(self, node_):
        """Update routing state after a query to ``node_`` timed out.

        A node found in the main bucket is removed from it and moved to the
        replacement bucket (taking the place of the worst replacement rnode
        when there is no room), and the current replacement rnodes are
        queued to be pinged. A node found only in the replacement bucket
        just gets the timeout registered. Each timeout is registered once.

        Always returns an empty list.
        """
        if not node_.id:
            # this is an overlay bootstrap node (no id). Ignore.
            return []
        self._num_timeouts_in_a_row += 1
        if self._num_timeouts_in_a_row > MAX_TIMEOUTS_IN_A_ROW:
            # stop, do not expel nodes from routing table
            return []
        log_distance = self.my_node.distance(node_).log
        try:
            sbucket = self.table.get_sbucket(log_distance)
        except IndexError:
            return []  # Got a timeout from myself, WTF? Just ignore.
        m_bucket = sbucket.main
        r_bucket = sbucket.replacement
        rnode = m_bucket.get_rnode(node_)
        if rnode:
            # node in routing table: kick it out
            self._update_rnode_on_timeout(rnode)
            m_bucket.remove(rnode)
            self.table.num_rnodes -= 1
            for r_rnode in r_bucket.sorted_by_rtt():
                self._replacement_queue.add(r_rnode)
            if r_bucket.there_is_room():
                r_bucket.add(rnode)
            else:
                worst_rnode = self._worst_rnode(r_bucket.rnodes)
                if worst_rnode:
                    # Replace worst node in replacement table
                    r_bucket.remove(worst_rnode)
                    r_bucket.add(rnode)
            # The timeout is already registered. Falling through would find
            # the rnode just moved to the replacement bucket and count the
            # same timeout twice.
            return []
        # Node is not in main table
        rnode = r_bucket.get_rnode(node_)
        if rnode:
            # Node in replacement table: just update rnode
            self._update_rnode_on_timeout(rnode)
        return []

    def get_closest_rnodes(self, log_distance, num_nodes, exclude_myself):
        """Return the rnodes closest to ``log_distance`` from the table.

        A falsy ``num_nodes`` means "as many as NODES_PER_BUCKET holds for
        that log distance".
        """
        if not num_nodes:
            num_nodes = NODES_PER_BUCKET[log_distance]
        return self.table.get_closest_rnodes(log_distance, num_nodes,
                                             exclude_myself)

    def get_main_rnodes(self):
        """Return the table's main rnodes."""
        return self.table.get_main_rnodes()

    def print_stats(self):
        """Delegate to the table's ``print_stats``."""
        self.table.print_stats()

    def print_table(self):
        """Delegate to the table's ``print_table``."""
        self.table.print_table()

    def _update_rnode_on_query_received(self, rnode):
        """Register a query from node.

        You should call this method when receiving a query from this node.
        """
        # One clock reading, so every field describes the same instant.
        current_time = time.time()
        rnode.last_action_ts = current_time
        rnode.msgs_since_timeout += 1
        rnode.num_queries += 1
        rnode.add_event(current_time, node.QUERY)
        rnode.last_seen = current_time

    def _update_rnode_on_response_received(self, rnode, rtt):
        """Register a reply from rnode.

        You should call this method when receiving a response from this
        rnode.
        """
        rnode.rtt = rtt
        current_time = time.time()
        if rnode.in_quarantine:
            # Must be evaluated before last_action_ts is refreshed below.
            rnode.in_quarantine = (
                rnode.last_action_ts < current_time - QUARANTINE_PERIOD)
        rnode.last_action_ts = current_time
        rnode.num_responses += 1
        rnode.add_event(current_time, node.RESPONSE)
        rnode.last_seen = current_time

    def _update_rnode_on_timeout(self, rnode):
        """Register a timeout for this rnode.

        You should call this method when getting a timeout for this node.
        """
        current_time = time.time()
        rnode.last_action_ts = current_time
        rnode.msgs_since_timeout = 0
        rnode.num_timeouts += 1
        rnode.add_event(current_time, node.TIMEOUT)

    def _worst_rnode(self, rnodes):
        """Return the rnode with most timeouts in a row (None if empty).

        When several rnodes tie, the last one in ``rnodes`` is returned.
        """
        max_num_timeouts = -1
        worst_rnode_so_far = None
        for rnode in rnodes:
            num_timeouts = rnode.timeouts_in_a_row()
            # '>=' (not '>') so that later rnodes win ties.
            if num_timeouts >= max_num_timeouts:
                max_num_timeouts = num_timeouts
                worst_rnode_so_far = rnode
        return worst_rnode_so_far