class Network(EventReactorMixin):
    '''Network controller

    :CVariables:
        MAX_UDP_PACKET_SIZE
            The maximum UDP packet size allowed, in bytes.
        DEFAULT_TIMEOUT
            The time in seconds before a reply is timed out.
        STREAM_DATA_SIZE
            The size in bytes of the parts of the file transmitted.
        SEQUENCE_ID_SIZE
            The size in bytes of a sequence ID.
        DEFAULT_POOL_SIZE
            The number of workers in the thread pool.
    '''

    MAX_UDP_PACKET_SIZE = 65507  # bytes
    DEFAULT_TIMEOUT = 10  # seconds
    STREAM_DATA_SIZE = 1024  # bytes
    SEQUENCE_ID_SIZE = 20  # bytes
    DEFAULT_POOL_SIZE = 20

    def __init__(self, event_reactor, address=('127.0.0.1', 0)):
        EventReactorMixin.__init__(self, event_reactor)
        self._server = UDPServer(event_reactor, address=address)
        # By passing the same socket object to the client, other nodes
        # are able to reply to our server's port.
        self._client = UDPClient(socket_obj=self._server.socket)
        self._reply_table = ReplyTable()
        self._downloads = {}
        self._pool_executor = WrappedThreadPoolExecutor(
            Network.DEFAULT_POOL_SIZE, event_reactor)
        self._event_scheduler = EventScheduler(event_reactor)
        self._transfer_timer_id = EventID(self, 'Clean transfers')
        self._running = True
        self._register_handlers()
        self._server.start()

    @property
    def server_address(self):
        '''The address of the server'''
        return self._server.server_address

    def _register_handlers(self):
        '''Register the event callbacks'''
        self.event_reactor.register_handler(UDP_INBOUND_EVENT,
            self._udp_incoming_callback)
        self.event_reactor.register_handler(EventReactor.STOP_ID,
            self._stop_callback)
        self.event_reactor.register_handler(self._transfer_timer_id,
            self._clean_download)

    def _stop_callback(self, event_id):
        '''Stop and expire everything'''
        self._running = False

        for transfer_id in list(self._downloads.keys()):
            download_task = self._downloads[transfer_id]
            del self._downloads[transfer_id]
            download_task.transfer(None)

        for key in list(self._reply_table.out_table.keys()):
            event = self._reply_table.out_table[key]
            event.set()

    def _clean_download(self, event_id, transfer_id):
        '''Remove a timed-out file download'''
        if transfer_id not in self._downloads:
            # The download may have been removed already by a stop.
            return

        download_task = self._downloads[transfer_id]
        last_modified = download_task.last_modified
        timeout = download_task.timeout

        if last_modified + timeout < time.time():
            _logger.debug('Cleaned out download %s', transfer_id)
            del self._downloads[transfer_id]
            download_task.transfer(None)
        else:
            _logger.debug('Still alive download %s', transfer_id)
            self._event_scheduler.add_one_shot(timeout,
                self._transfer_timer_id, transfer_id)

    def _udp_incoming_callback(self, event_id, address, data):
        '''Process an incoming UDP packet'''
        if not self._running:
            return

        _logger.debug('UDP %s←%s %s', self.server_address, address,
            data[:160])

        packet_dict = self._unpack_udp_data(data)

        if not packet_dict:
            return

        data_packet = DataPacket(address, packet_dict,
            packet_dict.get(JSONKeys.SEQUENCE_ID)
            or packet_dict.get(JSONKeys.REPLY_SEQUENCE_ID))

        if JSONKeys.REPLY_SEQUENCE_ID in packet_dict:
            self._accept_reply(data_packet)
        elif JSONKeys.TRANSFER_ID in packet_dict:
            self._accept_transfer(data_packet)
        else:
            self._accept_packet(data_packet)

    def _accept_packet(self, data_packet):
        self.receive_callback(data_packet)

    def receive_callback(self, data_packet):
        '''The function called when a data packet arrives.

        :Parameters:
            data_packet: :class:`DataPacket`
                The incoming data packet

        This function is called for packets that are not replies.
        Implementors of this class should override this method.
        '''

        raise NotImplementedError()

    def expect_incoming_transfer(self, transfer_id,
            timeout=DEFAULT_TIMEOUT, download_task_class=None,
            max_size=None):
        '''Allow a transfer for download.

        :Parameters:
            transfer_id: ``str``
                A transfer ID that the other client uses for
                transferring data.
            timeout: ``int``, ``float``
                Time in seconds before the transfer times out.
            download_task_class: ``class``, ``None``
                The task class to instantiate. Defaults to
                :class:`DownloadTask`.
            max_size: ``int``, ``None``
                The maximum file size.

        :rtype: :class:`DownloadTask`
        :return: A future that returns a file object that may have been
            interrupted. The progress is the number of bytes downloaded.
        '''

        download_task_class = download_task_class or DownloadTask
        download_task = download_task_class(max_size=max_size)
        self._downloads[transfer_id] = download_task
        self._event_scheduler.add_one_shot(timeout,
            self._transfer_timer_id, transfer_id)
        self._pool_executor.submit(download_task)

        return download_task

    def _accept_reply(self, data_packet):
        '''Process a reply and allow a future to resume'''
        sequence_id = data_packet.sequence_id
        address = data_packet.address
        event = self._reply_table.get_out_entry(sequence_id, address)

        if not event:
            _logger.debug('Unknown seq id %s, packet discarded',
                sequence_id)
            return

        self._reply_table.remove_out_entry(sequence_id, address)
        self._reply_table.add_in_entry(sequence_id, address, data_packet)
        event.set()

    def _accept_transfer(self, data_packet):
        '''Process a file download'''
        transfer_id = data_packet.dict_obj[JSONKeys.TRANSFER_ID]

        if transfer_id in self._downloads:
            if JSONKeys.TRANSFER_DATA in data_packet.dict_obj:
                self._read_download(data_packet, transfer_id)
                return

        _logger.debug('Transfer discarded')

    def _read_download(self, data_packet, transfer_id):
        '''Read transferred data'''
        _logger.debug('Read download')

        download_task = self._downloads[transfer_id]
        data_str = data_packet.dict_obj[JSONKeys.TRANSFER_DATA]
        download_task.address = data_packet.address

        if data_str is None:
            # A null data field signals the end of the transfer.
            download_task.transfer(None)
            _logger.debug('Read download finished')
            return

        try:
            data = base64.b64decode(data_str.encode())
        except binascii.Error as e:
            _logger.debug('Decode error %s', e)
            return

        download_task.transfer(data)
        _logger.debug('Read download len=%d', len(data))

        if download_task.is_running:
            d = {
                JSONKeys.TRANSFER_ID: transfer_id
            }
            self.send_answer_reply(data_packet, d)
        else:
            _logger.debug('Download aborted')

    def _pack_udp_data(self, packet_dict):
        '''Pack the dict into a format suitable for transmission.

        The format currently is zlib-compressed JSON.
        '''

        data = zlib.compress(json.dumps(packet_dict).encode())

        if len(data) < Network.MAX_UDP_PACKET_SIZE:
            _logger.debug('Packed data %s', data[:20])
            return data
        else:
            raise ValueError('Data size too large')

    def _unpack_udp_data(self, data):
        '''Convert the data into a dict'''
        try:
            dict_obj = json.loads(zlib.decompress(data).decode())
        except Exception as e:
            _logger.debug('Failed json parsing %s', e)
            return

        if not isinstance(dict_obj, dict):
            _logger.debug('Not a dict')
            return

        return dict_obj

    def send(self, address, dict_obj, timeout=None):
        '''Send the ``dict`` to the given address.

        :Parameters:
            address: ``tuple``
                A 2-tuple with the host and port number.
            dict_obj: ``dict``
                The ``dict`` that will be converted to JSON format.
            timeout: ``None``, ``int``, ``float``, ``True``
                If `timeout` is a number, the class will attempt to
                ensure delivery and wait for a reply. A future will be
                returned. If ``True``, the default timeout will be
                used.

        :rtype: ``None``, :class:`SendPacketTask`
        :return: Returns a :class:`SendPacketTask` if a timeout is
            given. The result is either a :class:`DataPacket` or
            ``None``.
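
        For illustration only (``network`` here stands for a concrete
        subclass instance and the address for a reachable peer)::

            task = network.send(('192.0.2.1', 12345),
                {'name': 'value'}, timeout=True)
            reply = task.result()  # a DataPacket, or None on timeout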
        '''

        if timeout is None:
            self._send_plain(address, dict_obj)
        else:
            if timeout is True:
                timeout = Network.DEFAULT_TIMEOUT

            return self._send_expect_reply(address, dict_obj, timeout)

    def _send_plain(self, address, dict_obj):
        '''Send the data as a single UDP packet'''
        _logger.debug('Dict %s→%s', self.server_address, address)
        self._client.send(address, self._pack_udp_data(dict_obj))

    def _send_expect_reply(self, address, dict_obj,
            timeout=DEFAULT_TIMEOUT):
        '''Send the data and wait for a reply.

        :rtype: :class:`SendPacketTask`
        '''

        _logger.debug('Dict %s→%s timeout=%s', self.server_address,
            address, timeout)

        sequence_id = self.new_sequence_id()
        event = threading.Event()
        self._reply_table.add_out_entry(sequence_id, address, event)
        packet_dict = dict_obj.copy()
        packet_dict[JSONKeys.SEQUENCE_ID] = sequence_id

        def send_fn():
            self._client.send(address, self._pack_udp_data(packet_dict))

        send_packet_task = SendPacketTask(send_fn, sequence_id, address,
            self._reply_table, event, timeout)
        self._pool_executor.submit(send_packet_task)

        return send_packet_task

    def send_answer_reply(self, source_data_packet, dict_obj):
        '''Send a ``dict`` that is a response to an incoming data packet.

        :Parameters:
            source_data_packet: :class:`DataPacket`
                The original incoming data packet to respond to.
            dict_obj: ``dict``
                The data to send back.

        Use this function to reply to packets that expect a response.
        This function automatically adds the sequence ID to the reply
        packet.
        '''

        address = source_data_packet.address
        sequence_id = source_data_packet.sequence_id

        _logger.debug('Dict reply %s→%s seq_id=%s', self.server_address,
            address, sequence_id)

        packet_dict = dict_obj.copy()
        packet_dict[JSONKeys.REPLY_SEQUENCE_ID] = sequence_id
        self._client.send(address, self._pack_udp_data(packet_dict))

    def send_bytes(self, address, transfer_id, bytes_,
            timeout=DEFAULT_TIMEOUT):
        '''Transfer data to another client.

        :Parameters:
            address: ``tuple``
                A 2-tuple with host and port number.
            transfer_id: ``str``, ``None``
                The transfer ID to be used. If ``None``, an ID will be
                created automatically.
            bytes_: ``bytes``
                The data to send.
            timeout: ``int``, ``float``
                The time in seconds before the transfer times out.

        :see: :func:`send_file`
        :rtype: :class:`UploadTask`
        '''

        f = io.BytesIO(bytes_)

        return self.send_file(address, transfer_id, f, timeout)

    def send_file(self, address, transfer_id, file_,
            timeout=DEFAULT_TIMEOUT):
        '''Transfer data to another client.

        :Parameters:
            address: ``tuple``
                A 2-tuple with host and port number.
            transfer_id: ``str``, ``None``
                The transfer ID to be used. If ``None``, an ID will be
                created automatically.
            file_: ``str``, ``object``
                A filename or a file-like object which has ``read``.
            timeout: ``int``, ``float``
                The time in seconds before the transfer times out.

        :rtype: :class:`UploadTask`
        :return: A future that returns an ``int`` that is the number of
            bytes sent.
        '''

        if hasattr(file_, 'read'):
            source_file = file_
        else:
            source_file = open(file_, 'rb')

        transfer_id = transfer_id or self.new_sequence_id()

        _logger.debug('Send file %s→%s', self.server_address, address)

        upload_task = UploadTask(self, address, source_file,
            transfer_id, timeout)
        self._pool_executor.submit(upload_task)

        return upload_task

    def new_sequence_id(self):
        '''Generate a new sequence ID.

        :rtype: ``str``
        '''

        return bytes_to_b64(os.urandom(Network.SEQUENCE_ID_SIZE))
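

# The subclass below is an illustrative sketch only, not part of the
# module's API: ``Network.receive_callback`` must be overridden, and
# replying through ``send_answer_reply`` resolves the sender's
# :class:`SendPacketTask` future. The name ``EchoNetwork`` is made up.
class EchoNetwork(Network):
    '''Example subclass that echoes every non-reply packet.'''

    def receive_callback(self, data_packet):
        # Echo the payload back, reusing the incoming sequence ID so a
        # peer that called ``send(..., timeout=...)`` gets its reply.
        self.send_answer_reply(data_packet, data_packet.dict_obj)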


class DHTNetwork(EventReactorMixin):
    '''The distributed hash table network

    :CVariables:
        NETWORK_ID
            The unique network ID reserved for use only in the Bytestag
            network.
    '''

    NETWORK_ID = 'BYTESTAG'
    MAX_VALUE_SIZE = 1048576  # 1 MB
    NETWORK_PARALLELISM = 3  # constant alpha
    TIME_EXPIRE = 86490  # seconds. time-to-live from the original
                         # publication date
    TIME_REFRESH = 3600  # seconds. time to refresh an unaccessed bucket
    TIME_REPLICATE = 3600  # seconds. interval between replication events
    TIME_REPUBLISH = 86400  # seconds. time after which the original
                            # publisher must republish

    def __init__(self, event_reactor, kvp_table, node_id=None,
            network=None, download_slot=None):
        '''Init

        :Parameters:
            event_reactor : :class:`.EventReactor`
                The event reactor.
            kvp_table : :class:`.KVPTable`
                The storage.
            node_id : :class:`.KeyBytes`
                A key to be used as the node ID.
            network : :class:`Network`, ``None``
                The network controller. If ``None``, one will be
                created.
            download_slot : :class:`.FnTaskSlot`, ``None``
                The slot that limits concurrent downloads. If ``None``,
                one will be created.
        '''

        EventReactorMixin.__init__(self, event_reactor)
        self._network = network or Network(event_reactor)
        self._network.receive_callback = self._receive_callback
        self._routing_table = RoutingTable()
        self._key = node_id or KeyBytes()
        # Integer division: the executor expects a whole number of
        # workers.
        self._pool_executor = WrappedThreadPoolExecutor(
            Network.DEFAULT_POOL_SIZE // 2, event_reactor)
        self._kvp_table = kvp_table
        self._event_scheduler = EventScheduler(event_reactor)
        self._refresh_timer_id = EventID(self, 'Refresh')
        self._download_slot = download_slot or FnTaskSlot()
        self._setup_timers()

    def _setup_timers(self):
        self._event_scheduler.add_periodic(DHTNetwork.TIME_REFRESH / 4,
            self._refresh_timer_id)
        self._event_reactor.register_handler(self._refresh_timer_id,
            self._refresh_buckets)

    @property
    def routing_table(self):
        '''The routing table

        :rtype: :class:`.RoutingTable`
        '''

        return self._routing_table

    @property
    def key(self):
        '''The node ID

        :rtype: :class:`.KeyBytes`
        '''

        return self._key

    @property
    def node(self):
        '''The node info

        :rtype: :class:`Node`
        '''

        return Node(self._key, self.address)

    @property
    def address(self):
        '''The address of the server

        :return: A ``tuple`` holding host and port number.
        :rtype: ``tuple``
        '''

        return self._network.server_address

    @property
    def download_slot(self):
        '''The :class:`.FnTaskSlot` which holds
        :class:`.ReadStoreFromNodeTask`.
        '''

        return self._download_slot

    def _template_dict(self):
        '''Return a new dict holding common values such as the network
        ID'''
        d = {
            JSONKeys.NETWORK_ID: DHTNetwork.NETWORK_ID,
            JSONKeys.NODE_ID: self._key.base64,
        }

        return d

    def _receive_callback(self, data_packet):
        '''The incoming packet callback'''
        dict_obj = data_packet.dict_obj

        if dict_obj.get(JSONKeys.NETWORK_ID) != DHTNetwork.NETWORK_ID:
            _logger.debug('Unknown network id, discarding. %s←%s',
                self.address, data_packet.address)
            return

        self._update_routing_table_from_data_packet(data_packet)

        rpc_name = dict_obj.get(JSONKeys.RPC)
        rpc_map = {
            JSONKeys.RPCs.PING: self._received_ping_rpc,
            JSONKeys.RPCs.FIND_NODE: self._received_find_node_rpc,
            JSONKeys.RPCs.FIND_VALUE: self._received_find_value_rpc,
            JSONKeys.RPCs.GET_VALUE: self._received_get_value_rpc,
            JSONKeys.RPCs.STORE: self._received_store_rpc,
        }
        fn = rpc_map.get(rpc_name)

        if fn:
            _logger.debug('Got rpc %s', rpc_name)
            fn(data_packet)
        else:
            _logger.debug('Received unknown rpc %s', rpc_name)

    def join_network(self, address):
        '''Join the network.

        :rtype: :class:`JoinNetworkTask`
        :return: A future that returns ``bool``. If ``True``, the join
            was successful.
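
        A minimal sketch, assuming ``dht`` is a running instance and
        the address points at a reachable bootstrap node::

            task = dht.join_network(('192.0.2.1', 8000))
            joined = task.result()  # blocks until the join finishes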
        '''

        _logger.debug('Join %s→%s', self.address, address)

        join_network_task = JoinNetworkTask(self, address)
        self._pool_executor.submit(join_network_task)

        return join_network_task

    def ping_address(self, address):
        '''Ping an address.

        :rtype: :class:`PingTask`
        :return: A future which returns ``bool`` or a tuple of
            (``float``, :class:`Node`). If a tuple is returned, the
            ping was successful. The items represent the ping time and
            the node.
        '''

        _logger.debug('Ping %s→%s', self.address, address)

        ping_task = PingTask(address, self)
        self._pool_executor.submit(ping_task)

        return ping_task

    def ping_node(self, node):
        '''Ping a node.

        :see: `ping_address`
        :rtype: :class:`PingTask`
        '''

        return self.ping_address(node.address)

    def _received_ping_rpc(self, data_packet):
        '''Ping RPC callback'''
        _logger.debug('Pong %s→%s', self.address, data_packet.address)

        d = self._template_dict()
        self._network.send_answer_reply(data_packet, d)

    def find_nodes_from_node(self, node, key):
        '''Find the closest nodes to a key.

        :rtype: :class:`FindNodesFromNodeTask`
        :return: A future which returns a :class:`NodeList` or
            ``None``.
        '''

        _logger.debug('Find node %s→%s %s', self.node, node, key)

        find_nodes_from_node_task = FindNodesFromNodeTask(self, node,
            key)
        self._pool_executor.submit(find_nodes_from_node_task)

        return find_nodes_from_node_task

    def find_value_from_node(self, node, key, index=None):
        '''Ask a node about values for a key.

        :Parameters:
            node: :class:`Node`
                The node to be contacted.
            key: :class:`.KeyBytes`
                The key of the value.
            index: :class:`.KeyBytes`, ``None``
                If given, the request will be filtered to that given
                index.

        :rtype: :class:`FindValueFromNodeTask`
        :return: A future which returns a
            :class:`FindValueFromNodeResult` or ``None``.
        '''

        _logger.debug('Find value %s:%s %s→%s', key, index, self.node,
            node)

        find_value_from_node_task = FindValueFromNodeTask(self, node,
            key, index)
        self._pool_executor.submit(find_value_from_node_task)

        return find_value_from_node_task

    def _received_find_node_rpc(self, data_packet):
        '''Find node RPC callback'''
        _logger.debug('Find node %s←%s', self.address,
            data_packet.address)

        key_obj = KeyBytes.new_silent(data_packet.dict_obj.get(
            JSONKeys.KEY))

        if not key_obj:
            _logger.debug('Find node %s←%s bad key', self.address,
                data_packet.address)
            return

        self._reply_find_node(data_packet, key_obj)

    def _reply_find_node(self, data_packet, key_obj):
        '''Reply to a find node RPC'''
        nodes = self._routing_table.get_close_nodes(key_obj,
            Bucket.MAX_BUCKET_SIZE)
        node_list = NodeList(nodes).to_json_dumpable()
        d = self._template_dict()
        d[JSONKeys.NODES] = node_list

        _logger.debug('Find node reply %s→%s len=%d', self.address,
            data_packet.address, len(node_list))

        self._network.send_answer_reply(data_packet, d)

    def _received_find_value_rpc(self, data_packet):
        '''Find value RPC callback'''
        _logger.debug('Find value %s←%s', self.address,
            data_packet.address)

        # The 'fake' default forces key validation to fail silently
        # instead of raising on a missing key.
        key = KeyBytes.new_silent(data_packet.dict_obj.get(
            JSONKeys.KEY, 'fake'))
        index = KeyBytes.new_silent(data_packet.dict_obj.get(
            JSONKeys.INDEX))

        if not key:
            _logger.debug('Find value %s←%s bad key', self.address,
                data_packet.address)
            return

        _logger.debug('Find value %s←%s k=%s i=%s', self.address,
            data_packet.address, key, index)

        kvpid = KVPID(key, index)

        if index and kvpid in self._kvp_table:
            kvp_record = self._kvp_table.record(kvpid)
            d = self._template_dict()
            d[JSONKeys.VALUES] = KVPExchangeInfoList([
                KVPExchangeInfo.from_kvp_record(kvp_record)
            ]).to_json_dumpable()
            self._network.send_answer_reply(data_packet, d)
        elif self._kvp_table.indices(key):
            kvp_record_list = self._kvp_table.records_by_key(key)
            d = self._template_dict()
            d[JSONKeys.VALUES] = KVPExchangeInfoList.from_kvp_record_list(
                kvp_record_list).to_json_dumpable()
            self._network.send_answer_reply(data_packet, d)
        else:
            self._reply_find_node(data_packet, key)

    def find_node_shortlist(self, key):
        '''Return nodes close to a key.

        :rtype: :class:`FindShortlistTask`
        '''

        _logger.debug('Find nodes k=%s', key)

        find_shortlist_task = FindShortlistTask(self, key,
            find_nodes=True)
        self._pool_executor.submit(find_shortlist_task)

        return find_shortlist_task

    def find_value_shortlist(self, key, index=None):
        '''Return nodes that are close to a key and may have the value.

        :rtype: :class:`FindShortlistTask`
        '''

        _logger.debug('Find value k=%s', key)

        find_shortlist_task = FindShortlistTask(self, key, index=index,
            find_nodes=False)
        self._pool_executor.submit(find_shortlist_task)

        return find_shortlist_task

    def _data_packet_to_node(self, data_packet):
        '''Extract node info from a packet.

        :rtype: :class:`Node`
        '''

        address = data_packet.address

        try:
            node_key = KeyBytes(data_packet.dict_obj.get(
                JSONKeys.NODE_ID))
        except Exception as e:
            _logger.debug('Ignore key error %s', e)
            return

        return Node(node_key, address)

    def _update_routing_table_from_data_packet(self, data_packet):
        '''Extract the node from a data packet and update the routing
        table'''
        node = self._data_packet_to_node(data_packet)

        if node:
            self._update_routing_table(node)

    def _update_routing_table(self, node):
        '''Update the routing table with this node.

        The node must have contacted us or it must have responded.
        '''

        if node.key == self._key:
            _logger.debug('Ignore node %s with our id on routing table '
                'update', node)
            return

        try:
            self._routing_table.node_update(node)
        except BucketFullError as e:
            bucket = e.bucket
            old_node = e.node
            self._update_full_bucket(bucket, old_node, node)

    @asynchronous(name='update_full_bucket')
    def _update_full_bucket(self, bucket, old_node, new_node):
        '''A full bucket callback that will ping and update the buckets'''
        _logger.debug('Update routing table, bucket=%s full', bucket)

        future = self.ping_node(old_node)
        has_responded = future.result()

        if not has_responded:
            _logger.debug('Bucket %s drop %s add %s', bucket, old_node,
                new_node)
            bucket.keep_new_node()
        else:
            _logger.debug('Bucket %s keep %s ignore %s', bucket,
                old_node, new_node)
            bucket.keep_old_node()

    def get_value_from_node(self, node, key, index=None, offset=None):
        '''Download, from a node, the data value associated with the
        key.

        :rtype: :class:`.DownloadTask`
        '''

        transfer_id = self._network.new_sequence_id()
        d = self._template_dict()
        d[JSONKeys.RPC] = JSONKeys.RPCs.GET_VALUE
        d[JSONKeys.KEY] = key.base64
        d[JSONKeys.INDEX] = index.base64 if index else key.base64
        d[JSONKeys.TRANSFER_ID] = transfer_id

        if offset:
            d[JSONKeys.VALUE_OFFSET] = offset

        task = self._network.expect_incoming_transfer(transfer_id)

        _logger.debug('Get value %s→%s transfer_id=%s', self.node, node,
            transfer_id)

        self._network.send(node.address, d)

        return task

    @asynchronous(name='received_get_value_rpc')
    def _received_get_value_rpc(self, data_packet):
        '''Get value RPC callback'''
        _logger.debug('Get value %s←%s', self.address,
            data_packet.address)

        self._update_routing_table_from_data_packet(data_packet)

        key = KeyBytes.new_silent(data_packet.dict_obj[JSONKeys.KEY])
        index = KeyBytes.new_silent(data_packet.dict_obj[JSONKeys.INDEX])
        transfer_id = data_packet.dict_obj.get(JSONKeys.TRANSFER_ID)

        if not transfer_id:
            _logger.debug('Missing transfer id')
            return

        try:
            # Coerce to int so a malformed offset is caught here rather
            # than when the value is sliced below.
            offset = int(data_packet.dict_obj.get(
                JSONKeys.VALUE_OFFSET, 0))
        except (TypeError, ValueError) as e:
            _logger.debug('Offset parse error %s', e)
            return
        kvpid = KVPID(key, index)

        if kvpid not in self._kvp_table:
            _logger.debug('KeyBytes not in cache')
            return

        data = self._kvp_table[kvpid]
        task = self._network.send_bytes(data_packet.address,
            transfer_id, data[offset:])
        bytes_sent = task.result()

        _logger.debug('Sent %d bytes', bytes_sent)

    def store_to_node(self, node, key, index, bytes_, timestamp):
        '''Send data to a node.

        :rtype: :class:`StoreToNodeTask`
        '''

        _logger.debug('Store value %s→%s', self.node, node)

        store_to_node_task = StoreToNodeTask(self, node, key, index,
            bytes_, timestamp)
        self._pool_executor.submit(store_to_node_task)

        return store_to_node_task

    @asynchronous(name='received_store_rpc')
    def _received_store_rpc(self, data_packet):
        '''Store RPC callback'''
        _logger.debug('Store value %s←%s', self.address,
            data_packet.address)

        dict_obj = data_packet.dict_obj

        # FIXME: validation
        key = KeyBytes(dict_obj[JSONKeys.KEY])
        index = KeyBytes(dict_obj[JSONKeys.INDEX])
        size = int(dict_obj[JSONKeys.SIZE])
        timestamp = int(dict_obj[JSONKeys.TIMESTAMP])

        d = self._template_dict()
        kvpid = KVPID(key, index)

        if self._kvp_table.is_acceptable(kvpid, size, timestamp):
            transfer_id = self._network.new_sequence_id()
            download_task = self._download_slot.add(
                self._network.expect_incoming_transfer, transfer_id,
                max_size=DHTNetwork.MAX_VALUE_SIZE,
                download_task_class=ReadStoreFromNodeTask)

            download_task.key = kvpid.key
            download_task.index = kvpid.index
            download_task.total_size = size

            d[JSONKeys.TRANSFER_ID] = transfer_id
            self._network.send_answer_reply(data_packet, d)

            _logger.debug('Store value %s←%s begin read', self.address,
                data_packet.address)

            file = download_task.result()

            _logger.debug('Store value %s←%s received data',
                self.address, data_packet.address)

            data = file.read()

            if index.validate_value(data):
                self._kvp_table[kvpid] = data
                kvp_record = self._kvp_table.record(kvpid)
                kvp_record.timestamp = timestamp
                kvp_record.last_update = time.time()
                kvp_record.time_to_live = \
                    self._calculate_expiration_time(key)
        else:
            self._network.send_answer_reply(data_packet, d)

    def _calculate_expiration_time(self, key):
        '''Return the expiration time for a given key'''
        bucket_number = compute_bucket_number(self.key, key)
        num_contacts = sum(
            [len(self.routing_table[i]) for i in range(bucket_number)])
        num_bucket_contacts = self._routing_table.count_close(key)
        c = num_contacts + num_bucket_contacts

        # With fewer than a full bucket of close contacts, keep the
        # full time-to-live; otherwise decay it exponentially.
        if c < Bucket.MAX_BUCKET_SIZE:
            return DHTNetwork.TIME_EXPIRE
        else:
            return DHTNetwork.TIME_EXPIRE / math.exp(
                c / Bucket.MAX_BUCKET_SIZE)

    @asynchronous(name='refresh buckets')
    def _refresh_buckets(self, event_id):
        '''Refresh stale buckets by searching for a random key in them'''
        for bucket in self._routing_table.buckets:
            if bucket.last_update + DHTNetwork.TIME_REFRESH < time.time():
                key = random_bucket_key(self.node.key, bucket.number)
                task = self.find_node_shortlist(key)
                task.result()

    def store_value(self, key, index):
        '''Publish or replicate a value to nodes.

        :rtype: :class:`StoreValueTask`
        '''

        _logger.debug('Store value %s:%s', key, index)

        store_value_task = StoreValueTask(self, key, index)
        self._pool_executor.submit(store_value_task)

        return store_value_task

    def get_value(self, key, index):
        '''Fetch a value from the network.

        :rtype: :class:`GetValueTask`
        '''

        get_value_task = GetValueTask(self, key, index)

        def f():
            # Submission is deferred until the download slot admits it.
            self._pool_executor.submit(get_value_task)
            return get_value_task

        self._download_slot.add(f)

        return get_value_task
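

# The function below is an illustrative sketch only, not part of the
# API: it shows the expected call order when bringing up a node. Any
# :class:`.KVPTable` implementation may be passed as ``kvp_table``,
# and ``bootstrap_address`` must point at a reachable node.
def example_bootstrap(event_reactor, kvp_table, bootstrap_address):
    '''Bring up a DHT node, join a network, and report success.'''
    dht_network = DHTNetwork(event_reactor, kvp_table)
    join_task = dht_network.join_network(bootstrap_address)

    # join_network() returns a JoinNetworkTask future; its result is
    # True when the join succeeded.
    return dht_network, join_task.result()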