class NodeServer(object): def __init__(self, node_id, store_file=None, explicit_configuration=None, coordinator_addresses=[], var_directory='var', configuration_update_interval=300): super(NodeServer, self).__init__() if __debug__: logging.debug('node_id = %s, store_file = %s, explicit_configuration = %s, coordinators = %s, var_directory = %s', node_id, store_file, explicit_configuration, coordinator_addresses, var_directory) self.id = node_id self.node = None var_directory = paths.path(var_directory) store_file = store_file or os.path.join(var_directory, 'data', '%s.tcb' % self.id) self.__store = Store(store_file) self.__store.open() self.__node_clients = {} self.__internal_cluster_client = service.MulticastClient(InternalNodeServiceProtocol()) configuration_directory = os.path.join(var_directory, 'etc') self.__repair_task = None self.__background_repair_enabled = False self.__background_repair_interval_seconds = None self.__read_repair_enabled = False self.__configuration = None self.__configuration_controller = ConfigurationController('nodeserver-%s' % self.id, coordinator_addresses, explicit_configuration, configuration_directory, self.__update_configuration, configuration_update_interval) def __initialize_node_client_pool(self): nodes = self.__configuration.find_neighbour_nodes_for_node(self.node) if self.node else {} recycled_node_clients = {} for node_id, client in self.__node_clients.iteritems(): if node_id in nodes: recycled_node_clients[node_id] = client else: client.disconnect() new_node_clients = {} for node_id, node in nodes.iteritems(): if node_id not in recycled_node_clients: new_node_clients[node_id] = service.Client((node.address, node.port), InternalNodeServiceProtocol, tag=node_id) self.__node_clients = dict(recycled_node_clients, **new_node_clients) # Blocking, do this after setting self.__node_clients for client in new_node_clients.itervalues(): client.connect() def __update_configuration(self, new_configuration): if __debug__: logging.debug('New configuration: %s', new_configuration) self.__configuration = new_configuration deployment = None if self.id in new_configuration.active_deployment.nodes: deployment = new_configuration.active_deployment if new_configuration.target_deployment and self.id in new_configuration.target_deployment.nodes: deployment = new_configuration.target_deployment self.node = deployment.nodes.get(self.id, None) if deployment else None self.__read_repair_enabled = deployment.read_repair_enabled if deployment else False self.__background_repair_enabled = deployment.background_repair_enabled if deployment else False self.__background_repair_interval_seconds = deployment.background_repair_interval_seconds if deployment else 0 self.__initialize_node_client_pool() # TODO: restart servers if addresses changed def __fetch_value(self, key, node_id): if __debug__: logging.debug('key: %s, node_id: %s', key, node_id) return self.__clients_for_nodes((node_id,))[0].get(key) or (None, None) def __fetch_timestamps(self, key, node_ids): if __debug__: logging.debug('key: %s', key) node_ids = dict(node_ids) node_ids.pop(self.node.id, None) if not node_ids: return [] clients = self.__clients_for_nodes(node_ids) return self.__internal_cluster_client.stat(clients, key) def __clients_for_nodes(self, node_ids): return [self.__node_clients[node_id] for node_id in node_ids] def __propagate(self, key, timestamp, value, target_nodes): if __debug__: logging.debug('key: %s, target_nodes: %s', key, target_nodes) collector = self.__internal_cluster_client.set_collector(self.__clients_for_nodes(target_nodes), 1) self.__internal_cluster_client.set_async(collector, key, timestamp, value) def __read_repair(self, key, timestamp, value, node_ids): if __debug__: logging.debug('key: %s, timestamp: %s', key, timestamp) remote_timestamps = self.__fetch_timestamps(key, node_ids) if __debug__: logging.debug('remote: %s', [(client.tag, repr(remote_timestamp)) for client, remote_timestamp in remote_timestamps]) newer = [(client, remote_timestamp) for client, remote_timestamp in remote_timestamps if remote_timestamp and remote_timestamp > timestamp] if __debug__: logging.debug('newer: %s', [(client.tag, repr(remote_timestamp)) for client, remote_timestamp in newer]) if newer: latest_client, latest_timestamp = newer[-1] latest_timestamp, latest_value = self.__fetch_value(key, latest_client.tag) if __debug__: logging.debug('latest_timestamp: %s', latest_timestamp) if latest_timestamp and latest_value: value = latest_value timestamp = latest_timestamp older = [(client, remote_timestamp) for client, remote_timestamp in remote_timestamps if remote_timestamp != None and remote_timestamp < timestamp] if __debug__: logging.debug('older: %s', [(client.tag, repr(remote_timestamp)) for client, remote_timestamp in older]) if older: older_node_ids = [client.tag for (client, remote_timestamp) in older] self.__propagate(key, timestamp, value, older_node_ids) return timestamp, value def __internal_get(self, key): if __debug__: logging.debug('key: %s', key) timestamp, value = self.__store.get(key) return timestamp or 0, value def __internal_set(self, key, timestamp, value): if __debug__: logging.debug('key: %s', key) self.__store.set(key, timestamp, value) return timestamp def __internal_stat(self, key): if __debug__: logging.debug('key: %s', key) timestamp, value = self.__store.get(key) if __debug__: logging.debug('timestamp: %s', timestamp) return timestamp or 0 def __public_get(self, key): if __debug__: logging.debug("key: %s", key) timestamp, value = None, None node_ids = self.__configuration.find_nodes_for_key(key) if self.node.id in node_ids: timestamp, value = self.__store.get(key) if self.__read_repair_enabled: new_timestamp, new_value = self.__read_repair(key, timestamp, value, node_ids) if new_timestamp > timestamp and self.node.id in node_ids: timestamp, value = new_timestamp, new_value self.__store.set(key, timestamp, value) return timestamp or 0, value def __public_set(self, key, timestamp, value): if __debug__: logging.debug("key: %s", key) node_ids = self.__configuration.find_nodes_for_key(key) if self.__read_repair_enabled: timestamp, value = self.__read_repair(key, timestamp, value, node_ids) if self.node.id in node_ids: self.__store.set(key, timestamp, value) return timestamp def __public_stat(self, key): return self.__public_get(key) def __repair_key(self, key): node_ids = self.__configuration.find_nodes_for_key(key) if self.node.id not in node_ids: self.__store.remove(key) else: timestamp, value = self.__store.get(key) if timestamp: new_timestamp, new_value = self.__read_repair(key, timestamp, value, node_ids) if new_timestamp > timestamp: self.__store.set(key, new_timestamp, new_value) def __repair_store(self): if not self.node: return if not self.__background_repair_enabled: return total_count = self.__store.count() scan_count = 0 start_time = time.time() last_time = start_time elapsed_time = 0 logging.info('Starting store repair. Total %d keys.', total_count) # TODO: Refactor away this cursor muck cursor = self.__store.cursor() while True: key = None try: cursor.next() key = cursor.key() except StopIteration: break except KeyError, e: logging.warning('KeyError: %s', e) except Exception, e: logging.warning('TC Exception: %s', e) # Not supposed to get this... pytc is broken. pass if not key: continue self.__repair_key(key) scan_count += 1 now = time.time() if now - last_time > 5.0: last_time = now elapsed_time = now - start_time if __debug__: logging.debug('Store repair in progress. Scanned %d keys. Elapsed time: %s', scan_count, timedelta(seconds=elapsed_time)) total_count = self.__store.count() if total_count == 0: break if not self.node: break if not self.__background_repair_enabled: break coio.sleep(self.__background_repair_interval_seconds / total_count)