def _reset(cls):
    """Delete the instance."""
    cl.debug("Resetting LocalDataManager")
    cls._instance = None
    cls._hashset = set()
    cls._local_copy = dict()
    del cls
async def read_data(self, reader, writer):
    """Read data from the connection.

    NOTE: Do not forget to send an ACK or NACK after using this method,
    otherwise the connection might hang up:

        await self.send_ack(writer)
        await self.send_nack(writer)

    """
    # wait until we have read something, up to 1 KiB
    length_b = await reader.read(1024)

    if reader.at_eof():
        return

    try:
        # try and parse it as an int (expecting the length of the data);
        # struct.unpack requires the buffer to be exactly
        # struct.calcsize("L") bytes, anything else raises and is NACKed
        length = struct.unpack("L", length_b)[0]
    except Exception as e:
        # if something goes wrong send a nack and start anew
        await self.send_nack(writer)
        cl.error("An Exception occurred: {}".format(e))
        raise
    else:
        # otherwise send the ack
        await self.send_ack(writer)

    try:
        # try and read exactly the length of the data
        data = await reader.readexactly(length)
        res = data.decode("UTF-8")
        res = json.loads(res)
    except json.decoder.JSONDecodeError:
        # if we can not parse the json send a nack and start from the
        # beginning
        cl.debug("Parsing {} as json failed".format(res))
        await self.send_nack(writer)
        raise
    except Exception as e:
        # if ANYTHING else goes wrong send a nack and start from the
        # beginning
        await self.send_nack(writer)
        cl.error("An Exception occurred: {}".format(e))
        raise
    else:
        # otherwise return the received data
        return res
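# --- Example (hedged sketch, not part of the original code) ------------------
# The sender side that read_data() expects: a native "L"-packed length
# prefix, then exactly that many bytes of UTF-8 encoded JSON. Waiting for
# the peer's ACK by reading up to 1 KiB is an assumption here; the real
# ACK/NACK wire format is defined by send_ack()/send_nack() on the peer.
# Relies on the module-level struct and json imports used above.
async def _example_send_data(reader, writer, payload_dict):
    data = json.dumps(payload_dict).encode("UTF-8")
    # length prefix; must match the "L" format that read_data() unpacks
    writer.write(struct.pack("L", len(data)))
    await writer.drain()
    await reader.read(1024)  # assumed: wait for the ACK to the length
    # the JSON payload itself
    writer.write(data)
    await writer.drain()
    await reader.read(1024)  # assumed: wait for the final ACK/NACK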
def _queue_reader_executor(self, pattern=None):
    """Read the queue in a separate executor."""
    while True:

        if self._event_shutdown_process.is_set():
            cl.debug("Ceph connection shutdown event is set")
            return None

        # default pattern; not recommended, but if nothing is provided we
        # fall back to this
        if not pattern:
            pattern = [
                {"queue": self._queue_ceph_task_data, "blocking_time": 1e-1},
                {"queue": self._queue_ceph_task_hashes, "blocking_time": 0},
                {"queue": self._queue_ceph_task_index_namespace, "blocking_time": 0},
                {"queue": self._queue_ceph_task_index, "blocking_time": 0},
            ]

        override_blocking = False

        for i, q in enumerate(pattern):
            try:
                if override_blocking or q["blocking_time"] == 0:
                    new_ceph_task = q["queue"].get(False)
                else:
                    new_ceph_task = q["queue"].get(True, q["blocking_time"])
            except queue.Empty:
                # block on the first queue again if we have gotten nothing
                # from all queues
                if pattern[i] == pattern[-1]:
                    # cl.verbose("override_blocking = False")
                    override_blocking = False
            else:
                # if we got something from a non priority queue we can
                # speed through this a bit faster
                if i >= 1:
                    cl.verbose("override_blocking = True")
                    override_blocking = True
                return new_ceph_task
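# --- Example (hedged sketch, not part of the original code) ------------------
# The polling strategy above, demonstrated with plain queues: the first
# pattern entry blocks for a short time, later entries are drained
# non-blocking, which makes the first queue the de-facto priority queue.
def _example_pattern_poll():
    import queue  # local import to keep the sketch self-contained
    high_priority, low_priority = queue.Queue(), queue.Queue()
    low_priority.put({"task": "read_object_hash", "task_info": "obj_1"})
    pattern = [
        {"queue": high_priority, "blocking_time": 1e-1},
        {"queue": low_priority, "blocking_time": 0},
    ]
    for entry in pattern:
        try:
            if entry["blocking_time"] == 0:
                return entry["queue"].get(False)
            return entry["queue"].get(True, entry["blocking_time"])
        except queue.Empty:
            continue  # fall through to the next queue
    return None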
def _calc_and_write_objhash(self, objname):
    """Calculate the objhash and write it to the obj tags on the cluster."""
    cl.debug("Calculating hash for {}".format(objname))
    objval = self._get_objval(objname)
    objhash = hashlib.sha1(objval).hexdigest()
    try:
        self._ioctx.set_xattr(objname, "sha1sum", objhash.encode())
    except AttributeError:
        # can't encode objhash
        pass
    return objhash
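# --- Example (hedged sketch, not part of the original code) ------------------
# Reading the hash back and verifying it; get_xattr() is the counterpart
# of the set_xattr() call above. _verify_objhash is a hypothetical helper,
# not part of the original class.
def _verify_objhash(self, objname):
    stored = self._ioctx.get_xattr(objname, "sha1sum").decode()
    fresh = hashlib.sha1(self._get_objval(objname)).hexdigest()
    return stored == fresh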
def __del__(self):
    """Close and shut down the connection."""
    try:
        self._ioctx.close()
        cl.debug("Ceph IO context closed")
    except Exception:
        cl.debug("Could not close ceph IO context")

    try:
        self._cluster.shutdown()
        cl.debug("Cluster access shut down")
    except Exception:
        cl.debug("Could not shut down cluster access")
def start_tasks(args):
    """Start the four main tasks."""
    cl.debug("Starting program tasks")

    ceph_conf = pathlib.Path(args.config)
    ceph_pool = args.pool
    ceph_user = args.user
    host = ""
    simulation_port = args.simulation_port
    backend_port = args.backend_port

    # create all necessary queues, pipes and events for inter process
    # communication

    # inter process communication for registering new files
    #
    # a queue for sending information about new files from the simulation
    # to the data copy process
    queue_sim_datacopy_new_file = multiprocessing.Queue()
    #
    # a queue for requesting the hash for a new file from the ceph cluster
    queue_datacopy_ceph_request_hash_for_new_file = multiprocessing.Queue()
    #
    # a queue for answering the request for a hash for a new file from the
    # ceph cluster; contains the name and the hash
    queue_datacopy_ceph_answer_hash_for_new_file = multiprocessing.Queue()
    #
    # a queue for sending the name and hash of a new file to the backend
    # manager
    queue_datacopy_backend_new_file_and_hash = multiprocessing.Queue()

    # inter process communication for requesting files from the ceph cluster
    #
    # a queue for sending a request for a file to the ceph manager
    queue_backend_ceph_request_file = multiprocessing.Queue()
    #
    # a queue for answering the request for a file with the file name,
    # contents and hash
    queue_backend_ceph_answer_file_name_contents_hash = multiprocessing.Queue()

    # inter process communication for requesting the index for the backend
    # manager from the data copy
    #
    # an event for requesting the index for the backend from the data copy
    event_datacopy_backend_get_index = multiprocessing.Event()
    #
    # a queue for returning the requested index
    queue_datacopy_backend_index_data = multiprocessing.Queue()

    # inter process communication for requesting the index for the data
    # manager from the ceph cluster
    #
    # an event for requesting the index for the data copy from the ceph
    # cluster
    event_datacopy_ceph_update_index = multiprocessing.Event()
    #
    # a queue for updating the local datacopy with these names and hashes
    queue_datacopy_ceph_filename_and_hash = multiprocessing.Queue()
    #
    # a lock for queue_datacopy_ceph_filename_and_hash
    lock_datacopy_ceph_filename_and_hash = multiprocessing.Lock()

    # inter process communication for shutting down processes
    #
    # an event for shutting down the backend manager
    event_backend_manager_shutdown = multiprocessing.Event()
    #
    # an event for shutting down the ceph manager
    event_ceph_shutdown = multiprocessing.Event()
    #
    # an event for shutting down the local data manager
    event_data_manager_shutdown = multiprocessing.Event()

    # threads would probably have done it, but no time to change this now
    #
    localdata_manager = multiprocessing.Process(
        target=LocalDataManager,
        args=(
            queue_sim_datacopy_new_file,
            queue_datacopy_ceph_request_hash_for_new_file,
            queue_datacopy_ceph_answer_hash_for_new_file,
            queue_datacopy_backend_new_file_and_hash,
            event_datacopy_backend_get_index,
            queue_datacopy_backend_index_data,
            event_datacopy_ceph_update_index,
            queue_datacopy_ceph_filename_and_hash,
            event_data_manager_shutdown,
            lock_datacopy_ceph_filename_and_hash
        )
    )
    simulation_manager = multiprocessing.Process(
        target=SimulationManager,
        args=(
            host,
            simulation_port,
            queue_sim_datacopy_new_file,
        )
    )
    backend_manager = multiprocessing.Process(
        target=BackendManager,
        args=(
            host,
            backend_port,
            queue_datacopy_backend_new_file_and_hash,
            event_datacopy_backend_get_index,
            queue_datacopy_backend_index_data,
            queue_backend_ceph_request_file,
            queue_backend_ceph_answer_file_name_contents_hash,
            event_backend_manager_shutdown
        )
    )
    ceph_manager = multiprocessing.Process(
        target=CephManager,
        args=(
            ceph_conf,
            ceph_pool,
            ceph_user,
            event_ceph_shutdown,
            queue_datacopy_ceph_request_hash_for_new_file,
            queue_datacopy_ceph_answer_hash_for_new_file,
            queue_backend_ceph_request_file,
            queue_backend_ceph_answer_file_name_contents_hash,
            event_datacopy_ceph_update_index,
            queue_datacopy_ceph_filename_and_hash,
            lock_datacopy_ceph_filename_and_hash
        )
    )

    try:
        localdata_manager.start()
        backend_manager.start()
        simulation_manager.start()
        ceph_manager.start()

        localdata_manager.join()
        backend_manager.join()
        simulation_manager.join()
        ceph_manager.join()

    except KeyboardInterrupt:
        print()
        cl.info("Detected KeyboardInterrupt -- shutting down")
        event_backend_manager_shutdown.set()
        event_ceph_shutdown.set()
        # event_data_manager_shutdown.set()
        time.sleep(.1)  # give the processes some time to flush everything out

    finally:
        localdata_manager.terminate()
        backend_manager.terminate()
        simulation_manager.terminate()
        ceph_manager.terminate()
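# --- Example (hedged sketch, not part of the original code) ------------------
# A minimal entry point feeding start_tasks(); the flag names mirror the
# attributes read from `args` above, the defaults are made up.
def _example_main():
    import argparse  # local import to keep the sketch self-contained
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", required=True)  # path to the ceph config
    parser.add_argument("--pool", required=True)    # ceph pool to use
    parser.add_argument("--user", required=True)    # rados user id
    # argparse maps --simulation-port to args.simulation_port, and so on
    parser.add_argument("--simulation-port", type=int, default=9000)
    parser.add_argument("--backend-port", type=int, default=9001)
    start_tasks(parser.parse_args())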
def __init__(cls,
             queue_sim_datacopy_new_file,
             queue_datacopy_ceph_request_hash_for_new_file,
             queue_datacopy_ceph_answer_hash_for_new_file,
             queue_datacopy_backend_new_file_and_hash,
             event_datacopy_backend_get_index,
             queue_datacopy_backend_index_data,
             event_datacopy_ceph_update_index,
             queue_datacopy_ceph_filename_and_hash,
             event_data_manager_shutdown,
             lock_datacopy_ceph_filename_and_hash
             ):

    # receive new file information from the simulation
    cls._queue_sim_datacopy_new_file = queue_sim_datacopy_new_file

    # request a hash for the file
    cls._queue_datacopy_ceph_request_hash_for_new_file = queue_datacopy_ceph_request_hash_for_new_file
    cls._queue_datacopy_ceph_answer_hash_for_new_file = queue_datacopy_ceph_answer_hash_for_new_file

    # forward file and hash to the backend
    cls._queue_datacopy_backend_new_file_and_hash = queue_datacopy_backend_new_file_and_hash

    # serve index requests from the backend
    cls._event_datacopy_backend_get_index = event_datacopy_backend_get_index
    cls._queue_datacopy_backend_index_data = queue_datacopy_backend_index_data

    # request the index from the ceph cluster
    cls._event_datacopy_ceph_update_index = event_datacopy_ceph_update_index
    cls._queue_datacopy_ceph_filename_and_hash = queue_datacopy_ceph_filename_and_hash

    # shutdown event
    cls._event_data_manager_shutdown = event_data_manager_shutdown

    # index queue lock
    cls._lock_datacopy_ceph_filename_and_hash = lock_datacopy_ceph_filename_and_hash

    try:
        #
        # asyncio: watch the queues and the shutdown event
        cls._loop = asyncio.get_event_loop()

        # task for reading the queues
        cls._queue_reader_task = cls._loop.create_task(
            cls._queue_reader_coro(cls))

        # tasks for periodically updating the index
        cls._index_updater_task = cls._loop.create_task(
            cls._index_updater_coro(cls))
        cls._periodic_index_update_task = cls._loop.create_task(
            cls._periodic_index_update_coro(cls))

        tasks = [
            cls._queue_reader_task,
            cls._index_updater_task,
            cls._periodic_index_update_task
        ]

        cls._loop.run_until_complete(asyncio.wait(tasks))

        # stop the event loop; pass the method itself, calling stop() here
        # would hand its return value to call_soon_threadsafe
        cls._loop.call_soon_threadsafe(cls._loop.stop)

        cls.__del__()
        cl.debug("Shutdown of local data manager process complete")

    except KeyboardInterrupt:
        # Ctrl C passes quietly
        pass
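# --- Example (hedged sketch, not part of the original code) ------------------
# The backend-side index handshake this class serves: set the event and
# wait for the data copy to answer on the index queue. The timeout and the
# shape of the returned payload are assumptions.
def _example_request_index(event_datacopy_backend_get_index,
                           queue_datacopy_backend_index_data):
    import queue  # local import to keep the sketch self-contained
    event_datacopy_backend_get_index.set()
    try:
        # blocks until the data copy pushes the index, at most 5 seconds
        return queue_datacopy_backend_index_data.get(True, 5)
    except queue.Empty:
        return None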
def __init__(
        self,
        ceph_config,
        ceph_pool,
        pool_user,
        task_pattern,            # pattern to follow when doing tasks
        queue_ceph_task,         # queue for receiving things to do
        queue_ceph_task_data,    # queue for tasks that retrieve data (contents and hashes)
        queue_ceph_task_hashes,  # queue for tasks that retrieve hashes (externally)
        queue_ceph_task_index_namespace,  # queue for retrieving the index of a namespace
        queue_ceph_task_index,   # queue for retrieving the index (this will
                                 # start a series of events: getting the
                                 # namespaces, then the files in every
                                 # namespace, then the respective hashes)
        event_shutdown_process,  # when this event is set the connection will be closed
        queue_index,             # return queue for the index
        queue_namespace_index,   # return queue for the index for a namespace
        queue_object_tags,       # return queue for object tags
        queue_object_data,       # return queue for object data (with tags)
        queue_object_hash        # return queue for object hash
):
    """Initialize the connection."""
    self._conffile = str(pathlib.Path(ceph_config))
    self._target_pool = ceph_pool
    self._rados_id = pool_user
    self._task_pattern = task_pattern
    self._queue_ceph_task = queue_ceph_task
    self._queue_ceph_task_data = queue_ceph_task_data
    self._queue_ceph_task_hashes = queue_ceph_task_hashes
    self._queue_ceph_task_index = queue_ceph_task_index
    self._queue_ceph_task_index_namespace = queue_ceph_task_index_namespace
    self._event_shutdown_process = event_shutdown_process
    self._queue_index = queue_index
    self._queue_namespace_index = queue_namespace_index
    self._queue_object_tags = queue_object_tags
    self._queue_object_data = queue_object_data
    self._queue_object_hash = queue_object_hash

    # connect to the cluster
    self._cluster = rados.Rados(
        conffile=self._conffile, rados_id=self._rados_id)
    self._cluster.connect()

    # try opening an IO context
    try:
        self._ioctx = self._cluster.open_ioctx(self._target_pool)
    except Exception as ex:
        cl.error("Exception occurred: {}".format(ex))
        raise

    try:
        #
        # asyncio: watch the queue and the shutdown event
        self._loop = asyncio.get_event_loop()

        # task for reading the queue
        self._queue_reader_task = self._loop.create_task(
            self._queue_reader_coro(self._task_pattern))

        self._loop.run_until_complete(self._queue_reader_task)

        # stop the event loop; pass the method itself, do not call it here
        self._loop.call_soon_threadsafe(self._loop.stop)

        self.__del__()
        cl.debug("Shutdown of ceph_connection process complete")

    except KeyboardInterrupt:
        # Ctrl C passes quietly
        pass
async def _queue_reader_coro(self, pattern=None):
    """Read the queue for new things to do."""
    # select a priority pattern and parse the queues based on that; we do
    # this because it is very difficult to get a fast priority queue when
    # multiprocessing is involved
    #
    data_pattern = [
        {"queue": self._queue_ceph_task_data, "blocking_time": 1e-1},
        {"queue": self._queue_ceph_task_hashes, "blocking_time": 0},
        # {"queue": self._queue_ceph_task_index_hashes, "blocking_time": 0},
    ]
    #
    hashes_pattern = [
        {"queue": self._queue_ceph_task_hashes, "blocking_time": 1e-1},
        {"queue": self._queue_ceph_task_data, "blocking_time": 0},
        # {"queue": self._queue_ceph_task_index_hashes, "blocking_time": 0},
    ]
    #
    index_namespaces_pattern = [
        {"queue": self._queue_ceph_task_index_namespace, "blocking_time": 1e-1},
        {"queue": self._queue_ceph_task_hashes, "blocking_time": 0},
        {"queue": self._queue_ceph_task_data, "blocking_time": 0},
    ]
    #
    index_pattern = [
        {"queue": self._queue_ceph_task_index, "blocking_time": 1e-1},
        {"queue": self._queue_ceph_task_hashes, "blocking_time": 0},
        {"queue": self._queue_ceph_task_data, "blocking_time": 0},
    ]

    if pattern == "data":
        queue_pattern = data_pattern
    elif pattern == "hashes":
        queue_pattern = hashes_pattern
    # elif pattern == "index_hashes":
    #     queue_pattern = index_hashes_pattern
    elif pattern == "index_namespaces":
        queue_pattern = index_namespaces_pattern
    elif pattern == "index":
        queue_pattern = index_pattern
    else:
        cl.verbose_warning(
            "Pattern {} not found, assigning None".format(pattern))
        queue_pattern = None

    while True:

        new_task = await self._loop.run_in_executor(
            None,
            functools.partial(
                self._queue_reader_executor, pattern=queue_pattern))

        if not new_task:
            # return None when we want to stop
            return None

        try:
            task = new_task["task"]
            task_info = new_task["task_info"]
        except KeyError:
            cl.warning("Could not read task dictionary {}".format(new_task))
        else:
            if task == "read_object_value":
                cl.debug("Reading object value, task_info = {}".format(task_info))
                object_value_dict = self.read_everything_for_object(task_info)
                self._queue_object_data.put(object_value_dict)

            if task == "read_object_hash":
                cl.debug("Reading object hash, task_info = {}".format(task_info))
                object_value_dict = self.read_hash_for_object(task_info)
                self._queue_object_hash.put(object_value_dict)

            if task == "read_object_tags":
                cl.debug("Reading object tags, task_info = {}".format(task_info))
                object_value_dict = self.read_tags_for_object(task_info)
                self._queue_object_tags.put(object_value_dict)

            if task == "read_namespace_index":
                cl.debug("Reading namespace index, task_info = {}".format(task_info))
                namespace_index_dict = self.read_index_for_namespace(task_info)
                self._queue_namespace_index.put(namespace_index_dict)

            if task == "read_index":
                cl.debug("Reading index, task_info = {}".format(task_info))
                index_dict = self.read_index(task_info)
                self._queue_index.put(index_dict)

                # empty the index request queue; we just finished updating
                # and don't need to do it again for a while
                while True:
                    try:
                        # non-blocking get; a blocking get() would never
                        # raise queue.Empty and would hang here
                        self._queue_ceph_task_index.get(False)
                    except queue.Empty:
                        break
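# --- Example (hedged sketch, not part of the original code) ------------------
# A producer for the task protocol dispatched above: every task is a dict
# with "task" and "task_info" keys. Passing a bare object name as
# task_info is an assumption; the real shape is whatever
# read_everything_for_object() expects.
def _example_enqueue_read(queue_ceph_task_data, queue_object_data):
    queue_ceph_task_data.put({
        "task": "read_object_value",
        "task_info": "some_object_name",  # hypothetical object name
    })
    # the answer dict comes back on the object data return queue
    return queue_object_data.get(True, 5)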