def filesystem_receiver(logdir: str, q: "queue.Queue[AddressedMonitoringMessage]", run_dir: str) -> None:
    """Poll the filesystem radio directory and forward each message file to ``q``.

    Creates ``tmp`` and ``new`` directories under ``{run_dir}/monitor-fs-radio/``,
    then loops forever: every file found in ``new`` is read, deserialized, put
    on ``q`` and deleted. The directory is re-scanned once per second.

    Parameters
    ----------
    logdir : str
        Directory in which ``monitoring_filesystem_radio.log`` is written.
    q : queue.Queue[AddressedMonitoringMessage]
        Queue that receives each deserialized message.
    run_dir : str
        Run directory under which the ``monitor-fs-radio`` tree lives.

    This function never returns normally; the loop only ends when the
    process is terminated.
    """
    log_path = "{}/monitoring_filesystem_radio.log".format(logdir)
    logger = start_file_logger(log_path,
                               name="monitoring_filesystem_radio",
                               level=logging.DEBUG)

    logger.info("Starting filesystem radio receiver")
    setproctitle("parsl: monitoring filesystem receiver")

    base_path = f"{run_dir}/monitor-fs-radio/"
    tmp_dir = f"{base_path}/tmp/"
    new_dir = f"{base_path}/new/"
    logger.debug(f"Creating new and tmp paths under {base_path}")

    # tmp first, then new -- same creation order as before.
    for required_dir in (tmp_dir, new_dir):
        os.makedirs(required_dir, exist_ok=True)

    while True:  # this loop will end on process termination
        logger.info("Start filesystem radio receiver loop")

        # One pass over whatever is currently sitting in new_dir.
        for filename in os.listdir(new_dir):
            try:
                logger.info(f"Processing filesystem radio file {filename}")
                full_path_filename = f"{new_dir}/{filename}"
                with open(full_path_filename, "rb") as handle:
                    raw_bytes = handle.read()
                    message = deserialize(raw_bytes)
                logger.info(f"Message received is: {message}")
                assert isinstance(message, tuple)
                q.put(cast(AddressedMonitoringMessage, message))
                # The file is only removed once the message is on the queue,
                # so a failure anywhere above leaves it for the next pass.
                os.remove(full_path_filename)
            except Exception:
                logger.exception(f"Exception processing {filename} - probably will be retried next iteration")

        time.sleep(1)  # whats a good time for this poll?
def _queue_management_worker(self):
    """Resolve task futures from responses arriving on ``self.incoming_q``.

    Runs until ``self.bad_state_is_set`` becomes true or ``self.is_alive``
    becomes false. Each queue item is a ``(task_id, buf)`` pair; the first
    element of ``deserialize(buf)`` is the response message, which must
    contain either a ``"result"`` or an ``"exception"`` key.

    * ``"result"``: the value is set on the task's future as-is.
    * ``"exception"``: the value is deserialized and reported on the future
      as a ``ValueError`` describing the remote exception. If that handling
      itself fails, a ``DeserializationError`` is set instead.

    Raises
    ------
    BadMessage
        If a message carries neither ``"result"`` nor ``"exception"``.
    """
    logger.debug("[MTHREAD] queue management worker starting")

    while not self.bad_state_is_set:
        # TODO: why does this hang? (get() is called without a timeout)
        task_id, buf = self.incoming_q.get()
        msg = deserialize(buf)[0]
        # TODO: handle exceptions raised while deserializing / looking up the task
        task_fut = self.tasks[task_id]
        logger.debug("Got response for task id {}".format(task_id))

        if "result" in msg:
            task_fut.set_result(msg["result"])

        elif "exception" in msg:
            # A placeholder `elif "exception" in msg: pass` used to sit in
            # front of this branch, making it unreachable and leaving the
            # future of a failed task unresolved forever.
            logger.warning("Task: {} has returned with an exception".format(task_id))
            try:
                s = deserialize(msg['exception'])
                exception = ValueError(
                    "Remote exception description: {}".format(s))
                task_fut.set_exception(exception)
            except Exception as e:
                # TODO could be a proper wrapped exception?
                task_fut.set_exception(
                    DeserializationError(
                        "Received exception, but handling also threw an exception: {}"
                        .format(e)))

        else:
            raise BadMessage(
                "Message received is neither result nor exception")

        if not self.is_alive:
            break

    logger.info("[MTHREAD] queue management worker finished")
def _complete_future(
    expected_file: str, future_wrapper: FluxFutureWrapper, flux_future: Any
):
    """Done-callback that transfers a finished Flux job's outcome to the wrapper.

    Once the FluxExecutorFuture has completed, the Parsl task's output file
    is read and its content decides what is stored on the user-facing future.

    Parameters
    ----------
    expected_file : str
        Path of the Parsl task's output file, which stores the task result.
    future_wrapper : FluxFutureWrapper
        The user-facing future that receives the result or exception.
    flux_future : FluxExecutorFuture
        The future wrapped by ``future_wrapper``. It is also reachable via
        ``future_wrapper`` but must be a parameter because of how
        ``concurrent.futures.add_done_callback`` invokes callbacks.
    """
    # A cancelled underlying future needs nothing set on the wrapper.
    if flux_future.cancelled():
        return

    try:
        returncode = flux_future.result()
    except Exception as unknown_err:
        future_wrapper.set_exception(unknown_err)
        return

    # Non-zero return code: the job exited abnormally.
    if returncode != 0:
        future_wrapper.set_exception(
            AppException(f"Parsl task exited abnormally: returned {returncode}")
        )
        return

    # Job succeeded: load the task package from the output file.
    try:
        with open(expected_file, "rb") as file_handle:
            task_result = deserialize(file_handle.read())
    except FileNotFoundError:
        future_wrapper.set_exception(
            FileNotFoundError(
                f"No result found for Parsl task, expected {expected_file}"
            )
        )
        return
    except Exception as unknown_err:
        future_wrapper.set_exception(unknown_err)
        return

    # Task package deserialized successfully; it holds either an exception
    # raised by the task or the task's return value.
    if task_result.exception is not None:
        future_wrapper.set_exception(task_result.exception)
    else:
        future_wrapper.set_result(task_result.returnval)
def runner(incoming_q, outgoing_q):
    """This is a function that mocks the Swift-T side.

    It listens on the incoming_q for tasks and posts returns on the outgoing_q.

    Args:
         - incoming_q (Queue object) : The queue to listen on
         - outgoing_q (Queue object) : Queue to post results on

    The messages posted on the incoming_q will be of the form :

    .. code:: python

       {
          "task_id" : <uuid.uuid4 string>,
          "buffer"  : serialized buffer containing the fn, args and kwargs
       }

    If ``None`` (or any other falsy message) is received, the runner will exit.

    Response messages carry exactly one of ``result`` / ``exception``:

    .. code:: python

       {
          "task_id"   : <uuid.uuid4 string>,
          "result"    : serialized buffer containing result
          "exception" : serialized exception object
       }

    On exiting the runner will post ``None`` to the outgoing_q
    """
    logger.debug("[RUNNER] Starting")

    def execute_task(bufs):
        """Deserialize the buffer and execute the task.

        Returns the (unserialized) value produced by the task. Any exception
        raised while running the task is logged and re-raised to the caller.
        """
        user_ns = locals()
        user_ns.update({'__builtins__': __builtins__})

        f, args, kwargs = unpack_apply_message(bufs, user_ns, copy=False)

        # Fixed, prefixed names under which the callable, its arguments and
        # its result are placed in the exec namespace.
        prefix = "parsl_"
        fname = prefix + "f"
        argname = prefix + "args"
        kwargname = prefix + "kwargs"
        resultname = prefix + "result"

        user_ns.update({
            fname: f,
            argname: args,
            kwargname: kwargs,
            resultname: resultname
        })

        code = "{0} = {1}(*{2}, **{3})".format(resultname, fname, argname, kwargname)

        try:
            logger.debug("[RUNNER] Executing: {0}".format(code))
            exec(code, user_ns, user_ns)
        except Exception as e:
            logger.warning("Caught exception; will raise it: {}".format(e))
            raise
        else:
            logger.debug("[RUNNER] Result: {0}".format(
                user_ns.get(resultname)))
            return user_ns.get(resultname)

    while True:
        try:
            # Blocking wait on the queue
            msg = incoming_q.get(block=True, timeout=10)

        except queue.Empty:
            # Handle case where no items were in the queue
            logger.debug("[RUNNER] Queue is empty")

        except IOError as e:
            logger.debug("[RUNNER] Broken pipe: {}".format(e))
            try:
                # Attempt to send a stop notification to the management thread
                outgoing_q.put(None)
            except Exception as notify_err:
                # Best effort only: we are terminating either way, but leave
                # a trace instead of dropping the failure silently.
                logger.debug(
                    "[RUNNER] Could not send stop notification: {}".format(
                        notify_err))
            break

        except Exception as e:
            logger.debug("[RUNNER] Caught unknown exception: {}".format(e))

        else:
            # Handle received message
            if not msg:
                # Empty message is a die request
                logger.debug("[RUNNER] Received exit request")
                outgoing_q.put(None)
                break
            else:
                # Received a valid message, handle it
                logger.debug("[RUNNER] Got a valid task with ID {}".format(
                    msg["task_id"]))
                try:
                    response_obj = execute_task(msg['buffer'])
                    response = {
                        "task_id": msg["task_id"],
                        "result": serialize(response_obj)
                    }
                    # The deserialize call sits inside the try, so a result
                    # that fails to round-trip is reported as an exception.
                    logger.debug("[RUNNER] Returing result: {}".format(
                        deserialize(response["result"])))

                except Exception as e:
                    logger.debug(
                        "[RUNNER] Caught task exception: {}".format(e))
                    response = {
                        "task_id": msg["task_id"],
                        "exception": serialize(e)
                    }

                outgoing_q.put(response)

    logger.debug("[RUNNER] Terminating")
def _queue_management_worker(self):
    """Drain ``self.incoming_q`` and resolve the matching task futures.

    Each message names a task and carries either its result or its
    exception. The following messages are expected:

    .. code:: python

        {
           "task_id" : <task_id>
           "result"  : serialized result object, if task succeeded
           ... more tags could be added later
        }

        {
           "task_id" : <task_id>
           "exception" : serialized exception object, on failure
        }

    These are not supported yet, but could be added easily:

    .. code:: python

        {
           "task_id" : <task_id>
           "cpu_stat" : <>
           "mem_stat" : <>
           "io_stat"  : <>
           "started"  : tstamp
        }

    A ``None`` message is a die request: the worker returns immediately.
    """
    while True:
        logger.debug("[MTHREAD] Management thread active")
        try:
            msg = self.incoming_q.get(block=True, timeout=1)

        except queue.Empty:
            # Nothing arrived within the timeout; fall through to the
            # liveness check below.
            pass

        except IOError as e:
            logger.debug(
                "[MTHREAD] Caught broken queue with exception code {}: {}".
                format(e.errno, e))
            return

        except Exception as e:
            logger.debug(
                "[MTHREAD] Caught unknown exception: {}".format(e))

        else:
            if msg is None:
                logger.debug("[MTHREAD] Got None")
                return

            logger.debug("[MTHREAD] Received message: {}".format(msg))
            task_fut = self.tasks[msg['task_id']]

            # deserialize() yields a pair here; only the first item is used.
            if 'result' in msg:
                value, _ = deserialize(msg['result'])
                task_fut.set_result(value)
            elif 'exception' in msg:
                error, _ = deserialize(msg['exception'])
                task_fut.set_exception(error)

        # Checked after every iteration, whatever happened above.
        if not self.is_alive:
            break
def _queue_management_worker(self):
    """Drain ``self.incoming_q`` and resolve the matching task futures.

    Every queue item is an iterable of pickled messages. Once unpickled,
    each message names a task and carries either its result or its
    exception:

    .. code:: python

        {
           "task_id" : <task_id>
           "result"  : serialized result object, if task succeeded
           ... more tags could be added later
        }

        {
           "task_id" : <task_id>
           "exception" : serialized exception object, on failure
        }

    These are not supported yet, but could be added easily:

    .. code:: python

        {
           "task_id" : <task_id>
           "cpu_stat" : <>
           "mem_stat" : <>
           "io_stat"  : <>
           "started"  : tstamp
        }

    A ``None`` queue item is a die request. A message with ``task_id`` of
    ``-1`` and an ``exception`` puts the executor into its bad state and
    fails all tasks.

    Raises
    ------
    BadMessage
        If a message cannot be unpickled, has no ``task_id``, or carries
        neither a result nor an exception.
    """
    logger.debug("[MTHREAD] queue management worker starting")

    while not self.bad_state_is_set:
        try:
            msgs = self.incoming_q.get(timeout=1)

        except queue.Empty:
            # Timed out; fall through to the liveness check below.
            logger.debug("[MTHREAD] queue empty")

        except IOError as e:
            logger.exception(
                "[MTHREAD] Caught broken queue with exception code {}: {}".
                format(e.errno, e))
            return

        except Exception as e:
            logger.exception(
                "[MTHREAD] Caught unknown exception: {}".format(e))
            return

        else:
            if msgs is None:
                logger.debug("[MTHREAD] Got None, exiting")
                return

            for serialized_msg in msgs:
                # NOTE(review): pickle.loads is only safe if the queue's
                # producer is trusted -- confirm against the sender.
                try:
                    msg = pickle.loads(serialized_msg)
                    tid = msg['task_id']
                except pickle.UnpicklingError:
                    raise BadMessage(
                        "Message received could not be unpickled")
                except Exception:
                    raise BadMessage(
                        "Message received does not contain 'task_id' field"
                    )

                if tid == -1 and 'exception' in msg:
                    logger.warning(
                        "Executor shutting down due to exception from interchange"
                    )
                    exception = deserialize(msg['exception'])
                    self.set_bad_state_and_fail_all(exception)
                    # Skip the rest of this batch.
                    break

                task_fut = self.tasks.pop(tid)

                if 'result' in msg:
                    result = deserialize(msg['result'])
                    task_fut.set_result(result)

                elif 'exception' in msg:
                    try:
                        remote_exc = deserialize(msg['exception'])
                        # remote_exc should be a RemoteExceptionWrapper...
                        # so we can reraise it
                        if isinstance(remote_exc, RemoteExceptionWrapper):
                            try:
                                remote_exc.reraise()
                            except Exception as reraised:
                                task_fut.set_exception(reraised)
                        elif isinstance(remote_exc, Exception):
                            task_fut.set_exception(remote_exc)
                        else:
                            raise ValueError(
                                "Unknown exception-like type received: {}"
                                .format(type(remote_exc)))
                    except Exception as e:
                        # TODO could be a proper wrapped exception?
                        task_fut.set_exception(
                            DeserializationError(
                                "Received exception, but handling also threw an exception: {}"
                                .format(e)))

                else:
                    raise BadMessage(
                        "Message received is neither result or exception"
                    )

        # Checked after every iteration, whatever happened above.
        if not self.is_alive:
            break

    logger.info("[MTHREAD] queue management worker finished")