Example #1
0
def filesystem_receiver(logdir: str, q: "queue.Queue[AddressedMonitoringMessage]", run_dir: str) -> None:
    """Poll a directory for serialized monitoring messages and forward them to ``q``.

    Creates ``monitor-fs-radio/tmp`` and ``monitor-fs-radio/new`` under
    ``run_dir`` and then, once per second, reads every file found in the
    ``new`` directory, deserializes it, puts the resulting tuple on ``q`` and
    deletes the file. This function never returns; it is expected to be
    stopped by terminating the process running it.

    Parameters
    ----------
    logdir : str
        Directory in which ``monitoring_filesystem_radio.log`` is written.
    q : queue.Queue
        Queue that receives each successfully decoded message.
    run_dir : str
        Run directory under which the ``monitor-fs-radio`` tree is created.
    """
    logger = start_file_logger("{}/monitoring_filesystem_radio.log".format(logdir),
                               name="monitoring_filesystem_radio",
                               level=logging.DEBUG)

    logger.info("Starting filesystem radio receiver")
    setproctitle("parsl: monitoring filesystem receiver")
    base_path = f"{run_dir}/monitor-fs-radio/"
    tmp_dir = f"{base_path}/tmp/"
    new_dir = f"{base_path}/new/"
    logger.debug(f"Creating new and tmp paths under {base_path}")

    os.makedirs(tmp_dir, exist_ok=True)
    os.makedirs(new_dir, exist_ok=True)

    while True:  # this loop will end on process termination
        logger.info("Start filesystem radio receiver loop")

        # iterate over files in new_dir
        for filename in os.listdir(new_dir):
            try:
                logger.info(f"Processing filesystem radio file {filename}")
                full_path_filename = f"{new_dir}/{filename}"
                with open(full_path_filename, "rb") as f:
                    message = deserialize(f.read())
                logger.info(f"Message received is: {message}")
                # Explicit check rather than `assert`: assertions are removed
                # under `python -O`, which would let malformed payloads
                # through to the queue.
                if not isinstance(message, tuple):
                    raise TypeError(
                        f"Expected a tuple message in {filename}, got {type(message).__name__}"
                    )
                q.put(cast(AddressedMonitoringMessage, message))
                # Only delete after the message is safely queued; a failure
                # above leaves the file in place for the next pass.
                os.remove(full_path_filename)
            except Exception:
                logger.exception(f"Exception processing {filename} - probably will be retried next iteration")

        time.sleep(1)  # whats a good time for this poll?
Example #2
0
    def _queue_management_worker(self):
        """Consume task responses from ``incoming_q`` and resolve their futures.

        Each queue item is a ``(task_id, buf)`` pair. The first element of
        ``deserialize(buf)`` is the message, which must contain either a
        ``"result"`` key (its value is set directly as the future's result) or
        an ``"exception"`` key (its value is deserialized and reported to the
        future wrapped in a ``ValueError``).

        The loop runs until ``self.bad_state_is_set`` becomes true or
        ``self.is_alive`` becomes false.

        Raises:
            BadMessage: if a message carries neither a result nor an exception.
        """
        logger.debug("[MTHREAD] queue management worker starting")

        while not self.bad_state_is_set:
            # NOTE: get() is called without a timeout, so this blocks until an
            # item arrives; the loop conditions are only re-checked after that.
            task_id, buf = self.incoming_q.get()
            msg = deserialize(buf)[0]
            # TODO: handle exceptions
            task_fut = self.tasks[task_id]
            logger.debug("Got response for task id {}".format(task_id))

            if "result" in msg:
                task_fut.set_result(msg["result"])

            elif 'exception' in msg:
                # A duplicate `elif "exception" in msg: pass` used to precede
                # this branch and made it unreachable, so failed tasks never
                # resolved their futures.
                logger.warning("Task: {} has returned with an exception".format(task_id))
                try:
                    s = deserialize(msg['exception'])
                    exception = ValueError(
                        "Remote exception description: {}".format(s))
                    task_fut.set_exception(exception)
                except Exception as e:
                    # TODO could be a proper wrapped exception?
                    task_fut.set_exception(
                        DeserializationError(
                            "Received exception, but handling also threw an exception: {}"
                            .format(e)))

            else:
                raise BadMessage(
                    "Message received is neither result nor exception")

            if not self.is_alive:
                break

        logger.info("[MTHREAD] queue management worker finished")
Example #3
0
def _complete_future(
    expected_file: str, future_wrapper: FluxFutureWrapper, flux_future: Any
):
    """Done-callback that transfers a finished Flux job's outcome to the wrapper.

    Once the underlying future finishes, the task's output file is read and
    deserialized, and its contents decide whether the wrapper future receives
    a result or an exception.

    Parameters
    ----------
    expected_file : str
        Path of the file expected to hold the serialized task result.
    future_wrapper : FluxFutureWrapper
        The user-facing future that is completed by this callback.
    flux_future : FluxExecutorFuture
        The underlying future whose completion triggered this callback. It is
        passed explicitly because done-callbacks receive the completed future
        as their argument.
    """
    # A cancelled underlying future leaves the wrapper untouched.
    if flux_future.cancelled():
        return

    try:
        exit_status = flux_future.result()
    except Exception as job_err:
        future_wrapper.set_exception(job_err)
        return

    # Non-zero exit status: the job exited abnormally.
    if exit_status != 0:
        future_wrapper.set_exception(
            AppException(f"Parsl task exited abnormally: returned {exit_status}")
        )
        return

    try:
        with open(expected_file, "rb") as result_file:
            raw_bytes = result_file.read()
        outcome = deserialize(raw_bytes)
    except FileNotFoundError:
        future_wrapper.set_exception(
            FileNotFoundError(
                f"No result found for Parsl task, expected {expected_file}"
            )
        )
        return
    except Exception as load_err:
        future_wrapper.set_exception(load_err)
        return

    # The task package was read and decoded; relay what it recorded.
    if outcome.exception is None:
        future_wrapper.set_result(outcome.returnval)
    else:
        future_wrapper.set_exception(outcome.exception)
Example #4
0
def runner(incoming_q, outgoing_q):
    """This is a function that mocks the Swift-T side.

    It listens on the incoming_q for tasks and posts returns on the outgoing_q.

    Args:
         - incoming_q (Queue object) : The queue to listen on
         - outgoing_q (Queue object) : Queue to post results on

    The messages posted on the incoming_q will be of the form :

    .. code:: python

       {
          "task_id" : <uuid.uuid4 string>,
          "buffer"  : serialized buffer containing the fn, args and kwargs
       }

    If ``None`` (or any other falsy message) is received, the runner will exit.

    Response messages carry the task id and exactly one of ``result`` or
    ``exception``:

    .. code:: python

       {
          "task_id" : <uuid.uuid4 string>,
          "result"  : serialized buffer containing result
       }

       {
          "task_id" : <uuid.uuid4 string>,
          "exception" : serialized exception object
       }

    On exiting the runner will post ``None`` to the outgoing_q

    """
    logger.debug("[RUNNER] Starting")

    def execute_task(bufs):
        """Deserialize the buffer and execute the task.

        Returns the task's (unserialized) return value. Any exception raised
        while running the task is logged and re-raised to the caller.
        """
        user_ns = locals()
        user_ns.update({'__builtins__': __builtins__})

        f, args, kwargs = unpack_apply_message(bufs, user_ns, copy=False)

        # The call is executed under fixed, prefixed names in the namespace;
        # the callable's own __name__ is not used.
        prefix = "parsl_"
        fname = prefix + "f"
        argname = prefix + "args"
        kwargname = prefix + "kwargs"
        resultname = prefix + "result"

        user_ns.update({
            fname: f,
            argname: args,
            kwargname: kwargs,
            # Placeholder value; overwritten by the exec'd assignment below.
            resultname: resultname
        })

        code = "{0} = {1}(*{2}, **{3})".format(resultname, fname, argname,
                                               kwargname)

        try:
            logger.debug("[RUNNER] Executing: {0}".format(code))
            exec(code, user_ns, user_ns)

        except Exception as e:
            logger.warning("Caught exception; will raise it: {}".format(e))
            raise

        else:
            logger.debug("[RUNNER] Result: {0}".format(
                user_ns.get(resultname)))
            return user_ns.get(resultname)

    while True:
        try:
            # Blocking wait on the queue
            msg = incoming_q.get(block=True, timeout=10)

        except queue.Empty:
            # Handle case where no items were in the queue
            logger.debug("[RUNNER] Queue is empty")

        except IOError as e:
            logger.debug("[RUNNER] Broken pipe: {}".format(e))
            try:
                # Attempt to send a stop notification to the management thread
                outgoing_q.put(None)

            except Exception:
                # Best effort only: the outgoing queue may be broken as well.
                pass

            break

        except Exception as e:
            # Any other failure while waiting is logged and the wait retried.
            logger.debug("[RUNNER] Caught unknown exception: {}".format(e))

        else:
            # Handle received message
            if not msg:
                # Empty message is a die request
                logger.debug("[RUNNER] Received exit request")
                outgoing_q.put(None)
                break
            else:
                # Received a valid message, handle it
                logger.debug("[RUNNER] Got a valid task with ID {}".format(
                    msg["task_id"]))
                try:
                    response_obj = execute_task(msg['buffer'])
                    response = {
                        "task_id": msg["task_id"],
                        "result": serialize(response_obj)
                    }

                    logger.debug("[RUNNER] Returing result: {}".format(
                        deserialize(response["result"])))

                except Exception as e:
                    # Failures in unpacking, running, or serializing the task
                    # are all reported back as the task's exception.
                    logger.debug(
                        "[RUNNER] Caught task exception: {}".format(e))
                    response = {
                        "task_id": msg["task_id"],
                        "exception": serialize(e)
                    }

                outgoing_q.put(response)

    logger.debug("[RUNNER] Terminating")
Example #5
0
    def _queue_management_worker(self):
        """Drain ``incoming_q`` and resolve the futures its messages refer to.

        Every message is a dict holding a ``task_id`` plus one payload key:

        .. code:: python

            {
               "task_id" : <task_id>
               "result"  : serialized result object, if task succeeded
            }

            {
               "task_id" : <task_id>
               "exception" : serialized exception object, on failure
            }

        The payload is passed through ``deserialize`` and the first element of
        the returned pair is set on the future stored in ``self.tasks`` under
        that task id. Messages carrying neither key are only logged.

        Receiving ``None`` is a request to stop; the worker also stops when
        the queue raises ``IOError`` or when ``self.is_alive`` is false at the
        end of an iteration.
        """
        while True:
            logger.debug("[MTHREAD] Management thread active")
            try:
                incoming = self.incoming_q.get(block=True, timeout=1)

            except queue.Empty:
                # Nothing arrived within the timeout; go on to the liveness check.
                pass

            except IOError as io_err:
                logger.debug(
                    "[MTHREAD] Caught broken queue with exception code {}: {}".
                    format(io_err.errno, io_err))
                return

            except Exception as other_err:
                # Logged only; the loop keeps polling.
                logger.debug(
                    "[MTHREAD] Caught unknown exception: {}".format(other_err))

            else:
                if incoming is None:
                    logger.debug("[MTHREAD] Got None")
                    return

                logger.debug("[MTHREAD] Received message: {}".format(incoming))
                future = self.tasks[incoming['task_id']]
                if 'result' in incoming:
                    value, _ = deserialize(incoming['result'])
                    future.set_result(value)
                elif 'exception' in incoming:
                    error, _ = deserialize(incoming['exception'])
                    future.set_exception(error)

            if not self.is_alive:
                break
Example #6
0
    def _queue_management_worker(self):
        """Process batches of pickled task responses arriving on ``incoming_q``.

        Each queue item is an iterable of pickled dicts. Every dict must have
        a ``task_id`` and either a ``result`` or an ``exception`` entry:

        .. code:: python

            {
               "task_id" : <task_id>
               "result"  : serialized result object, if task succeeded
            }

            {
               "task_id" : <task_id>
               "exception" : serialized exception object, on failure
            }

        For ordinary messages the matching future is popped from
        ``self.tasks`` and completed with the deserialized result or
        exception. A message with ``task_id == -1`` that carries an exception
        is handed to ``self.set_bad_state_and_fail_all`` and the rest of the
        batch is skipped.

        A ``None`` item is a request to stop. The worker also stops when the
        queue raises anything other than ``queue.Empty``, when
        ``self.bad_state_is_set`` becomes true, or when ``self.is_alive`` is
        false at the end of an iteration.

        Raises ``BadMessage`` when a message cannot be unpickled, has no
        ``task_id``, or contains neither a result nor an exception.
        """
        logger.debug("[MTHREAD] queue management worker starting")

        while not self.bad_state_is_set:
            try:
                batch = self.incoming_q.get(timeout=1)

            except queue.Empty:
                # Timed out waiting; go on to the liveness check below.
                logger.debug("[MTHREAD] queue empty")

            except IOError as io_err:
                logger.exception(
                    "[MTHREAD] Caught broken queue with exception code {}: {}".
                    format(io_err.errno, io_err))
                return

            except Exception as other_err:
                logger.exception(
                    "[MTHREAD] Caught unknown exception: {}".format(other_err))
                return

            else:
                if batch is None:
                    logger.debug("[MTHREAD] Got None, exiting")
                    return

                for raw in batch:
                    try:
                        decoded = pickle.loads(raw)
                        task_id = decoded['task_id']
                    except pickle.UnpicklingError:
                        raise BadMessage(
                            "Message received could not be unpickled")
                    except Exception:
                        raise BadMessage(
                            "Message received does not contain 'task_id' field"
                        )

                    carries_exception = 'exception' in decoded

                    if task_id == -1 and carries_exception:
                        logger.warning(
                            "Executor shutting down due to exception from interchange"
                        )
                        fatal = deserialize(decoded['exception'])
                        self.set_bad_state_and_fail_all(fatal)
                        break

                    future = self.tasks.pop(task_id)

                    if 'result' in decoded:
                        future.set_result(deserialize(decoded['result']))

                    elif carries_exception:
                        try:
                            remote = deserialize(decoded['exception'])
                            if isinstance(remote, RemoteExceptionWrapper):
                                # Re-raise the wrapped exception locally so the
                                # raised object can be attached to the future.
                                try:
                                    remote.reraise()
                                except Exception as reraised:
                                    future.set_exception(reraised)
                            elif isinstance(remote, Exception):
                                future.set_exception(remote)
                            else:
                                raise ValueError(
                                    "Unknown exception-like type received: {}"
                                    .format(type(remote)))
                        except Exception as handling_err:
                            # TODO could be a proper wrapped exception?
                            future.set_exception(
                                DeserializationError(
                                    "Received exception, but handling also threw an exception: {}"
                                    .format(handling_err)))
                    else:
                        raise BadMessage(
                            "Message received is neither result or exception"
                        )

            if not self.is_alive:
                break
        logger.info("[MTHREAD] queue management worker finished")