Example #1
    def _collect_work_queue_results(self):
        """Sets the values of tasks' futures of tasks completed by work queue.
        """
        logger.debug("Starting Collector Thread")
        try:
            while not self.should_stop.value:
                if not self.submit_process.is_alive():
                    raise ExecutorError(self, "Workqueue Submit Process is not alive")

                # Get the result message from the collector_queue
                try:
                    task_report = self.collector_queue.get(timeout=1)
                except queue.Empty:
                    continue

                # Obtain the future from the tasks dictionary
                with self.tasks_lock:
                    future = self.tasks[task_report.id]

                logger.debug("Updating Future for Parsl Task {}".format(task_report.id))
                if task_report.result_received:
                    future.set_result(task_report.result)
                else:
                    # If there are no results, the task failed in one of the ways
                    # Work Queue fails tasks, such as resource exhaustion.
                    future.set_exception(WorkQueueTaskFailure(task_report.reason, task_report.result))
        finally:
            with self.tasks_lock:
                # set exception for tasks waiting for results that work queue did not execute
                for fu in self.tasks.values():
                    if not fu.done():
                        fu.set_exception(WorkQueueFailure("work queue executor failed to execute the task."))
        logger.debug("Exiting Collector Thread")
Example #2
def WorkQueueCollectorThread(collector_queue=multiprocessing.Queue(),
                             tasks={},
                             tasks_lock=threading.Lock(),
                             cancel_value=multiprocessing.Value('i', 1),
                             submit_process=None,
                             executor=None):
    """Processes completed Parsl tasks. If an error arose while the Parsl task
    was executed, raises the exception on the local machine.
    """

    logger.debug("Starting Collector Thread")

    continue_running = True
    while continue_running:
        if cancel_value.value == 0:
            continue_running = False
            continue

        # The Work Queue process that creates tasks has died
        if not submit_process.is_alive() and cancel_value.value != 0:
            raise ExecutorError(executor,
                                "Workqueue Submit Process is not alive")

        # Get the result message from the collector_queue
        try:
            item = collector_queue.get(timeout=1)
        except queue.Empty:
            continue

        parsl_tid = item["tid"]
        received = item["result_received"]

        # Obtain the future from the tasks dictionary
        with tasks_lock:
            future = tasks[parsl_tid]

        # Failed task
        if received is False:
            reason = item["reason"]
            status = item["status"]
            future.set_exception(AppFailure(reason, status))
        # Successful task
        else:
            result = item["result"]
            future_update = result["result"]
            logger.debug("Updating Future for Parsl Task {}".format(parsl_tid))
            if result["failure"] is False:
                future.set_result(future_update)
            else:
                future_fail = pickle.loads(future_update)
                exc = RemoteExceptionWrapper(*future_fail)
                try:
                    exc.reraise()
                except Exception as e:
                    future.set_exception(e)

    logger.debug("Exiting Collector Thread")
    return
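
This version signals shutdown through cancel_value, a multiprocessing.Value shared with the submit process, rather than a threading flag. A minimal sketch of that cross-process flag (names are illustrative, not Parsl's):

import multiprocessing
import time

def loop(cancel_value):
    # Mirrors the collector loop: keep running until the shared flag drops to 0
    while cancel_value.value != 0:
        time.sleep(0.1)  # stand-in for collector_queue.get(timeout=1)
    print("cancel flag seen, exiting")

if __name__ == "__main__":
    cancel_value = multiprocessing.Value('i', 1)  # 1 = keep running
    p = multiprocessing.Process(target=loop, args=(cancel_value,))
    p.start()
    time.sleep(0.3)
    with cancel_value.get_lock():
        cancel_value.value = 0  # request shutdown
    p.join()
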
Example #3
def WorkQueueCollectorThread(collector_queue=multiprocessing.Queue(),
                             tasks={},
                             tasks_lock=threading.Lock(),
                             cancel_value=multiprocessing.Value('i', 1),
                             submit_process=None,
                             executor=None):

    logger.debug("Starting Collector Thread")

    continue_running = True
    while continue_running:
        if cancel_value.value == 0:
            continue_running = False
            continue

        if not submit_process.is_alive() and cancel_value.value != 0:
            raise ExecutorError(executor,
                                "Workqueue Submit Process is not alive")

        try:
            item = collector_queue.get(timeout=1)
        except queue.Empty:
            continue

        parsl_tid = item["tid"]
        received = item["result_received"]

        with tasks_lock:
            future = tasks[parsl_tid]

        if received is False:
            reason = item["reason"]
            status = item["status"]
            future.set_exception(AppFailure(reason, status))
        else:
            result = item["result"]
            future_update, _ = deserialize_object(result["result"])
            logger.debug("Updating Future for Parsl Task {}".format(parsl_tid))
            if result["failure"] is False:
                future.set_result(future_update)
            else:
                future.set_exception(RemoteExceptionWrapper(*future_update))

    logger.debug("Exiting Collector Thread")
    return
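
Both failure branches above rebuild a remote exception on the local machine. A rough, self-contained sketch of that round trip — the worker captures sys.exc_info() and pickles a reduced form, the collector unpickles and reports it — imitating what RemoteExceptionWrapper does without being the Parsl implementation:

import pickle
import sys
import traceback

def run_remote(fn):
    try:
        return {"failure": False, "result": pickle.dumps(fn())}
    except Exception:
        exc_type, exc_value, tb = sys.exc_info()
        # Traceback objects don't pickle; ship a formatted copy instead
        return {"failure": True,
                "result": pickle.dumps((exc_type, exc_value, traceback.format_tb(tb)))}

report = run_remote(lambda: 1 / 0)
if report["failure"]:
    exc_type, exc_value, tb_text = pickle.loads(report["result"])
    print("remote failure:", exc_type.__name__, exc_value)  # ZeroDivisionError
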
Example #4
    def submit(self, func, resource_specification, *args, **kwargs):
        """Processes the Parsl app by its arguments and submits the function
        information to the task queue, to be executed using the Work Queue
        system. The args and kwargs are processed for input and output files to
        the Parsl app, so that the files are appropriately specified for the Work
        Queue task.

        Parameters
        ----------

        func : function
            Parsl app to be submitted to the Work Queue system
        resource_specification : dict
            Per-task resource requirements (accepted here but not used by this version)
        args : list
            Arguments to the Parsl app
        kwargs : dict
            Keyword arguments to the Parsl app
        """
        self.task_counter += 1
        task_id = self.task_counter

        # Create a per-task directory for the function, result, and map files
        os.mkdir(self._path_in_task(task_id))

        input_files = []
        output_files = []

        # Determine the input and output files that will exist at the workers:
        input_files += [
            self._register_file(f) for f in kwargs.get("inputs", [])
            if isinstance(f, File)
        ]
        output_files += [
            self._register_file(f) for f in kwargs.get("outputs", [])
            if isinstance(f, File)
        ]

        # Also consider any *arg that looks like a file as an input:
        input_files += [
            self._register_file(f) for f in args if isinstance(f, File)
        ]

        for kwarg, maybe_file in kwargs.items():
            # Add appropriate input and output files from "stdout" and "stderr" keyword arguments
            if kwarg == "stdout" or kwarg == "stderr":
                if maybe_file:
                    output_files.append(
                        self._std_output_to_wq(kwarg, maybe_file))
            # For any other keyword that looks like a file, assume it is an input file
            elif isinstance(maybe_file, File):
                input_files.append(self._register_file(maybe_file))

        # Create a Future object and have it be mapped from the task ID in the tasks dictionary
        fu = Future()
        with self.tasks_lock:
            self.tasks[str(task_id)] = fu

        logger.debug("Creating task {} for function {} with args {}".format(
            task_id, func, args))

        # Build the per-task paths for the serialized function, result, and map files
        function_file = self._path_in_task(task_id, "function")
        result_file = self._path_in_task(task_id, "result")
        map_file = self._path_in_task(task_id, "map")

        logger.debug("Creating Task {} with function at: {}".format(
            task_id, function_file))
        logger.debug("Creating Task {} with result to be found at: {}".format(
            task_id, result_file))

        self._serialize_function(function_file, func, args, kwargs)

        if self.pack:
            env_pkg = self._prepare_package(func)
        else:
            env_pkg = None

        logger.debug(
            "Constructing map for local filenames at worker for task {}".
            format(task_id))
        self._construct_map_file(map_file, input_files, output_files)

        if not self.submit_process.is_alive():
            raise ExecutorError(self, "Workqueue Submit Process is not alive")

        # Create message to put into the message queue
        logger.debug("Placing task {} on message queue".format(task_id))
        category = func.__qualname__ if self.autocategory else 'parsl-default'
        self.task_queue.put_nowait(
            ParslTaskToWq(task_id, category, env_pkg, map_file, function_file,
                          result_file, input_files, output_files))

        return fu
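
The ParslTaskToWq message placed on the queue varies between Parsl versions (compare Example #7, which adds resource fields). A toy reconstruction of the shape used here, with field names inferred from the call above — the definition below is a stand-in, not Parsl's:

from collections import namedtuple

# Illustrative stand-in; the real definition lives in the Work Queue executor module
ParslTaskToWq = namedtuple(
    "ParslTaskToWq",
    ["task_id", "category", "env_pkg", "map_file",
     "function_file", "result_file", "input_files", "output_files"])

msg = ParslTaskToWq(
    task_id=1,
    category="my_app",   # func.__qualname__ when autocategory is set
    env_pkg=None,        # packaged environment path when self.pack is set
    map_file="task_1/map",
    function_file="task_1/function",
    result_file="task_1/result",
    input_files=[],
    output_files=[])
print(msg.category, msg.function_file)
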
Example #5
    def submit(self, func, *args, **kwargs):
        """Processes the Parsl app by its arguments and submits the function
        information to the task queue, to be executed using the Work Queue
        system. The args and kwargs are processed for input and output files to
        the Parsl app, so that the files are appropriately specified for the Work
        Queue task.

        Parameters
        ----------

        func : function
            Parsl app to be submitted to the Work Queue system
        args : list
            Arguments to the Parsl app
        kwargs : dict
            Keyword arguments to the Parsl app
        """
        self.task_counter += 1
        task_id = self.task_counter

        input_files = []
        output_files = []
        std_files = []

        # Add input files from the "inputs" keyword argument
        func_inputs = kwargs.get("inputs", [])
        for inp in func_inputs:
            if isinstance(inp, File):
                input_files.append(self.create_name_tuple(inp, "in"))

        for kwarg, inp in kwargs.items():
            # Add appropriate input and output files from "stdout" and "stderr" keyword arguments
            if kwarg == "stdout" or kwarg == "stderr":
                if (isinstance(inp, tuple) and len(inp) > 1
                        and isinstance(inp[0], str)
                        and isinstance(inp[1], str)) or isinstance(inp, str):
                    if isinstance(inp, tuple):
                        inp = inp[0]
                    if not os.path.exists(
                            os.path.join(".",
                                         os.path.split(inp)[0])):
                        continue
                    # Create "std" files instead of input or output files
                    if inp in self.registered_files:
                        input_files.append(
                            (inp, os.path.basename(inp) + "-1", False, "std"))
                        output_files.append(
                            (inp, os.path.basename(inp), False, "std"))
                    else:
                        output_files.append(
                            (inp, os.path.basename(inp), False, "std"))
                        self.registered_files.add(inp)
            # Add to input file if passed-in argument is a File object
            elif isinstance(inp, File):
                input_files.append(self.create_name_tuple(inp, "in"))

        # Add to input file if passed-in argument is a File object
        for inp in args:
            if isinstance(inp, File):
                input_files.append(self.create_name_tuple(inp, "in"))

        # Add output files from the "outputs" keyword argument
        func_outputs = kwargs.get("outputs", [])
        for output in func_outputs:
            if isinstance(output, File):
                output_files.append(self.create_name_tuple(output, "out"))

        if not self.submit_process.is_alive():
            raise ExecutorError(self, "Workqueue Submit Process is not alive")

        # Create a Future object and have it be mapped from the task ID in the tasks dictionary
        fu = Future()
        self.tasks_lock.acquire()
        self.tasks[str(task_id)] = fu
        self.tasks_lock.release()

        logger.debug("Creating task {} for function {} with args {}".format(
            task_id, func, args))

        # Build the per-task paths for the serialized function data and result
        function_data_file = os.path.join(
            self.function_data_dir, "task_" + str(task_id) + "_function_data")
        function_result_file = os.path.join(
            self.function_data_dir,
            "task_" + str(task_id) + "_function_result")

        logger.debug("Creating Task {} with executable at: {}".format(
            task_id, function_data_file))
        logger.debug("Creating Task {} with result to be found at: {}".format(
            task_id, function_result_file))

        # Obtain function information and put into dictionary
        if self.source:
            source_code = inspect.getsource(func)
            name = func.__name__
            function_info = {
                "source code": source_code,
                "name": name,
                "args": args,
                "kwargs": kwargs
            }

            # Pack the function data into a file
            with open(function_data_file, "wb") as f:
                pickle.dump(function_info, f)
        else:
            # Serialize function information
            function_info = pack_apply_message(func,
                                               args,
                                               kwargs,
                                               buffer_threshold=1024 * 1024,
                                               item_threshold=1024)

            # Pack the function data into a file
            with open(function_data_file, "wb") as f:
                pickle.dump(function_info, f)

        # Create message to put into the message queue
        logger.debug("Placing task {} on message queue".format(task_id))
        msg = {
            "task_id": task_id,
            "data_loc": function_data_file,
            "result_loc": function_result_file,
            "input_files": input_files,
            "output_files": output_files,
            "std_files": std_files
        }
        self.task_queue.put_nowait(msg)

        return fu
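
The two serialization branches above differ only in what gets pickled: source mode ships the function's source text plus its arguments, while the default ships a packed apply message. A condensed sketch of the choice, with plain pickle standing in for pack_apply_message:

import inspect
import pickle

def my_app(x, y=2):
    return x + y

use_source = True
if use_source:
    # inspect.getsource needs the function to live in a real file, not a REPL
    function_info = {"source code": inspect.getsource(my_app),
                     "name": my_app.__name__,
                     "args": (1,), "kwargs": {"y": 3}}
else:
    # Stand-in for pack_apply_message(func, args, kwargs, ...)
    function_info = pickle.dumps((my_app, (1,), {"y": 3}))

with open("task_1_function_data", "wb") as f:
    pickle.dump(function_info, f)
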
Example #6
    def submit(self, func, *args, **kwargs):
        """Submit.

        We haven't yet decided on what the args to this can be,
        whether it should just be func, args, kwargs or be the partially evaluated
        fn
        """
        self.task_counter += 1
        task_id = self.task_counter

        input_files = []
        output_files = []
        std_files = []

        func_inputs = kwargs.get("inputs", [])
        for inp in func_inputs:
            if isinstance(inp, File):
                input_files.append(self.create_name_tuple(inp, "in"))

        for kwarg, inp in kwargs.items():
            if kwarg == "stdout" or kwarg == "stderr":
                if (isinstance(inp, tuple) and len(inp) > 1
                        and isinstance(inp[0], str)
                        and isinstance(inp[1], str)) or isinstance(inp, str):
                    if isinstance(inp, tuple):
                        inp = inp[0]
                    logger.debug(os.path.split(inp))
                    if not os.path.exists(
                            os.path.join(".",
                                         os.path.split(inp)[0])):
                        continue
                    if inp in self.registered_files:
                        input_files.append(
                            (inp, os.path.basename(inp) + "-1", False, "std"))
                        output_files.append(
                            (inp, os.path.basename(inp), False, "std"))
                    else:
                        output_files.append(
                            (inp, os.path.basename(inp), False, "std"))
                        self.registered_files.add(inp)
            elif isinstance(inp, File):
                input_files.append(self.create_name_tuple(inp, "in"))

        for inp in args:
            if isinstance(inp, File):
                input_files.append(self.create_name_tuple(inp, "in"))

        func_outputs = kwargs.get("outputs", [])
        for output in func_outputs:
            if isinstance(output, File):
                output_files.append(self.create_name_tuple(output, "out"))

        if not self.submit_process.is_alive():
            raise ExecutorError(self, "Workqueue Submit Process is not alive")

        fu = Future()
        self.tasks_lock.acquire()
        self.tasks[str(task_id)] = fu
        self.tasks_lock.release()

        logger.debug("Creating task {} for function {} with args {}".format(
            task_id, func, args))

        # Build the per-task paths for the serialized function data and result
        # TODO: wrap the file I/O below in a try/except block
        function_data_file = os.path.join(
            self.function_data_dir, "task_" + str(task_id) + "_function_data")
        function_result_file = os.path.join(
            self.function_data_dir,
            "task_" + str(task_id) + "_function_result")

        logger.debug("Creating Task {} with executable at: {}".format(
            task_id, function_data_file))
        logger.debug("Creating Task {} with result to be found at: {}".format(
            task_id, function_result_file))

        fn_buf = pack_apply_message(func,
                                    args,
                                    kwargs,
                                    buffer_threshold=1024 * 1024,
                                    item_threshold=1024)
        with open(function_data_file, "wb") as f:
            pickle.dump(fn_buf, f)

        logger.debug("Placing task {} on message queue".format(task_id))
        msg = {
            "task_id": task_id,
            "data_loc": function_data_file,
            "result_loc": function_result_file,
            "input_files": input_files,
            "output_files": output_files,
            "std_files": std_files
        }

        self.task_queue.put_nowait(msg)

        return fu
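
The stdout/stderr branch builds (local_name, remote_name, cache, kind) tuples, staging a previously registered file back in under a "-1" suffix. A small sketch of just that tuple logic; the helper name and the reading of the suffix are mine, not Parsl's:

import os

def std_file_tuples(path, already_registered):
    base = os.path.basename(path)
    if already_registered:
        # Earlier contents come in under a temporary "-1" name;
        # the fresh file is staged back out under the real name.
        return ([(path, base + "-1", False, "std")],
                [(path, base, False, "std")])
    return [], [(path, base, False, "std")]

ins, outs = std_file_tuples("app.stdout", already_registered=True)
print(ins)   # [('app.stdout', 'app.stdout-1', False, 'std')]
print(outs)  # [('app.stdout', 'app.stdout', False, 'std')]
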
Example #7
    def submit(self, func, resource_specification, *args, **kwargs):
        """Processes the Parsl app by its arguments and submits the function
        information to the task queue, to be executed using the Work Queue
        system. The args and kwargs are processed for input and output files to
        the Parsl app, so that the files are appropriately specified for the Work
        Queue task.

        Parameters
        ----------

        func : function
            Parsl app to be submitted to the Work Queue system
        resource_specification : dict
            Per-task resources: cores, memory, disk, and optionally gpus
        args : list
            Arguments to the Parsl app
        kwargs : dict
            Keyword arguments to the Parsl app
        """
        cores = None
        memory = None
        disk = None
        gpus = None
        if resource_specification and isinstance(resource_specification, dict):
            logger.debug("Got resource specification: {}".format(
                resource_specification))

            required_resource_types = set(['cores', 'memory', 'disk'])
            acceptable_resource_types = set(
                ['cores', 'memory', 'disk', 'gpus'])
            keys = set(resource_specification.keys())

            if not keys.issubset(acceptable_resource_types):
                message = "Task resource specification only accepts these types of resources: {}".format(
                    ', '.join(acceptable_resource_types))
                logger.error(message)
                raise ExecutorError(self, message)

            if not self.autolabel and not keys.issuperset(
                    required_resource_types):
                logger.error(
                    "Running with `autolabel=False`. In this mode, "
                    "task resource specification requires "
                    "three resources to be specified simultaneously: cores, memory, and disk"
                )
                raise ExecutorError(
                    self, "Task resource specification requires "
                    "three resources to be specified simultaneously: cores, memory, and disk. "
                    "Try setting autolabel=True if you are unsure of the resource usage"
                )

            for k in keys:
                if k == 'cores':
                    cores = resource_specification[k]
                elif k == 'memory':
                    memory = resource_specification[k]
                elif k == 'disk':
                    disk = resource_specification[k]
                elif k == 'gpus':
                    gpus = resource_specification[k]

        self.task_counter += 1
        task_id = self.task_counter

        # Create a per-task directory for the function, result, and map files
        os.mkdir(self._path_in_task(task_id))

        input_files = []
        output_files = []

        # Determine the input and output files that will exist at the workers:
        input_files += [
            self._register_file(f) for f in kwargs.get("inputs", [])
            if isinstance(f, File)
        ]
        output_files += [
            self._register_file(f) for f in kwargs.get("outputs", [])
            if isinstance(f, File)
        ]

        # Also consider any *arg that looks like a file as an input:
        input_files += [
            self._register_file(f) for f in args if isinstance(f, File)
        ]

        for kwarg, maybe_file in kwargs.items():
            # Add appropriate input and output files from "stdout" and "stderr" keyword arguments
            if kwarg == "stdout" or kwarg == "stderr":
                if maybe_file:
                    output_files.append(
                        self._std_output_to_wq(kwarg, maybe_file))
            # For any other keyword that looks like a file, assume it is an input file
            elif isinstance(maybe_file, File):
                input_files.append(self._register_file(maybe_file))

        # Create a Future object and have it be mapped from the task ID in the tasks dictionary
        fu = Future()
        with self.tasks_lock:
            self.tasks[str(task_id)] = fu

        logger.debug("Creating task {} for function {} with args {}".format(
            task_id, func, args))

        # Build the per-task paths for the serialized function, result, and map files
        function_file = self._path_in_task(task_id, "function")
        result_file = self._path_in_task(task_id, "result")
        map_file = self._path_in_task(task_id, "map")

        logger.debug("Creating Task {} with function at: {}".format(
            task_id, function_file))
        logger.debug("Creating Task {} with result to be found at: {}".format(
            task_id, result_file))

        self._serialize_function(function_file, func, args, kwargs)

        if self.pack:
            env_pkg = self._prepare_package(func, self.extra_pkgs)
        else:
            env_pkg = None

        logger.debug(
            "Constructing map for local filenames at worker for task {}".
            format(task_id))
        self._construct_map_file(map_file, input_files, output_files)

        if not self.submit_process.is_alive():
            raise ExecutorError(self, "Workqueue Submit Process is not alive")

        # Create message to put into the message queue
        logger.debug("Placing task {} on message queue".format(task_id))
        category = func.__name__ if self.autocategory else 'parsl-default'
        self.task_queue.put_nowait(
            ParslTaskToWq(task_id, category, cores, memory, disk, gpus,
                          env_pkg, map_file, function_file, result_file,
                          input_files, output_files))

        return fu
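
The resource checks at the top of this version reduce to two set comparisons: every key must be acceptable, and without autolabel the required trio must appear together. A self-contained sketch mirroring that rule (the function name is mine):

def validate_resource_spec(spec, autolabel=False):
    required = {'cores', 'memory', 'disk'}
    acceptable = required | {'gpus'}
    keys = set(spec)
    if not keys <= acceptable:
        raise ValueError("unknown resource types: {}".format(keys - acceptable))
    if not autolabel and not keys >= required:
        raise ValueError("cores, memory, and disk must be specified together")
    return spec

validate_resource_spec({'cores': 1, 'memory': 1000, 'disk': 1000, 'gpus': 1})
validate_resource_spec({'cores': 1}, autolabel=True)  # allowed when autolabeling
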