def _collect_work_queue_results(self):
    """Sets the values of futures for tasks completed by Work Queue.
    """
    logger.debug("Starting Collector Thread")
    try:
        while not self.should_stop.value:
            if not self.submit_process.is_alive():
                raise ExecutorError(self, "Workqueue Submit Process is not alive")

            # Get the result message from the collector_queue
            try:
                task_report = self.collector_queue.get(timeout=1)
            except queue.Empty:
                continue

            # Obtain the future from the tasks dictionary
            with self.tasks_lock:
                future = self.tasks[task_report.id]

            logger.debug("Updating Future for Parsl Task {}".format(task_report.id))
            if task_report.result_received:
                future.set_result(task_report.result)
            else:
                # If there are no results, then the task failed according to one of
                # work queue modes, such as resource exhaustion.
                future.set_exception(WorkQueueTaskFailure(task_report.reason, task_report.result))
    finally:
        with self.tasks_lock:
            # Set an exception for tasks still waiting on results that Work Queue did not execute
            for fu in self.tasks.values():
                if not fu.done():
                    fu.set_exception(WorkQueueFailure("work queue executor failed to execute the task."))
    logger.debug("Exiting Collector Thread")
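
# --- Illustrative example (not part of the executor) --------------------------
# A minimal, self-contained sketch of the collector pattern used above: drain a
# result queue and resolve the matching futures. TaskReport, drain_reports, and
# the plain RuntimeError are hypothetical stand-ins for illustration only; the
# real executor uses its own report type, stop flag, and failure exceptions.
import queue
import threading
from collections import namedtuple
from concurrent.futures import Future

TaskReport = namedtuple("TaskReport", ["id", "result_received", "result", "reason"])

def drain_reports(collector_queue, tasks, tasks_lock, should_stop):
    """Resolve futures for every report on the queue until asked to stop."""
    while not should_stop.is_set():
        try:
            report = collector_queue.get(timeout=1)
        except queue.Empty:
            continue
        with tasks_lock:
            future = tasks[report.id]
        if report.result_received:
            future.set_result(report.result)
        else:
            # No result came back: surface the reported reason as an exception.
            future.set_exception(RuntimeError(report.reason))

# Usage: one completed report resolves its matching future.
tasks = {1: Future()}
q = queue.Queue()
q.put(TaskReport(id=1, result_received=True, result=42, reason=None))
stop = threading.Event()
t = threading.Thread(target=drain_reports, args=(q, tasks, threading.Lock(), stop))
t.start()
print(tasks[1].result())   # 42
stop.set()
t.join()
# -------------------------------------------------------------------------------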
def WorkQueueCollectorThread(collector_queue=multiprocessing.Queue(),
                             tasks={},
                             tasks_lock=threading.Lock(),
                             cancel_value=multiprocessing.Value('i', 1),
                             submit_process=None,
                             executor=None):
    """Processes completed Parsl tasks. If an error arose while the Parsl task
    was executed, raises the exception on the local machine.
    """
    logger.debug("Starting Collector Thread")

    continue_running = True
    while continue_running:
        if cancel_value.value == 0:
            continue_running = False
            continue

        # The WorkQueue process that creates tasks has died
        if not submit_process.is_alive() and cancel_value.value != 0:
            raise ExecutorError(executor, "Workqueue Submit Process is not alive")

        # Get the result message from the collector_queue
        try:
            item = collector_queue.get(timeout=1)
        except queue.Empty:
            continue

        parsl_tid = item["tid"]
        received = item["result_received"]

        # Obtain the future from the tasks dictionary
        with tasks_lock:
            future = tasks[parsl_tid]

        # Failed task
        if received is False:
            reason = item["reason"]
            status = item["status"]
            future.set_exception(AppFailure(reason, status))
        # Successful task
        else:
            result = item["result"]
            future_update = result["result"]
            logger.debug("Updating Future for Parsl Task {}".format(parsl_tid))
            if result["failure"] is False:
                future.set_result(future_update)
            else:
                future_fail = pickle.loads(future_update)
                exc = RemoteExceptionWrapper(*future_fail)
                try:
                    exc.reraise()
                except Exception as e:
                    future.set_exception(e)

    logger.debug("Exiting Collector Thread")
    return
def WorkQueueCollectorThread(collector_queue=multiprocessing.Queue(),
                             tasks={},
                             tasks_lock=threading.Lock(),
                             cancel_value=multiprocessing.Value('i', 1),
                             submit_process=None,
                             executor=None):
    """Processes completed Parsl tasks and sets the corresponding futures.
    """
    logger.debug("Starting Collector Thread")

    continue_running = True
    while continue_running:
        if cancel_value.value == 0:
            continue_running = False
            continue

        # The WorkQueue process that creates tasks has died
        if not submit_process.is_alive() and cancel_value.value != 0:
            raise ExecutorError(executor, "Workqueue Submit Process is not alive")

        # Get the result message from the collector_queue
        try:
            item = collector_queue.get(timeout=1)
        except queue.Empty:
            continue

        parsl_tid = item["tid"]
        received = item["result_received"]

        # Obtain the future from the tasks dictionary
        with tasks_lock:
            future = tasks[parsl_tid]

        # Failed task
        if received is False:
            reason = item["reason"]
            status = item["status"]
            future.set_exception(AppFailure(reason, status))
        # Successful task
        else:
            result = item["result"]
            future_update, _ = deserialize_object(result["result"])
            logger.debug("Updating Future for Parsl Task {}".format(parsl_tid))
            if result["failure"] is False:
                future.set_result(future_update)
            else:
                future.set_exception(RemoteExceptionWrapper(*future_update))

    logger.debug("Exiting Collector Thread")
    return
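
# --- Illustrative example (not part of the executor) --------------------------
# A minimal, standalone sketch of the cooperative-shutdown pattern the two
# collector-thread versions above rely on: a shared multiprocessing.Value acts
# as the cancel flag that another thread or process can clear. worker_loop is a
# hypothetical name for illustration only.
import multiprocessing
import threading
import time

def worker_loop(cancel_value):
    """Spin until the shared flag is cleared, mirroring the cancel_value check above."""
    while cancel_value.value != 0:
        time.sleep(0.1)          # placeholder for real work / queue polling
    print("collector loop observed cancel_value == 0 and exited")

cancel_value = multiprocessing.Value('i', 1)   # 1 = keep running, 0 = stop
t = threading.Thread(target=worker_loop, args=(cancel_value,))
t.start()
cancel_value.value = 0                         # request shutdown from the main thread
t.join()
# -------------------------------------------------------------------------------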
def submit(self, func, resource_specification, *args, **kwargs):
    """Processes the Parsl app by its arguments and submits the function
    information to the task queue, to be executed using the Work Queue
    system. The args and kwargs are processed for input and output files to
    the Parsl app, so that the files are appropriately specified for the Work
    Queue task.

    Parameters
    ----------
    func : function
        Parsl app to be submitted to the Work Queue system
    args : list
        Arguments to the Parsl app
    kwargs : dict
        Keyword arguments to the Parsl app
    """
    self.task_counter += 1
    task_id = self.task_counter

    # Create a per-task directory for the function, result, and map files
    os.mkdir(self._path_in_task(task_id))

    input_files = []
    output_files = []

    # Determine the input and output files that will exist at the workers:
    input_files += [self._register_file(f) for f in kwargs.get("inputs", []) if isinstance(f, File)]
    output_files += [self._register_file(f) for f in kwargs.get("outputs", []) if isinstance(f, File)]

    # Also consider any *arg that looks like a file as an input:
    input_files += [self._register_file(f) for f in args if isinstance(f, File)]

    for kwarg, maybe_file in kwargs.items():
        # Add appropriate input and output files from "stdout" and "stderr" keyword arguments
        if kwarg == "stdout" or kwarg == "stderr":
            if maybe_file:
                output_files.append(self._std_output_to_wq(kwarg, maybe_file))
        # For any other keyword that looks like a file, assume it is an input file
        elif isinstance(maybe_file, File):
            input_files.append(self._register_file(maybe_file))

    # Create a Future object and map it from the task ID in the tasks dictionary
    fu = Future()
    with self.tasks_lock:
        self.tasks[str(task_id)] = fu

    logger.debug("Creating task {} for function {} with args {}".format(task_id, func, args))

    # Pickle the result into an object to pass into the message buffer
    function_file = self._path_in_task(task_id, "function")
    result_file = self._path_in_task(task_id, "result")
    map_file = self._path_in_task(task_id, "map")

    logger.debug("Creating Task {} with function at: {}".format(task_id, function_file))
    logger.debug("Creating Task {} with result to be found at: {}".format(task_id, result_file))

    self._serialize_function(function_file, func, args, kwargs)

    if self.pack:
        env_pkg = self._prepare_package(func)
    else:
        env_pkg = None

    logger.debug("Constructing map for local filenames at worker for task {}".format(task_id))
    self._construct_map_file(map_file, input_files, output_files)

    if not self.submit_process.is_alive():
        raise ExecutorError(self, "Workqueue Submit Process is not alive")

    # Create message to put into the message queue
    logger.debug("Placing task {} on message queue".format(task_id))
    category = func.__qualname__ if self.autocategory else 'parsl-default'
    self.task_queue.put_nowait(ParslTaskToWq(task_id, category, env_pkg, map_file,
                                             function_file, result_file, input_files, output_files))

    return fu
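
# --- Illustrative example (not part of the executor) --------------------------
# A hypothetical, simplified version of how submit() above sorts arguments into
# input and output file lists. The stand-in File class and classify_files() are
# not Parsl APIs; they only illustrate the classification rules applied above.
class File:
    def __init__(self, path):
        self.path = path

def classify_files(args, kwargs):
    inputs = [f for f in kwargs.get("inputs", []) if isinstance(f, File)]
    outputs = [f for f in kwargs.get("outputs", []) if isinstance(f, File)]
    inputs += [a for a in args if isinstance(a, File)]        # positional File args are inputs
    for key, value in kwargs.items():
        if key in ("stdout", "stderr") and value:
            outputs.append(value)                             # redirected streams come back as outputs
        elif key not in ("inputs", "outputs") and isinstance(value, File):
            inputs.append(value)                              # any other File kwarg is an input
    return inputs, outputs

ins, outs = classify_files(
    args=(File("data.csv"),),
    kwargs={"outputs": [File("result.txt")], "stdout": "task.out"},
)
print(len(ins), len(outs))   # 1 2
# -------------------------------------------------------------------------------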
def submit(self, func, *args, **kwargs):
    """Processes the Parsl app by its arguments and submits the function
    information to the task queue, to be executed using the Work Queue
    system. The args and kwargs are processed for input and output files to
    the Parsl app, so that the files are appropriately specified for the Work
    Queue task.

    Parameters
    ----------
    func : function
        Parsl app to be submitted to the Work Queue system
    args : list
        Arguments to the Parsl app
    kwargs : dict
        Keyword arguments to the Parsl app
    """
    self.task_counter += 1
    task_id = self.task_counter

    input_files = []
    output_files = []
    std_files = []

    # Add input files from the "inputs" keyword argument
    func_inputs = kwargs.get("inputs", [])
    for inp in func_inputs:
        if isinstance(inp, File):
            input_files.append(self.create_name_tuple(inp, "in"))

    for kwarg, inp in kwargs.items():
        # Add appropriate input and output files from "stdout" and "stderr" keyword arguments
        if kwarg == "stdout" or kwarg == "stderr":
            if (isinstance(inp, tuple) and len(inp) > 1
                    and isinstance(inp[0], str) and isinstance(inp[1], str)) or isinstance(inp, str):
                if isinstance(inp, tuple):
                    inp = inp[0]
                if not os.path.exists(os.path.join(".", os.path.split(inp)[0])):
                    continue
                # Create "std" files instead of input or output files
                if inp in self.registered_files:
                    input_files.append((inp, os.path.basename(inp) + "-1", False, "std"))
                    output_files.append((inp, os.path.basename(inp), False, "std"))
                else:
                    output_files.append((inp, os.path.basename(inp), False, "std"))
                    self.registered_files.add(inp)
        # Add as an input file if the passed-in keyword argument is a File object
        elif isinstance(inp, File):
            input_files.append(self.create_name_tuple(inp, "in"))

    # Add as an input file if a positional argument is a File object
    for inp in args:
        if isinstance(inp, File):
            input_files.append(self.create_name_tuple(inp, "in"))

    # Add output files from the "outputs" keyword argument
    func_outputs = kwargs.get("outputs", [])
    for output in func_outputs:
        if isinstance(output, File):
            output_files.append(self.create_name_tuple(output, "out"))

    if not self.submit_process.is_alive():
        raise ExecutorError(self, "Workqueue Submit Process is not alive")

    # Create a Future object and map it from the task ID in the tasks dictionary
    fu = Future()
    with self.tasks_lock:
        self.tasks[str(task_id)] = fu

    logger.debug("Creating task {} for function {} with args {}".format(task_id, func, args))

    # Pickle the result into an object to pass into the message buffer
    function_data_file = os.path.join(self.function_data_dir, "task_" + str(task_id) + "_function_data")
    function_result_file = os.path.join(self.function_data_dir, "task_" + str(task_id) + "_function_result")

    logger.debug("Creating Task {} with executable at: {}".format(task_id, function_data_file))
    logger.debug("Creating Task {} with result to be found at: {}".format(task_id, function_result_file))

    if self.source:
        # Obtain function information and put it into a dictionary
        source_code = inspect.getsource(func)
        name = func.__name__
        function_info = {"source code": source_code,
                         "name": name,
                         "args": args,
                         "kwargs": kwargs}
    else:
        # Serialize function information
        function_info = pack_apply_message(func, args, kwargs,
                                           buffer_threshold=1024 * 1024,
                                           item_threshold=1024)

    # Pack the function data into a file
    with open(function_data_file, "wb") as f:
        pickle.dump(function_info, f)

    # Create message to put into the message queue
    logger.debug("Placing task {} on message queue".format(task_id))
    msg = {"task_id": task_id,
           "data_loc": function_data_file,
           "result_loc": function_result_file,
           "input_files": input_files,
           "output_files": output_files,
           "std_files": std_files}

    self.task_queue.put_nowait(msg)

    return fu
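
# --- Illustrative example (not part of the executor) --------------------------
# A minimal sketch of the self.source branch above: ship a function's source
# text plus its call arguments, then rebuild and call it on the other side.
# The function add(), the file name "task_1_function_data", and the exec-based
# replay are hypothetical and for illustration only; run this from a saved
# script so inspect.getsource can find the source.
import inspect
import pickle

def add(x, y):
    return x + y

function_info = {
    "source code": inspect.getsource(add),
    "name": add.__name__,
    "args": (2, 3),
    "kwargs": {},
}
with open("task_1_function_data", "wb") as f:
    pickle.dump(function_info, f)

# The worker side would exec the source and call the named function.
with open("task_1_function_data", "rb") as f:
    info = pickle.load(f)
namespace = {}
exec(info["source code"], namespace)
print(namespace[info["name"]](*info["args"], **info["kwargs"]))   # 5
# -------------------------------------------------------------------------------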
def submit(self, func, *args, **kwargs):
    """Submit. We haven't yet decided what the args to this should be:
    whether it should just be func, args, and kwargs, or the partially
    evaluated fn.
    """
    self.task_counter += 1
    task_id = self.task_counter

    input_files = []
    output_files = []
    std_files = []

    func_inputs = kwargs.get("inputs", [])
    for inp in func_inputs:
        if isinstance(inp, File):
            input_files.append(self.create_name_tuple(inp, "in"))

    for kwarg, inp in kwargs.items():
        if kwarg == "stdout" or kwarg == "stderr":
            if (isinstance(inp, tuple) and len(inp) > 1
                    and isinstance(inp[0], str) and isinstance(inp[1], str)) or isinstance(inp, str):
                if isinstance(inp, tuple):
                    inp = inp[0]
                if not os.path.exists(os.path.join(".", os.path.split(inp)[0])):
                    continue
                if inp in self.registered_files:
                    input_files.append((inp, os.path.basename(inp) + "-1", False, "std"))
                    output_files.append((inp, os.path.basename(inp), False, "std"))
                else:
                    output_files.append((inp, os.path.basename(inp), False, "std"))
                    self.registered_files.add(inp)
        elif isinstance(inp, File):
            input_files.append(self.create_name_tuple(inp, "in"))

    for inp in args:
        if isinstance(inp, File):
            input_files.append(self.create_name_tuple(inp, "in"))

    func_outputs = kwargs.get("outputs", [])
    for output in func_outputs:
        if isinstance(output, File):
            output_files.append(self.create_name_tuple(output, "out"))

    if not self.submit_process.is_alive():
        raise ExecutorError(self, "Workqueue Submit Process is not alive")

    fu = Future()
    with self.tasks_lock:
        self.tasks[str(task_id)] = fu

    logger.debug("Creating task {} for function {} with args {}".format(task_id, func, args))

    # Pickle the result into an object to pass into the message buffer
    # TODO Try/Except Block
    function_data_file = os.path.join(self.function_data_dir, "task_" + str(task_id) + "_function_data")
    function_result_file = os.path.join(self.function_data_dir, "task_" + str(task_id) + "_function_result")

    logger.debug("Creating Task {} with executable at: {}".format(task_id, function_data_file))
    logger.debug("Creating Task {} with result to be found at: {}".format(task_id, function_result_file))

    fn_buf = pack_apply_message(func, args, kwargs,
                                buffer_threshold=1024 * 1024,
                                item_threshold=1024)
    with open(function_data_file, "wb") as f:
        pickle.dump(fn_buf, f)

    logger.debug("Placing task {} on message queue".format(task_id))
    msg = {"task_id": task_id,
           "data_loc": function_data_file,
           "result_loc": function_result_file,
           "input_files": input_files,
           "output_files": output_files,
           "std_files": std_files}

    self.task_queue.put_nowait(msg)

    return fu
def submit(self, func, resource_specification, *args, **kwargs):
    """Processes the Parsl app by its arguments and submits the function
    information to the task queue, to be executed using the Work Queue
    system. The args and kwargs are processed for input and output files to
    the Parsl app, so that the files are appropriately specified for the Work
    Queue task.

    Parameters
    ----------
    func : function
        Parsl app to be submitted to the Work Queue system
    resource_specification : dict
        Resources requested for the task: 'cores', 'memory', and 'disk',
        plus optionally 'gpus'
    args : list
        Arguments to the Parsl app
    kwargs : dict
        Keyword arguments to the Parsl app
    """
    cores = None
    memory = None
    disk = None
    gpus = None
    if resource_specification and isinstance(resource_specification, dict):
        logger.debug("Got resource specification: {}".format(resource_specification))

        required_resource_types = set(['cores', 'memory', 'disk'])
        acceptable_resource_types = set(['cores', 'memory', 'disk', 'gpus'])
        keys = set(resource_specification.keys())

        if not keys.issubset(acceptable_resource_types):
            message = "Task resource specification only accepts these types of resources: {}".format(
                ', '.join(acceptable_resource_types))
            logger.error(message)
            raise ExecutorError(self, message)

        if not self.autolabel and not keys.issuperset(required_resource_types):
            logger.error("Running with `autolabel=False`. In this mode, "
                         "task resource specification requires "
                         "three resources to be specified simultaneously: cores, memory, and disk")
            raise ExecutorError(self, "Task resource specification requires "
                                      "three resources to be specified simultaneously: cores, memory, and disk. "
                                      "Try setting autolabel=True if you are unsure of the resource usage")

        for k in keys:
            if k == 'cores':
                cores = resource_specification[k]
            elif k == 'memory':
                memory = resource_specification[k]
            elif k == 'disk':
                disk = resource_specification[k]
            elif k == 'gpus':
                gpus = resource_specification[k]

    self.task_counter += 1
    task_id = self.task_counter

    # Create a per-task directory for the function, result, and map files
    os.mkdir(self._path_in_task(task_id))

    input_files = []
    output_files = []

    # Determine the input and output files that will exist at the workers:
    input_files += [self._register_file(f) for f in kwargs.get("inputs", []) if isinstance(f, File)]
    output_files += [self._register_file(f) for f in kwargs.get("outputs", []) if isinstance(f, File)]

    # Also consider any *arg that looks like a file as an input:
    input_files += [self._register_file(f) for f in args if isinstance(f, File)]

    for kwarg, maybe_file in kwargs.items():
        # Add appropriate input and output files from "stdout" and "stderr" keyword arguments
        if kwarg == "stdout" or kwarg == "stderr":
            if maybe_file:
                output_files.append(self._std_output_to_wq(kwarg, maybe_file))
        # For any other keyword that looks like a file, assume it is an input file
        elif isinstance(maybe_file, File):
            input_files.append(self._register_file(maybe_file))

    # Create a Future object and map it from the task ID in the tasks dictionary
    fu = Future()
    with self.tasks_lock:
        self.tasks[str(task_id)] = fu

    logger.debug("Creating task {} for function {} with args {}".format(task_id, func, args))

    # Pickle the result into an object to pass into the message buffer
    function_file = self._path_in_task(task_id, "function")
    result_file = self._path_in_task(task_id, "result")
    map_file = self._path_in_task(task_id, "map")

    logger.debug("Creating Task {} with function at: {}".format(task_id, function_file))
    logger.debug("Creating Task {} with result to be found at: {}".format(task_id, result_file))

    self._serialize_function(function_file, func, args, kwargs)

    if self.pack:
        env_pkg = self._prepare_package(func, self.extra_pkgs)
    else:
        env_pkg = None

    logger.debug("Constructing map for local filenames at worker for task {}".format(task_id))
    self._construct_map_file(map_file, input_files, output_files)

    if not self.submit_process.is_alive():
        raise ExecutorError(self, "Workqueue Submit Process is not alive")

    # Create message to put into the message queue
    logger.debug("Placing task {} on message queue".format(task_id))
    category = func.__name__ if self.autocategory else 'parsl-default'
    self.task_queue.put_nowait(ParslTaskToWq(task_id, category, cores, memory, disk, gpus,
                                             env_pkg, map_file, function_file, result_file,
                                             input_files, output_files))

    return fu
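
# --- Illustrative example (not part of the executor) --------------------------
# A standalone sketch of the resource-specification validation performed at the
# top of the submit() method above. validate_resource_spec() is illustrative
# only and not a Parsl API; the real method raises ExecutorError and also
# checks the executor's autolabel setting.
ACCEPTABLE = {'cores', 'memory', 'disk', 'gpus'}
REQUIRED = {'cores', 'memory', 'disk'}

def validate_resource_spec(spec, autolabel=False):
    keys = set(spec)
    if not keys.issubset(ACCEPTABLE):
        raise ValueError("unknown resource types: {}".format(keys - ACCEPTABLE))
    if not autolabel and not keys.issuperset(REQUIRED):
        raise ValueError("cores, memory, and disk must all be given when autolabel is off")
    return {k: spec.get(k) for k in ACCEPTABLE}

# Accepted: all required resources present, gpus optional.
print(validate_resource_spec({'cores': 4, 'memory': 2048, 'disk': 1024}))
# Rejected: missing 'disk' while autolabel is off.
try:
    validate_resource_spec({'cores': 4, 'memory': 2048})
except ValueError as e:
    print(e)
# -------------------------------------------------------------------------------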