def submit(self, func, *args, **kwargs):
    """Add a task to the dataflow system.

    If every dependency is already resolved, the task is wrapped and
    launched immediately; otherwise it is recorded as pending and will
    be launched once its dependencies complete.

    Args:
        func: Function object to execute.
        *args: Positional arguments for ``func``.
        **kwargs: Keyword arguments for ``func``.

    Returns:
        AppFuture tracking the task's eventual result.
    """
    task_id = uuid.uuid4()
    dep_cnt, depends = self._count_all_deps(task_id, args, kwargs)

    record = {
        'depends': depends,
        'func': func,
        'args': args,
        'kwargs': kwargs,
        'callback': None,
        'dep_cnt': dep_cnt,
        'exec_fu': None,
        'status': States.unsched,
        'app_fu': None
    }

    # A freshly generated UUID colliding with an existing entry would
    # indicate internal corruption rather than a user error.
    if task_id in self.tasks:
        raise DuplicateTaskError(
            "Task {0} in pending list".format(task_id))
    self.tasks[task_id] = record

    if dep_cnt == 0:
        # All dependencies met: wrap the call and launch right away.
        logger.debug("Task:%s setting to running", task_id)
        new_args, kwargs = self.sanitize_and_wrap(task_id, args, kwargs)
        exec_fu = self.launch_task(task_id, func, *new_args, **kwargs)
        record['exec_fu'] = exec_fu
        record['app_fu'] = AppFuture(exec_fu)
        record['status'] = States.running
        logger.debug("Task : %s ", record)
    else:
        # Unresolved dependencies: park the task until they complete.
        logger.debug("Task:%s setting to pending", task_id)
        record['app_fu'] = AppFuture(None)
        record['status'] = States.pending

    logger.debug("Task:%s Launched with AppFut:%s", task_id, record['app_fu'])
    return record['app_fu']
def submit(self, func, *args, executors='all', fn_hash=None, cache=False, **kwargs):
    """Add task to the dataflow system.

    If the app task has the executors attributes not set (default=='all')
    the task will be launched on a randomly selected executor from the
    list of executors. If the app task specifies a particular set of
    executors, it will be targeted at the specified executors.

    >>> IF all deps are met:
    >>>   send to the runnable queue and launch the task
    >>> ELSE:
    >>>   post the task in the pending queue

    Args:
        - func : A function object
        - *args : Args to the function

    KWargs :
        - executors (list or string) : List of executors this call could go to.
                Default='all'
        - fn_hash (Str) : Hash of the function and inputs
                Default=None
        - cache (Bool) : To enable memoization or not
        - kwargs (dict) : Rest of the kwargs to the fn passed as dict.

    Returns:
        (AppFuture) [DataFutures,]

    Raises:
        ValueError: if the DFK has been cleaned up, or if ``executors`` is
            neither the string 'all' nor a list.
        DuplicateTaskError: if the task id already exists in the task table
            (internal consistency failure).
    """
    if self.cleanup_called:
        raise ValueError("Cannot submit to a DFK that has been cleaned up")

    task_id = self.task_count
    self.task_count += 1

    if isinstance(executors, str) and executors.lower() == 'all':
        choices = list(e for e in self.executors if e != 'data_manager')
    elif isinstance(executors, list):
        choices = executors
    else:
        # Fix: previously an unexpected type for 'executors' fell through,
        # leaving 'choices' unbound and raising an opaque NameError on the
        # random.choice below. Fail with a descriptive error instead.
        raise ValueError("Task {} supplied invalid type for executors: {}".format(task_id, type(executors)))
    executor = random.choice(choices)

    # Transform remote input files to data futures
    args, kwargs = self._add_input_deps(executor, args, kwargs)

    task_def = {'depends': None,
                'executor': executor,
                'func': func,
                'func_name': func.__name__,
                'args': args,
                'kwargs': kwargs,
                'fn_hash': fn_hash,
                'memoize': cache,
                'callback': None,
                'exec_fu': None,
                'checkpoint': None,
                'fail_count': 0,
                'fail_history': [],
                'env': None,
                'status': States.unsched,
                'id': task_id,
                'time_submitted': None,
                'time_returned': None,
                'app_fu': None}

    if task_id in self.tasks:
        raise DuplicateTaskError(
            "internal consistency error: Task {0} already exists in task list".format(task_id))
    else:
        self.tasks[task_id] = task_def

    # Get the dep count and a list of dependencies for the task
    dep_cnt, depends = self._gather_all_deps(args, kwargs)
    self.tasks[task_id]['depends'] = depends

    # Extract stdout and stderr to pass to AppFuture:
    task_stdout = kwargs.get('stdout')
    task_stderr = kwargs.get('stderr')

    logger.info("Task {} submitted for App {}, waiting on tasks {}".format(
        task_id, task_def['func_name'], [fu.tid for fu in depends]))

    self.tasks[task_id]['task_launch_lock'] = threading.Lock()

    app_fu = AppFuture(None, tid=task_id, stdout=task_stdout, stderr=task_stderr)

    self.tasks[task_id]['app_fu'] = app_fu
    app_fu.add_done_callback(partial(self.handle_app_update, task_id))
    self.tasks[task_id]['status'] = States.pending
    logger.debug("Task {} set to pending state with AppFuture: {}".format(task_id, task_def['app_fu']))

    # at this point add callbacks to all dependencies to do a launch_if_ready
    # call whenever a dependency completes.

    # we need to be careful about the order of setting the state to pending,
    # adding the callbacks, and calling launch_if_ready explicitly once always below.

    # I think as long as we call launch_if_ready once after setting pending, then
    # we can add the callback dependencies at any point: if the callbacks all fire
    # before then, they won't cause a launch, but the one below will. if they fire
    # after we set it pending, then the last one will cause a launch, and the
    # explicit one won't.

    for d in depends:

        def callback_adapter(dep_fut):
            self.launch_if_ready(task_id)

        try:
            d.add_done_callback(callback_adapter)
        except Exception as e:
            logger.error("add_done_callback got an exception {} which will be ignored".format(e))

    self.launch_if_ready(task_id)

    return task_def['app_fu']
def submit(self, func, *args, executors='all', fn_hash=None, cache=False, **kwargs):
    """Add task to the dataflow system.

    If the app task has the executors attributes not set (default=='all')
    the task will be launched on a randomly selected executor from the
    list of executors. If the app task specifies a particular set of
    executors, it will be targeted at the specified executors.

    >>> IF all deps are met:
    >>>   send to the runnable queue and launch the task
    >>> ELSE:
    >>>   post the task in the pending queue

    Args:
        - func : A function object
        - *args : Args to the function

    KWargs :
        - executors (list or string) : List of executors this call could go to.
                Default='all'
        - fn_hash (Str) : Hash of the function and inputs
                Default=None
        - cache (Bool) : To enable memoization or not
        - kwargs (dict) : Rest of the kwargs to the fn passed as dict.

    Returns:
        (AppFuture) [DataFutures,]

    Raises:
        ValueError: if the DFK has been cleaned up, or if ``executors`` is
            neither the string 'all' nor a list.
        DuplicateTaskError: if the task id already exists (internal error).
    """
    if self.cleanup_called:
        raise ValueError("Cannot submit to a DFK that has been cleaned up")

    task_id = self.task_count
    self.task_count += 1

    # Resolve executor selection: 'all' means any registered executor except
    # the internal data_manager; otherwise an explicit list must be supplied.
    if isinstance(executors, str) and executors.lower() == 'all':
        choices = list(e for e in self.executors if e != 'data_manager')
    elif isinstance(executors, list):
        choices = executors
    else:
        raise ValueError("Task {} supplied invalid type for executors: {}".format(task_id, type(executors)))
    executor = random.choice(choices)

    # The below uses func.__name__ before it has been wrapped by any staging code.

    # Replace AUTO_LOGNAME sentinels in stdout/stderr kwargs with a generated
    # per-task path under run_dir/task_logs.
    label = kwargs.get('label')
    for kw in ['stdout', 'stderr']:
        if kw in kwargs:
            if kwargs[kw] == parsl.AUTO_LOGNAME:
                kwargs[kw] = os.path.join(
                    self.run_dir,
                    'task_logs',
                    str(int(task_id / 10000)).zfill(4),  # limit logs to 10k entries per directory
                    'task_{}_{}{}.{}'.format(
                        str(task_id).zfill(4),
                        func.__name__,
                        '' if label is None else '_{}'.format(label),
                        kw)
                )

    task_def = {'depends': None,
                'executor': executor,
                'func_name': func.__name__,
                'fn_hash': fn_hash,
                'memoize': cache,
                'exec_fu': None,
                'fail_count': 0,
                'fail_history': [],
                'status': States.unsched,
                'id': task_id,
                'time_submitted': None,
                'time_returned': None}

    # The AppFuture is created before staging wraps func, since the output
    # dependency wrapper below needs a handle on it.
    app_fu = AppFuture(task_def)

    # Transform remote input files to data futures
    args, kwargs, func = self._add_input_deps(executor, args, kwargs, func)

    func = self._add_output_deps(executor, args, kwargs, app_fu, func)

    # Store the (possibly wrapped) callable and its arguments on the record.
    task_def.update({
        'args': args,
        'func': func,
        'kwargs': kwargs,
        'app_fu': app_fu})

    if task_id in self.tasks:
        raise DuplicateTaskError(
            "internal consistency error: Task {0} already exists in task list".format(task_id))
    else:
        self.tasks[task_id] = task_def

    # Get the list of dependencies for the task
    depends = self._gather_all_deps(args, kwargs)
    self.tasks[task_id]['depends'] = depends

    # Build a human-readable description of what this task waits on, for the
    # submission log line below.
    depend_descs = []
    for d in depends:
        if isinstance(d, AppFuture) or isinstance(d, DataFuture):
            depend_descs.append("task {}".format(d.tid))
        else:
            depend_descs.append(repr(d))

    if depend_descs != []:
        waiting_message = "waiting on {}".format(", ".join(depend_descs))
    else:
        waiting_message = "not waiting on any dependency"

    logger.info("Task {} submitted for App {}, {}".format(task_id, task_def['func_name'], waiting_message))

    self.tasks[task_id]['task_launch_lock'] = threading.Lock()

    app_fu.add_done_callback(partial(self.handle_app_update, task_id))
    self.tasks[task_id]['status'] = States.pending
    logger.debug("Task {} set to pending state with AppFuture: {}".format(task_id, task_def['app_fu']))

    # at this point add callbacks to all dependencies to do a launch_if_ready
    # call whenever a dependency completes.

    # we need to be careful about the order of setting the state to pending,
    # adding the callbacks, and calling launch_if_ready explicitly once always below.

    # I think as long as we call launch_if_ready once after setting pending, then
    # we can add the callback dependencies at any point: if the callbacks all fire
    # before then, they won't cause a launch, but the one below will. if they fire
    # after we set it pending, then the last one will cause a launch, and the
    # explicit one won't.

    for d in depends:

        def callback_adapter(dep_fut):
            self.launch_if_ready(task_id)

        try:
            d.add_done_callback(callback_adapter)
        except Exception as e:
            logger.error("add_done_callback got an exception {} which will be ignored".format(e))

    self.launch_if_ready(task_id)

    return app_fu
def submit(self, func, *args, parsl_sites='all', fn_hash=None, cache=False, **kwargs):
    """Add task to the dataflow system.

    >>> IF all deps are met :
    >>>   send to the runnable queue and launch the task
    >>> ELSE:
    >>>   post the task in the pending queue

    Args:
        - func : A function object
        - *args : Args to the function

    KWargs :
        - parsl_sites (List|String) : List of sites this call could go to.
                Default='all'
        - fn_hash (Str) : Hash of the function and inputs
                Default=None
        - cache (Bool) : To enable memoization or not
        - kwargs (dict) : Rest of the kwargs to the fn passed as dict.

    Returns:
        (AppFuture) [DataFutures,]

    Raises:
        DuplicateTaskError: if the task id already exists in the task table.
    """
    task_id = self.task_count
    self.task_count += 1

    # Get the dep count and a list of dependencies for the task
    dep_cnt, depends = self._count_all_deps(task_id, args, kwargs)

    task_def = {'depends': depends,
                'sites': parsl_sites,
                'func': func,
                'func_name': func.__name__,
                'args': args,
                'kwargs': kwargs,
                'fn_hash': fn_hash,
                'memoize': cache,
                'callback': None,
                'dep_cnt': dep_cnt,
                'exec_fu': None,
                'checkpoint': None,
                'fail_count': 0,
                'fail_history': [],
                'env': None,
                'status': States.unsched,
                'app_fu': None}

    if task_id in self.tasks:
        raise DuplicateTaskError(
            "Task {0} in pending list".format(task_id))
    else:
        self.tasks[task_id] = task_def

    # Extract stdout and stderr to pass to AppFuture:
    task_stdout = kwargs.get('stdout', None)
    task_stderr = kwargs.get('stderr', None)

    logger.info("Task {} submitted for App {}, waiting on tasks {}".format(
        task_id, task_def['func_name'], [fu.tid for fu in depends]))

    # Handle three cases here:
    # No pending deps
    #     - But has failures -> dep_fail
    #     - No failures -> running
    # Has pending deps -> pending
    if dep_cnt == 0:
        # Wrap args/kwargs, resolving futures into values; 'exceptions'
        # collects failures raised by parent tasks.
        new_args, kwargs, exceptions = self.sanitize_and_wrap(
            task_id, args, kwargs)
        self.tasks[task_id]['args'] = new_args
        self.tasks[task_id]['kwargs'] = kwargs
        if not exceptions:
            self.tasks[task_id]['exec_fu'] = self.launch_task(
                task_id, func, *new_args, **kwargs)
            self.tasks[task_id]['app_fu'] = AppFuture(
                self.tasks[task_id]['exec_fu'],
                tid=task_id,
                stdout=task_stdout,
                stderr=task_stderr)
            self.tasks[task_id]['status'] = States.running
            logger.debug("Task {} launched with AppFut:{}".format(
                task_id, task_def['app_fu']))
        else:
            # A parent task failed: surface a DependencyError through the
            # AppFuture without launching anything.
            self.tasks[task_id]['exec_fu'] = None
            app_fu = AppFuture(self.tasks[task_id]['exec_fu'],
                               tid=task_id,
                               stdout=task_stdout,
                               stderr=task_stderr)
            app_fu.set_exception(
                DependencyError(exceptions, "Failures in input dependencies", None))
            self.tasks[task_id]['app_fu'] = app_fu
            self.tasks[task_id]['status'] = States.dep_fail
            logger.debug(
                "Task {} failed due to failure in parent task(s):{}".
                format(task_id, task_def['app_fu']))
    else:
        # Send to pending, create the AppFuture with no parent and have it set
        # when an executor future is available.
        self.tasks[task_id]['app_fu'] = AppFuture(None, tid=task_id,
                                                  stdout=task_stdout,
                                                  stderr=task_stderr)
        self.tasks[task_id]['status'] = States.pending
        logger.debug("Task {} launched with AppFut:{}".format(
            task_id, task_def['app_fu']))

    return task_def['app_fu']
def submit(self, func, *args, executors='all', fn_hash=None, cache=False, **kwargs):
    """Add task to the dataflow system.

    If the app task has the executors attributes not set (default=='all')
    the task will be launched on a randomly selected executor from the list
    of executors. This behavior could later be updated to support binding
    to executors based on user specified criteria.

    If the app task specifies a particular set of executors, it will be
    targeted at those specific executors.

    >>> IF all deps are met:
    >>>   send to the runnable queue and launch the task
    >>> ELSE:
    >>>   post the task in the pending queue

    Args:
        - func : A function object
        - *args : Args to the function

    KWargs :
        - executors (list or string) : List of executors this call could go to.
                Default='all'
        - fn_hash (Str) : Hash of the function and inputs
                Default=None
        - cache (Bool) : To enable memoization or not
        - kwargs (dict) : Rest of the kwargs to the fn passed as dict.

    Returns:
        (AppFuture) [DataFutures,]

    Raises:
        ValueError: if ``executors`` is neither the string 'all' nor a list.
        DuplicateTaskError: if the task id already exists in the task table.
    """
    task_id = self.task_count
    self.task_count += 1

    if isinstance(executors, str) and executors.lower() == 'all':
        choices = list(e for e in self.executors if e != 'data_manager')
    elif isinstance(executors, list):
        choices = executors
    else:
        # Fix: previously an unexpected type for 'executors' left 'choices'
        # unbound, producing an opaque NameError on the random.choice below.
        # Raise a descriptive error instead (matches the newer submit variant).
        raise ValueError("Task {} supplied invalid type for executors: {}".format(task_id, type(executors)))
    executor = random.choice(choices)

    task_def = {'depends': None,
                'executor': executor,
                'func': func,
                'func_name': func.__name__,
                'args': args,
                'kwargs': kwargs,
                'fn_hash': fn_hash,
                'memoize': cache,
                'callback': None,
                'dep_cnt': None,
                'exec_fu': None,
                'checkpoint': None,
                'fail_count': 0,
                'fail_history': [],
                'env': None,
                'status': States.unsched,
                'app_fu': None}

    if task_id in self.tasks:
        raise DuplicateTaskError(
            "Task {0} in pending list".format(task_id))
    else:
        self.tasks[task_id] = task_def

    # Transform remote input files to data futures
    self._add_input_deps(executor, args, kwargs)

    # Get the dep count and a list of dependencies for the task
    dep_cnt, depends = self._count_all_deps(task_id, args, kwargs)
    self.tasks[task_id]['dep_cnt'] = dep_cnt
    self.tasks[task_id]['depends'] = depends

    # Extract stdout and stderr to pass to AppFuture:
    task_stdout = kwargs.get('stdout')
    task_stderr = kwargs.get('stderr')

    logger.info("Task {} submitted for App {}, waiting on tasks {}".format(
        task_id, task_def['func_name'], [fu.tid for fu in depends]))

    # Handle three cases here:
    # No pending deps
    #     - But has failures -> dep_fail
    #     - No failures -> running
    # Has pending deps -> pending
    if dep_cnt == 0:
        new_args, kwargs, exceptions = self.sanitize_and_wrap(
            task_id, args, kwargs)
        self.tasks[task_id]['args'] = new_args
        self.tasks[task_id]['kwargs'] = kwargs

        if not exceptions:
            # NOTE(review): this branch does not set status to States.running;
            # presumably launch_task or a downstream handler updates it — confirm.
            self.tasks[task_id]['exec_fu'] = self.launch_task(
                task_id, func, *new_args, **kwargs)
            self.tasks[task_id]['app_fu'] = AppFuture(
                self.tasks[task_id]['exec_fu'],
                tid=task_id,
                stdout=task_stdout,
                stderr=task_stderr)
            logger.debug("Task {} launched with AppFuture: {}".format(
                task_id, task_def['app_fu']))
        else:
            # A parent task failed: build a pre-failed Future so retry logic
            # sees zero retries left, and surface a DependencyError.
            fu = Future()
            fu.set_exception(
                DependencyError(exceptions, "Failures in input dependencies", None))
            fu.retries_left = 0
            self.tasks[task_id]['exec_fu'] = fu
            app_fu = AppFuture(self.tasks[task_id]['exec_fu'],
                               tid=task_id,
                               stdout=task_stdout,
                               stderr=task_stderr)
            self.tasks[task_id]['app_fu'] = app_fu
            self.tasks[task_id]['status'] = States.dep_fail
            logger.debug(
                "Task {} failed due to failure in parent task(s):{}".
                format(task_id, task_def['app_fu']))
    else:
        # Send to pending, create the AppFuture with no parent and have it set
        # when an executor future is available.
        self.tasks[task_id]['app_fu'] = AppFuture(None, tid=task_id,
                                                  stdout=task_stdout,
                                                  stderr=task_stderr)
        self.tasks[task_id]['status'] = States.pending
        logger.debug("Task {} launched with AppFuture: {}".format(
            task_id, task_def['app_fu']))

    return task_def['app_fu']
def submit(self, func, *args, parsl_sites='all', **kwargs):
    """Add a task to the dataflow system.

    Args:
        func : A function object
        *args : Args to the function

    KWargs:
        Standard kwargs to the func as provided by the user
        parsl_sites : List of sites as defined in the config, Default :'all'
            This is the only kwarg that is passed in by the app definition.

    If all deps are met, the task is sent to the runnable queue and
    launched; otherwise it is posted to the pending queue.

    Returns:
        (AppFuture) [DataFutures,]
    """
    task_id = self.task_count
    self.task_count += 1

    # Count outstanding dependencies before deciding how to dispatch.
    dep_cnt, depends = self._count_all_deps(task_id, args, kwargs)

    record = {
        'depends': depends,
        'sites': parsl_sites,
        'func': func,
        'func_name': func.__name__,
        'args': args,
        'kwargs': kwargs,
        'callback': None,
        'dep_cnt': dep_cnt,
        'exec_fu': None,
        'status': States.unsched,
        'app_fu': None,
    }

    if task_id in self.tasks:
        raise DuplicateTaskError("Task {0} in pending list".format(task_id))
    self.tasks[task_id] = record

    # stdout/stderr destinations travel with the AppFuture.
    task_stdout = kwargs.get('stdout', None)
    task_stderr = kwargs.get('stderr', None)

    if dep_cnt == 0:
        # Every dependency is resolved already; wrap inputs and try to launch.
        new_args, kwargs, exceptions = self.sanitize_and_wrap(task_id, args, kwargs)
        if exceptions:
            # A parent task failed: surface a DependencyError via the AppFuture.
            record['exec_fu'] = None
            failed_fu = AppFuture(record['exec_fu'], tid=task_id,
                                  stdout=task_stdout, stderr=task_stderr)
            failed_fu.set_exception(
                DependencyError(exceptions, "Failures in input dependencies", None))
            record['app_fu'] = failed_fu
            record['status'] = States.dep_fail
        else:
            exec_fu = self.launch_task(task_id, func, *new_args, **kwargs)
            record['exec_fu'] = exec_fu
            record['app_fu'] = AppFuture(exec_fu, tid=task_id,
                                         stdout=task_stdout, stderr=task_stderr)
            record['status'] = States.running
    else:
        # Dependencies outstanding: create a parentless AppFuture now and
        # attach the executor future once it becomes available.
        record['app_fu'] = AppFuture(None, tid=task_id,
                                     stdout=task_stdout, stderr=task_stderr)
        record['status'] = States.pending

    logger.debug("Task:%s Launched with AppFut:%s", task_id, record['app_fu'])
    return record['app_fu']