Exemple #1
0
    def submit(self, func, *args, **kwargs):
        """Register a task with the dataflow system and return its future.

        A freshly generated UUID is used as the task id. If the dependency
        count reported by ``_count_all_deps`` is zero, the arguments are
        passed through ``sanitize_and_wrap`` and the task is launched
        immediately; otherwise the task is recorded as pending with an
        AppFuture that has no parent future yet.

        Args:
            func: The callable to run.
            *args: Positional arguments for ``func``.
            **kwargs: Keyword arguments for ``func``.

        Returns:
            The AppFuture stored under the task record's ``'app_fu'`` key.

        Raises:
            DuplicateTaskError: If the generated id is already present in
                ``self.tasks``.
        """
        task_id = uuid.uuid4()
        pending_count, dependencies = self._count_all_deps(task_id, args, kwargs)

        record = {
            'depends': dependencies,
            'func': func,
            'args': args,
            'kwargs': kwargs,
            'callback': None,
            'dep_cnt': pending_count,
            'exec_fu': None,
            'status': States.unsched,
            'app_fu': None,
        }

        if task_id in self.tasks:
            raise DuplicateTaskError(
                "Task {0} in pending list".format(task_id))
        self.tasks[task_id] = record

        if pending_count == 0:
            # Nothing to wait for: hand the task to the executor right away.
            logger.debug("Task:%s setting to running", task_id)
            new_args, kwargs = self.sanitize_and_wrap(task_id, args, kwargs)
            exec_fu = self.launch_task(task_id, func, *new_args, **kwargs)
            record['exec_fu'] = exec_fu
            record['app_fu'] = AppFuture(exec_fu)
            record['status'] = States.running
            logger.debug("Task : %s ", record)
        else:
            # Unresolved dependencies remain: park the task as pending.
            logger.debug("Task:%s setting to pending", task_id)
            record['app_fu'] = AppFuture(None)
            record['status'] = States.pending

        app_fu = record['app_fu']
        logger.debug("Task:%s Launched with AppFut:%s", task_id, app_fu)
        return app_fu
Exemple #2
0
    def submit(self,
               func,
               *args,
               executors='all',
               fn_hash=None,
               cache=False,
               **kwargs):
        """Add a task to the dataflow system.

        One executor is picked at random from the candidates. With the
        default ``executors='all'`` the candidates are every entry of
        ``self.executors`` except ``'data_manager'``; when a list is given,
        that list is used as the candidates.

        The task is always registered in the pending state. A callback is
        attached to every dependency so that ``launch_if_ready`` is invoked
        when it completes, and ``launch_if_ready`` is also called once
        explicitly before returning.

        Args:
            - func : A function object
            - *args : Args to the function

        KWargs :
            - executors (list or string) : List of executors this call could go to.
                    Default='all'
            - fn_hash (Str) : Hash of the function and inputs
                    Default=None
            - cache (Bool) : To enable memoization or not
            - kwargs (dict) : Rest of the kwargs to the fn passed as dict.

        Returns:
               (AppFuture) the future stored under the task's 'app_fu' key.

        Raises:
            ValueError: If ``self.cleanup_called`` is set, or if ``executors``
                is neither the string 'all' (case-insensitive) nor a list.
            DuplicateTaskError: If the task id already exists in ``self.tasks``.
        """

        if self.cleanup_called:
            raise ValueError("Cannot submit to a DFK that has been cleaned up")

        task_id = self.task_count
        self.task_count += 1
        if isinstance(executors, str) and executors.lower() == 'all':
            choices = [e for e in self.executors if e != 'data_manager']
        elif isinstance(executors, list):
            choices = executors
        else:
            # Without this branch `choices` would be unbound and the call
            # below would fail with an opaque UnboundLocalError.
            raise ValueError(
                "Task {} supplied invalid type for executors: {}".format(
                    task_id, type(executors)))
        executor = random.choice(choices)

        # Transform remote input files to data futures
        args, kwargs = self._add_input_deps(executor, args, kwargs)

        task_def = {
            'depends': None,
            'executor': executor,
            'func': func,
            'func_name': func.__name__,
            'args': args,
            'kwargs': kwargs,
            'fn_hash': fn_hash,
            'memoize': cache,
            'callback': None,
            'exec_fu': None,
            'checkpoint': None,
            'fail_count': 0,
            'fail_history': [],
            'env': None,
            'status': States.unsched,
            'id': task_id,
            'time_submitted': None,
            'time_returned': None,
            'app_fu': None
        }

        if task_id in self.tasks:
            raise DuplicateTaskError(
                "internal consistency error: Task {0} already exists in task list"
                .format(task_id))
        else:
            self.tasks[task_id] = task_def

        # Get the dep count and a list of dependencies for the task
        dep_cnt, depends = self._gather_all_deps(args, kwargs)
        self.tasks[task_id]['depends'] = depends

        # Extract stdout and stderr to pass to AppFuture:
        task_stdout = kwargs.get('stdout')
        task_stderr = kwargs.get('stderr')

        logger.info("Task {} submitted for App {}, waiting on tasks {}".format(
            task_id, task_def['func_name'], [fu.tid for fu in depends]))

        self.tasks[task_id]['task_launch_lock'] = threading.Lock()
        app_fu = AppFuture(None,
                           tid=task_id,
                           stdout=task_stdout,
                           stderr=task_stderr)

        self.tasks[task_id]['app_fu'] = app_fu
        app_fu.add_done_callback(partial(self.handle_app_update, task_id))
        self.tasks[task_id]['status'] = States.pending
        logger.debug("Task {} set to pending state with AppFuture: {}".format(
            task_id, task_def['app_fu']))

        # At this point add callbacks to all dependencies to do a
        # launch_if_ready call whenever a dependency completes.
        #
        # We need to be careful about the order of setting the state to
        # pending, adding the callbacks, and calling launch_if_ready
        # explicitly once, always, below.
        #
        # As long as launch_if_ready is called once after setting pending,
        # the callback dependencies can be added at any point: if the
        # callbacks all fire before then, they won't cause a launch, but the
        # explicit call below will. If they fire after the state is pending,
        # the last one will cause a launch and the explicit one won't.

        def callback_adapter(dep_fut):
            # The completed dependency itself is not needed; only the
            # re-check of this task's readiness matters.
            self.launch_if_ready(task_id)

        for d in depends:
            try:
                d.add_done_callback(callback_adapter)
            except Exception as e:
                logger.error(
                    "add_done_callback got an exception {} which will be ignored"
                    .format(e))

        self.launch_if_ready(task_id)

        return task_def['app_fu']
Exemple #3
0
    def submit(self, func, *args, executors='all', fn_hash=None, cache=False, **kwargs):
        """Add a task to the dataflow system.

        One executor is picked at random from the candidates. With the
        default ``executors='all'`` the candidates are every entry of
        ``self.executors`` except ``'data_manager'``; when a list is given,
        that list is used as the candidates.

        A ``stdout``/``stderr`` kwarg equal to ``parsl.AUTO_LOGNAME`` is
        replaced by a generated path under ``<run_dir>/task_logs``.

        The task is always registered in the pending state. A callback is
        attached to every dependency so that ``launch_if_ready`` runs when it
        completes, and ``launch_if_ready`` is also called once explicitly
        before returning.

        Args:
            - func : A function object
            - *args : Args to the function

        KWargs :
            - executors (list or string) : List of executors this call could go to.
                    Default='all'
            - fn_hash (Str) : Hash of the function and inputs
                    Default=None
            - cache (Bool) : To enable memoization or not
            - kwargs (dict) : Rest of the kwargs to the fn passed as dict.

        Returns:
               (AppFuture) the future created for this task.

        Raises:
            ValueError: If ``self.cleanup_called`` is set, or if ``executors``
                is neither the string 'all' (case-insensitive) nor a list.
            DuplicateTaskError: If the task id already exists in ``self.tasks``.
        """
        if self.cleanup_called:
            raise ValueError("Cannot submit to a DFK that has been cleaned up")

        task_id = self.task_count
        self.task_count += 1

        wants_all = isinstance(executors, str) and executors.lower() == 'all'
        if wants_all:
            choices = [name for name in self.executors if name != 'data_manager']
        elif isinstance(executors, list):
            choices = executors
        else:
            raise ValueError("Task {} supplied invalid type for executors: {}".format(task_id, type(executors)))
        executor = random.choice(choices)

        # func.__name__ is read below before any staging code has wrapped func.
        label = kwargs.get('label')
        for stream in ('stdout', 'stderr'):
            if stream in kwargs and kwargs[stream] == parsl.AUTO_LOGNAME:
                log_root = self.run_dir
                # One sub-directory per 10k task ids keeps directories small.
                bucket = str(int(task_id / 10000)).zfill(4)
                file_name = 'task_{}_{}{}.{}'.format(
                    str(task_id).zfill(4),
                    func.__name__,
                    '' if label is None else '_{}'.format(label),
                    stream)
                kwargs[stream] = os.path.join(log_root, 'task_logs', bucket, file_name)

        task_def = {
            'depends': None,
            'executor': executor,
            'func_name': func.__name__,
            'fn_hash': fn_hash,
            'memoize': cache,
            'exec_fu': None,
            'fail_count': 0,
            'fail_history': [],
            'status': States.unsched,
            'id': task_id,
            'time_submitted': None,
            'time_returned': None,
        }

        app_fu = AppFuture(task_def)

        # Transform remote input files to data futures, then let the output
        # staging step wrap func as well.
        args, kwargs, func = self._add_input_deps(executor, args, kwargs, func)
        func = self._add_output_deps(executor, args, kwargs, app_fu, func)

        task_def.update({
            'args': args,
            'func': func,
            'kwargs': kwargs,
            'app_fu': app_fu,
        })

        if task_id in self.tasks:
            raise DuplicateTaskError(
                "internal consistency error: Task {0} already exists in task list".format(task_id))
        self.tasks[task_id] = task_def

        # Collect the dependencies of the task and describe them for the log.
        depends = self._gather_all_deps(args, kwargs)
        task_def['depends'] = depends

        depend_descs = [
            "task {}".format(dep.tid) if isinstance(dep, (AppFuture, DataFuture)) else repr(dep)
            for dep in depends
        ]
        if depend_descs:
            waiting_message = "waiting on {}".format(", ".join(depend_descs))
        else:
            waiting_message = "not waiting on any dependency"

        logger.info("Task {} submitted for App {}, {}".format(
            task_id, task_def['func_name'], waiting_message))

        task_def['task_launch_lock'] = threading.Lock()

        app_fu.add_done_callback(partial(self.handle_app_update, task_id))
        task_def['status'] = States.pending
        logger.debug("Task {} set to pending state with AppFuture: {}".format(task_id, app_fu))

        # Ordering note: the state is set to pending first, then callbacks
        # are attached to the dependencies, then launch_if_ready is called
        # explicitly once. If every callback fires before the explicit call,
        # the explicit call performs the launch; if they fire afterwards, the
        # last callback does.

        def relaunch_check(dep_fut):
            self.launch_if_ready(task_id)

        for dep in depends:
            try:
                dep.add_done_callback(relaunch_check)
            except Exception as e:
                logger.error("add_done_callback got an exception {} which will be ignored".format(e))

        self.launch_if_ready(task_id)

        return app_fu
Exemple #4
0
    def submit(self,
               func,
               *args,
               parsl_sites='all',
               fn_hash=None,
               cache=False,
               **kwargs):
        """Add a task to the dataflow system.

        The outcome depends on the dependency count from ``_count_all_deps``:

        * pending dependencies remain -> the task is stored as pending with
          an AppFuture that has no parent future;
        * no pending dependencies, but ``sanitize_and_wrap`` reports
          exceptions -> the AppFuture is failed with a DependencyError and
          the task is marked ``dep_fail``;
        * no pending dependencies and no exceptions -> the task is launched
          and marked running.

        Args:
            - func : A function object
            - *args : Args to the function

        KWargs :
            - parsl_sites (List|String) : List of sites this call could go to.
                    Default='all'
            - fn_hash (Str) : Hash of the function and inputs
                    Default=None
            - cache (Bool) : To enable memoization or not
            - kwargs (dict) : Rest of the kwargs to the fn passed as dict.

        Returns:
               (AppFuture) the future stored under the task's 'app_fu' key.

        Raises:
            DuplicateTaskError: If the task id already exists in ``self.tasks``.
        """
        task_id = self.task_count
        self.task_count += 1

        # Dependency count and the list of dependencies for the task.
        dep_cnt, depends = self._count_all_deps(task_id, args, kwargs)

        task_def = {
            'depends': depends,
            'sites': parsl_sites,
            'func': func,
            'func_name': func.__name__,
            'args': args,
            'kwargs': kwargs,
            'fn_hash': fn_hash,
            'memoize': cache,
            'callback': None,
            'dep_cnt': dep_cnt,
            'exec_fu': None,
            'checkpoint': None,
            'fail_count': 0,
            'fail_history': [],
            'env': None,
            'status': States.unsched,
            'app_fu': None,
        }

        if task_id in self.tasks:
            raise DuplicateTaskError(
                "Task {0} in pending list".format(task_id))
        self.tasks[task_id] = task_def

        # Every AppFuture built below shares these keyword arguments; stdout
        # and stderr are taken from the caller's kwargs before sanitizing.
        future_opts = {
            'tid': task_id,
            'stdout': kwargs.get('stdout'),
            'stderr': kwargs.get('stderr'),
        }

        waiting_on = [fu.tid for fu in depends]
        logger.info("Task {} submitted for App {}, waiting on tasks {}".format(
            task_id, task_def['func_name'], waiting_on))

        if dep_cnt != 0:
            # Still waiting: the AppFuture gets no parent now and is expected
            # to receive one when an executor future is available.
            task_def['app_fu'] = AppFuture(None, **future_opts)
            task_def['status'] = States.pending
            logger.debug("Task {} launched with AppFut:{}".format(
                task_id, task_def['app_fu']))
            return task_def['app_fu']

        new_args, kwargs, exceptions = self.sanitize_and_wrap(
            task_id, args, kwargs)
        task_def['args'] = new_args
        task_def['kwargs'] = kwargs

        if exceptions:
            # A parent failed: fail this task's future without launching.
            task_def['exec_fu'] = None
            app_fu = AppFuture(None, **future_opts)
            app_fu.set_exception(
                DependencyError(exceptions,
                                "Failures in input dependencies", None))
            task_def['app_fu'] = app_fu
            task_def['status'] = States.dep_fail
            logger.debug(
                "Task {} failed due to failure in parent task(s):{}".
                format(task_id, app_fu))
        else:
            exec_fu = self.launch_task(task_id, func, *new_args, **kwargs)
            task_def['exec_fu'] = exec_fu
            task_def['app_fu'] = AppFuture(exec_fu, **future_opts)
            task_def['status'] = States.running
            logger.debug("Task {} launched with AppFut:{}".format(
                task_id, task_def['app_fu']))

        return task_def['app_fu']
Exemple #5
0
    def submit(self,
               func,
               *args,
               executors='all',
               fn_hash=None,
               cache=False,
               **kwargs):
        """Add a task to the dataflow system.

        One executor is picked at random from the candidates. With the
        default ``executors='all'`` the candidates are every entry of
        ``self.executors`` except ``'data_manager'``; when a list is given,
        the task is targeted at that list.

        The outcome depends on the dependency count from ``_count_all_deps``:

        * pending dependencies remain -> the task is stored as pending with
          an AppFuture that has no parent future;
        * no pending dependencies, but ``sanitize_and_wrap`` reports
          exceptions -> an already-failed Future carrying a DependencyError
          becomes the parent and the task is marked ``dep_fail``;
        * no pending dependencies and no exceptions -> the task is launched.

        Args:
            - func : A function object
            - *args : Args to the function

        KWargs :
            - executors (list or string) : List of executors this call could go to.
                    Default='all'
            - fn_hash (Str) : Hash of the function and inputs
                    Default=None
            - cache (Bool) : To enable memoization or not
            - kwargs (dict) : Rest of the kwargs to the fn passed as dict.

        Returns:
               (AppFuture) the future stored under the task's 'app_fu' key.

        Raises:
            ValueError: If ``executors`` is neither the string 'all'
                (case-insensitive) nor a list.
            DuplicateTaskError: If the task id already exists in ``self.tasks``.
        """
        task_id = self.task_count
        self.task_count += 1
        if isinstance(executors, str) and executors.lower() == 'all':
            choices = [e for e in self.executors if e != 'data_manager']
        elif isinstance(executors, list):
            choices = executors
        else:
            # Without this branch `choices` would be unbound and the call
            # below would fail with an opaque UnboundLocalError.
            raise ValueError(
                "Task {} supplied invalid type for executors: {}".format(
                    task_id, type(executors)))
        executor = random.choice(choices)

        task_def = {
            'depends': None,
            'executor': executor,
            'func': func,
            'func_name': func.__name__,
            'args': args,
            'kwargs': kwargs,
            'fn_hash': fn_hash,
            'memoize': cache,
            'callback': None,
            'dep_cnt': None,
            'exec_fu': None,
            'checkpoint': None,
            'fail_count': 0,
            'fail_history': [],
            'env': None,
            'status': States.unsched,
            'app_fu': None
        }

        if task_id in self.tasks:
            raise DuplicateTaskError(
                "Task {0} in pending list".format(task_id))
        else:
            self.tasks[task_id] = task_def

        # Transform remote input files to data futures
        # NOTE(review): the return value is discarded here, so this
        # presumably updates args/kwargs in place -- confirm.
        self._add_input_deps(executor, args, kwargs)

        # Get the dep count and a list of dependencies for the task
        dep_cnt, depends = self._count_all_deps(task_id, args, kwargs)
        self.tasks[task_id]['dep_cnt'] = dep_cnt
        self.tasks[task_id]['depends'] = depends

        # Extract stdout and stderr to pass to AppFuture:
        task_stdout = kwargs.get('stdout')
        task_stderr = kwargs.get('stderr')

        logger.info("Task {} submitted for App {}, waiting on tasks {}".format(
            task_id, task_def['func_name'], [fu.tid for fu in depends]))

        # Handle three cases here:
        # No pending deps
        #     - But has failures -> dep_fail
        #     - No failures -> running
        # Has pending deps -> pending
        if dep_cnt == 0:

            new_args, kwargs, exceptions = self.sanitize_and_wrap(
                task_id, args, kwargs)
            self.tasks[task_id]['args'] = new_args
            self.tasks[task_id]['kwargs'] = kwargs

            if not exceptions:
                # NOTE(review): 'status' is not updated in this branch;
                # presumably launch_task takes care of it -- confirm.
                self.tasks[task_id]['exec_fu'] = self.launch_task(
                    task_id, func, *new_args, **kwargs)
                self.tasks[task_id]['app_fu'] = AppFuture(
                    self.tasks[task_id]['exec_fu'],
                    tid=task_id,
                    stdout=task_stdout,
                    stderr=task_stderr)
                logger.debug("Task {} launched with AppFuture: {}".format(
                    task_id, task_def['app_fu']))

            else:
                # Build an already-failed parent future so the AppFuture
                # reports the dependency failure.
                fu = Future()
                fu.set_exception(
                    DependencyError(exceptions,
                                    "Failures in input dependencies", None))
                fu.retries_left = 0
                self.tasks[task_id]['exec_fu'] = fu
                app_fu = AppFuture(self.tasks[task_id]['exec_fu'],
                                   tid=task_id,
                                   stdout=task_stdout,
                                   stderr=task_stderr)
                self.tasks[task_id]['app_fu'] = app_fu
                self.tasks[task_id]['status'] = States.dep_fail
                logger.debug(
                    "Task {} failed due to failure in parent task(s):{}".
                    format(task_id, task_def['app_fu']))

        else:
            # Send to pending, create the AppFuture with no parent and have it set
            # when an executor future is available.
            self.tasks[task_id]['app_fu'] = AppFuture(None,
                                                      tid=task_id,
                                                      stdout=task_stdout,
                                                      stderr=task_stderr)
            self.tasks[task_id]['status'] = States.pending
            logger.debug("Task {} launched with AppFuture: {}".format(
                task_id, task_def['app_fu']))

        return task_def['app_fu']
Exemple #6
0
    def submit(self, func, *args, parsl_sites='all', **kwargs):
        """Add a task to the dataflow system.

        The outcome depends on the dependency count from ``_count_all_deps``:

        * pending dependencies remain -> the task is stored as pending with
          an AppFuture that has no parent future;
        * no pending dependencies, but ``sanitize_and_wrap`` reports
          exceptions -> the AppFuture is failed with a DependencyError and
          the task is marked ``dep_fail``;
        * no pending dependencies and no exceptions -> the task is launched
          and marked running.

        Args:
             func : A function object
             *args : Args to the function

        KWargs :
             Standard kwargs to the func as provided by the user
             parsl_sites : List of sites as defined in the config, Default :'all'
             This is the only kwarg that is passed in by the app definition.

        Returns:
               (AppFuture) the future stored under the task's 'app_fu' key.

        Raises:
            DuplicateTaskError: If the task id already exists in ``self.tasks``.
        """
        task_id = self.task_count
        self.task_count += 1

        # Dependency count and the list of dependencies for the task.
        dep_cnt, depends = self._count_all_deps(task_id, args, kwargs)

        task_def = {
            'depends': depends,
            'sites': parsl_sites,
            'func': func,
            'func_name': func.__name__,
            'args': args,
            'kwargs': kwargs,
            'callback': None,
            'dep_cnt': dep_cnt,
            'exec_fu': None,
            'status': States.unsched,
            'app_fu': None,
        }

        if task_id in self.tasks:
            raise DuplicateTaskError("Task {0} in pending list".format(task_id))
        self.tasks[task_id] = task_def

        # Every AppFuture built below shares these keyword arguments; stdout
        # and stderr are taken from the caller's kwargs before sanitizing.
        future_opts = {
            'tid': task_id,
            'stdout': kwargs.get('stdout'),
            'stderr': kwargs.get('stderr'),
        }

        if dep_cnt != 0:
            # Still waiting: the AppFuture gets no parent now and is expected
            # to receive one when an executor future is available.
            task_def['app_fu'] = AppFuture(None, **future_opts)
            task_def['status'] = States.pending
        else:
            new_args, kwargs, exceptions = self.sanitize_and_wrap(task_id, args, kwargs)
            if exceptions:
                # A parent failed: fail this task's future without launching.
                task_def['exec_fu'] = None
                app_fu = AppFuture(None, **future_opts)
                app_fu.set_exception(
                    DependencyError(exceptions,
                                    "Failures in input dependencies",
                                    None))
                task_def['app_fu'] = app_fu
                task_def['status'] = States.dep_fail
            else:
                exec_fu = self.launch_task(task_id, func, *new_args, **kwargs)
                task_def['exec_fu'] = exec_fu
                task_def['app_fu'] = AppFuture(exec_fu, **future_opts)
                task_def['status'] = States.running

        result_fu = task_def['app_fu']
        logger.debug("Task:%s Launched with AppFut:%s", task_id, result_fu)
        return result_fu