Exemple #1
0
    def submit(self,
               func,
               *args,
               executors='all',
               fn_hash=None,
               cache=False,
               **kwargs):
        """Add task to the dataflow system.

        If the app task has the executors attributes not set (default=='all')
        the task will be launched on a randomly selected executor from the
        list of executors. If the app task specifies a particular set of
        executors, it will be targeted at the specified executors.

        >>> IF all deps are met:
        >>>   send to the runnable queue and launch the task
        >>> ELSE:
        >>>   post the task in the pending queue

        Args:
            - func : A function object
            - *args : Args to the function

        KWargs :
            - executors (list or string) : List of executors this call could go to.
                    Default='all'
            - fn_hash (Str) : Hash of the function and inputs
                    Default=None
            - cache (Bool) : To enable memoization or not
            - kwargs (dict) : Rest of the kwargs to the fn passed as dict.

        Returns:
               (AppFuture) [DataFutures,]

        """

        if self.cleanup_called:
            raise ValueError("Cannot submit to a DFK that has been cleaned up")

        task_id = self.task_count
        self.task_count += 1
        if isinstance(executors, str) and executors.lower() == 'all':
            choices = list(e for e in self.executors if e != 'data_manager')
        elif isinstance(executors, list):
            choices = executors
        executor = random.choice(choices)

        # Transform remote input files to data futures
        args, kwargs = self._add_input_deps(executor, args, kwargs)

        task_def = {
            'depends': None,
            'executor': executor,
            'func': func,
            'func_name': func.__name__,
            'args': args,
            'kwargs': kwargs,
            'fn_hash': fn_hash,
            'memoize': cache,
            'callback': None,
            'exec_fu': None,
            'checkpoint': None,
            'fail_count': 0,
            'fail_history': [],
            'env': None,
            'status': States.unsched,
            'id': task_id,
            'time_submitted': None,
            'time_returned': None,
            'app_fu': None
        }

        if task_id in self.tasks:
            raise DuplicateTaskError(
                "internal consistency error: Task {0} already exists in task list"
                .format(task_id))
        else:
            self.tasks[task_id] = task_def

        # Get the dep count and a list of dependencies for the task
        dep_cnt, depends = self._gather_all_deps(args, kwargs)
        self.tasks[task_id]['depends'] = depends

        # Extract stdout and stderr to pass to AppFuture:
        task_stdout = kwargs.get('stdout')
        task_stderr = kwargs.get('stderr')

        logger.info("Task {} submitted for App {}, waiting on tasks {}".format(
            task_id, task_def['func_name'], [fu.tid for fu in depends]))

        self.tasks[task_id]['task_launch_lock'] = threading.Lock()
        app_fu = AppFuture(None,
                           tid=task_id,
                           stdout=task_stdout,
                           stderr=task_stderr)

        self.tasks[task_id]['app_fu'] = app_fu
        app_fu.add_done_callback(partial(self.handle_app_update, task_id))
        self.tasks[task_id]['status'] = States.pending
        logger.debug("Task {} set to pending state with AppFuture: {}".format(
            task_id, task_def['app_fu']))

        # at this point add callbacks to all dependencies to do a launch_if_ready
        # call whenever a dependency completes.

        # we need to be careful about the order of setting the state to pending,
        # adding the callbacks, and caling launch_if_ready explicitly once always below.

        # I think as long as we call launch_if_ready once after setting pending, then
        # we can add the callback dependencies at any point: if the callbacks all fire
        # before then, they won't cause a launch, but the one below will. if they fire
        # after we set it pending, then the last one will cause a launch, and the
        # explicit one won't.

        for d in depends:

            def callback_adapter(dep_fut):
                self.launch_if_ready(task_id)

            try:
                d.add_done_callback(callback_adapter)
            except Exception as e:
                logger.error(
                    "add_done_callback got an exception {} which will be ignored"
                    .format(e))

        self.launch_if_ready(task_id)

        return task_def['app_fu']
Exemple #2
0
    def submit(self, func, *args, executors='all', fn_hash=None, cache=False, **kwargs):
        """Add task to the dataflow system.

        If the app task has the executors attributes not set (default=='all')
        the task will be launched on a randomly selected executor from the
        list of executors. If the app task specifies a particular set of
        executors, it will be targeted at the specified executors.

        >>> IF all deps are met:
        >>>   send to the runnable queue and launch the task
        >>> ELSE:
        >>>   post the task in the pending queue

        Args:
            - func : A function object
            - *args : Args to the function

        KWargs :
            - executors (list or string) : List of executors this call could go to.
                    Default='all'
            - fn_hash (Str) : Hash of the function and inputs
                    Default=None
            - cache (Bool) : To enable memoization or not
            - kwargs (dict) : Rest of the kwargs to the fn passed as dict.

        Returns:
               (AppFuture) [DataFutures,]

        """

        if self.cleanup_called:
            raise ValueError("Cannot submit to a DFK that has been cleaned up")

        task_id = self.task_count
        self.task_count += 1
        if isinstance(executors, str) and executors.lower() == 'all':
            choices = list(e for e in self.executors if e != 'data_manager')
        elif isinstance(executors, list):
            choices = executors
        else:
            raise ValueError("Task {} supplied invalid type for executors: {}".format(task_id, type(executors)))
        executor = random.choice(choices)

        # The below uses func.__name__ before it has been wrapped by any staging code.

        label = kwargs.get('label')
        for kw in ['stdout', 'stderr']:
            if kw in kwargs:
                if kwargs[kw] == parsl.AUTO_LOGNAME:
                    kwargs[kw] = os.path.join(
                            self.run_dir,
                            'task_logs',
                            str(int(task_id / 10000)).zfill(4),  # limit logs to 10k entries per directory
                            'task_{}_{}{}.{}'.format(
                                str(task_id).zfill(4),
                                func.__name__,
                                '' if label is None else '_{}'.format(label),
                                kw)
                    )

        task_def = {'depends': None,
                    'executor': executor,
                    'func_name': func.__name__,
                    'fn_hash': fn_hash,
                    'memoize': cache,
                    'exec_fu': None,
                    'fail_count': 0,
                    'fail_history': [],
                    'status': States.unsched,
                    'id': task_id,
                    'time_submitted': None,
                    'time_returned': None}

        app_fu = AppFuture(task_def)

        # Transform remote input files to data futures
        args, kwargs, func = self._add_input_deps(executor, args, kwargs, func)

        func = self._add_output_deps(executor, args, kwargs, app_fu, func)

        task_def.update({
                    'args': args,
                    'func': func,
                    'kwargs': kwargs,
                    'app_fu': app_fu})

        if task_id in self.tasks:
            raise DuplicateTaskError(
                "internal consistency error: Task {0} already exists in task list".format(task_id))
        else:
            self.tasks[task_id] = task_def

        # Get the list of dependencies for the task
        depends = self._gather_all_deps(args, kwargs)
        self.tasks[task_id]['depends'] = depends

        depend_descs = []
        for d in depends:
            if isinstance(d, AppFuture) or isinstance(d, DataFuture):
                depend_descs.append("task {}".format(d.tid))
            else:
                depend_descs.append(repr(d))

        if depend_descs != []:
            waiting_message = "waiting on {}".format(", ".join(depend_descs))
        else:
            waiting_message = "not waiting on any dependency"

        logger.info("Task {} submitted for App {}, {}".format(task_id,
                                                              task_def['func_name'],
                                                              waiting_message))

        self.tasks[task_id]['task_launch_lock'] = threading.Lock()

        app_fu.add_done_callback(partial(self.handle_app_update, task_id))
        self.tasks[task_id]['status'] = States.pending
        logger.debug("Task {} set to pending state with AppFuture: {}".format(task_id, task_def['app_fu']))

        # at this point add callbacks to all dependencies to do a launch_if_ready
        # call whenever a dependency completes.

        # we need to be careful about the order of setting the state to pending,
        # adding the callbacks, and caling launch_if_ready explicitly once always below.

        # I think as long as we call launch_if_ready once after setting pending, then
        # we can add the callback dependencies at any point: if the callbacks all fire
        # before then, they won't cause a launch, but the one below will. if they fire
        # after we set it pending, then the last one will cause a launch, and the
        # explicit one won't.

        for d in depends:

            def callback_adapter(dep_fut):
                self.launch_if_ready(task_id)

            try:
                d.add_done_callback(callback_adapter)
            except Exception as e:
                logger.error("add_done_callback got an exception {} which will be ignored".format(e))

        self.launch_if_ready(task_id)

        return app_fu