Example 1
    def poll_task_jobs(self, suite, itasks, poll_succ=True, msg=None):
        """Poll jobs of specified tasks.

        Any job that is or was submitted or running can be polled, except for
        retrying tasks, which would (correctly) poll as failed. Succeeded
        tasks are polled only if poll_succ is True.

        This method uses _poll_task_jobs_callback() and
        _manip_task_jobs_callback() as help/callback methods.

        _poll_task_job_callback() executes one specific job.
        """
        to_poll_tasks = []
        pollable_statuses = {
            TASK_STATUS_SUBMITTED, TASK_STATUS_RUNNING, TASK_STATUS_FAILED
        }
        if poll_succ:
            pollable_statuses.add(TASK_STATUS_SUCCEEDED)
        for itask in itasks:
            if itask.state.status in pollable_statuses:
                to_poll_tasks.append(itask)
            else:
                LOG.debug("skipping %s: not pollable, "
                          "or skipping 'succeeded' tasks" % itask.identity)
        if to_poll_tasks:
            if msg is not None:
                LOG.info(msg)
            self._run_job_cmd(
                self.JOBS_POLL, suite, to_poll_tasks,
                self._poll_task_jobs_callback)
Example 2
 def _process_message_submit_failed(self, itask, event_time):
     """Helper for process_message, handle a submit-failed message."""
     LOG.error('[%s] -%s', itask, self.EVENT_SUBMIT_FAILED)
     if event_time is None:
         event_time = get_current_time_string()
     self.suite_db_mgr.put_update_task_jobs(itask, {
         "time_submit_exit": event_time,
         "submit_status": 1,
     })
     job_d = get_task_job_id(itask.point, itask.tdef.name, itask.submit_num)
     self.job_pool.set_job_attr(job_d, 'batch_sys_job_id', None)
     itask.summary['submit_method_id'] = None
     self.pflag = True
     if (TASK_STATUS_SUBMIT_RETRYING not in itask.try_timers or
             itask.try_timers[TASK_STATUS_SUBMIT_RETRYING].next() is None):
         # No submission retry lined up: definitive failure.
         # See github #476.
         if itask.state.reset(TASK_STATUS_SUBMIT_FAILED):
             self.setup_event_handlers(itask, self.EVENT_SUBMIT_FAILED,
                                       'job %s' % self.EVENT_SUBMIT_FAILED)
             self.job_pool.set_job_state(job_d, TASK_STATUS_SUBMIT_FAILED)
     elif itask.state.reset(TASK_STATUS_SUBMIT_RETRYING):
         # There is a submission retry lined up.
         timer = itask.try_timers[TASK_STATUS_SUBMIT_RETRYING]
         delay_msg = "submit-retrying in %s" % timer.delay_timeout_as_str()
         if itask.state.is_held:
             delay_msg = "held (%s)" % delay_msg
         msg = "%s, %s" % (self.EVENT_SUBMIT_FAILED, delay_msg)
         LOG.info("[%s] -job(%02d) %s", itask, itask.submit_num, msg)
         itask.set_summary_message(msg)
         self.setup_event_handlers(
             itask, self.EVENT_SUBMIT_RETRY,
             "job %s, %s" % (self.EVENT_SUBMIT_FAILED, delay_msg))
         self.job_pool.set_job_state(job_d, TASK_STATUS_SUBMIT_RETRYING)
     self._reset_job_timers(itask)
Example 3
 def load_db_broadcast_states(self, row_idx, row):
     """Load broadcast variables from runtime DB broadcast states row."""
     if row_idx == 0:
         LOG.info("LOADING broadcast states")
     point, namespace, key, value = row
     sections = []
     cur_key = key
     if "]" in cur_key:
         sections = self.REC_SECTION.findall(cur_key)
         cur_key = cur_key.rsplit(r"]", 1)[-1]
     with self.lock:
         self.broadcasts.setdefault(point, {})
         self.broadcasts[point].setdefault(namespace, {})
         dict_ = self.broadcasts[point][namespace]
         for section in sections:
             dict_.setdefault(section, {})
             dict_ = dict_[section]
         dict_[cur_key] = value
     LOG.info(
         CHANGE_FMT.strip() % {
             "change": CHANGE_PREFIX_SET,
             "point": point,
             "namespace": namespace,
             "key": key,
             "value": value
         })
Example 4
 def _process_message_succeeded(self, itask, event_time):
     """Helper for process_message, handle a succeeded message."""
     job_d = get_task_job_id(itask.point, itask.tdef.name, itask.submit_num)
     self.job_pool.set_job_time(job_d, 'finished', event_time)
     self.pflag = True
     itask.set_summary_time('finished', event_time)
     self.suite_db_mgr.put_update_task_jobs(itask, {
         "run_status": 0,
         "time_run_exit": event_time,
     })
     # Update mean elapsed time only on task succeeded.
     if itask.summary['started_time'] is not None:
         itask.tdef.elapsed_times.append(itask.summary['finished_time'] -
                                         itask.summary['started_time'])
     if not itask.state.outputs.all_completed():
         msg = ""
         for output in itask.state.outputs.get_not_completed():
             if output not in [
                     TASK_OUTPUT_EXPIRED, TASK_OUTPUT_SUBMIT_FAILED,
                     TASK_OUTPUT_FAILED
             ]:
                 msg += "\n  " + output
         if msg:
             LOG.info("[%s] -Succeeded with outputs not completed: %s",
                      itask, msg)
     if itask.state.reset(TASK_STATUS_SUCCEEDED):
         self.setup_event_handlers(itask, "succeeded", "job succeeded")
         self.job_pool.set_job_state(job_d, TASK_STATUS_SUCCEEDED)
     self._reset_job_timers(itask)
Example 5
    def poll_task_jobs(self, suite, itasks, poll_succ=True, msg=None):
        """Poll jobs of specified tasks.

        Any job that is or was submitted or running can be polled, except for
        retrying tasks, which would (correctly) poll as failed. Succeeded
        tasks are polled only if poll_succ is True.

        This method uses _poll_task_jobs_callback() and
        _manip_task_jobs_callback() as help/callback methods.

        _poll_task_job_callback() executes one specific job.
        """
        to_poll_tasks = []
        pollable_statuses = {
            TASK_STATUS_SUBMITTED, TASK_STATUS_RUNNING, TASK_STATUS_FAILED
        }
        if poll_succ:
            pollable_statuses.add(TASK_STATUS_SUCCEEDED)
        for itask in itasks:
            if itask.state(*pollable_statuses):
                to_poll_tasks.append(itask)
            else:
                LOG.debug("skipping %s: not pollable, "
                          "or skipping 'succeeded' tasks" % itask.identity)
        if to_poll_tasks:
            if msg is not None:
                LOG.info(msg)
            self._run_job_cmd(self.JOBS_POLL, suite, to_poll_tasks,
                              self._poll_task_jobs_callback)
Example 6
def remove_empty_parents(
    path: Union[Path, str], tail: Union[Path, str]
) -> None:
    """Work our way up the tail of path, removing empty dirs only.

    Args:
        path: Absolute path to the directory, e.g. /foo/bar/a/b/c
        tail: The tail of the path to work our way up, e.g. a/b/c

    Example:
        remove_empty_parents('/foo/bar/a/b/c', 'a/b/c') would remove
        /foo/bar/a/b (assuming it's empty), then /foo/bar/a (assuming it's
        empty).
    """
    path = Path(path)
    if not path.is_absolute():
        raise ValueError('path must be absolute')
    tail = Path(tail)
    if tail.is_absolute():
        raise ValueError('tail must not be an absolute path')
    if not str(path).endswith(str(tail)):
        raise ValueError(f"path '{path}' does not end with '{tail}'")
    depth = len(tail.parts) - 1
    for i in range(depth):
        parent = path.parents[i]
        if not parent.is_dir():
            continue
        try:
            parent.rmdir()
            LOG.info(f'Removing directory: {parent}')
        except OSError:
            break
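
Note: a minimal standalone sketch of the same walk-up-and-remove-empty-parents pattern, using only pathlib. prune_empty_parents and the /tmp/demo layout are illustrative, not part of Cylc.

from pathlib import Path


def prune_empty_parents(path: Path, levels: int) -> None:
    """Remove up to `levels` empty parent directories of `path`."""
    for parent in list(path.parents)[:levels]:
        try:
            parent.rmdir()  # only succeeds if the directory is empty
        except OSError:
            break  # stop at the first non-empty (or missing) parent


# Hypothetical layout: /tmp/demo/a/b/c
leaf = Path('/tmp/demo/a/b/c')
leaf.mkdir(parents=True, exist_ok=True)
leaf.rmdir()                          # remove the (empty) leaf itself
prune_empty_parents(leaf, levels=2)   # removes .../a/b then .../a if empty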
Example 7
 def _process_message_failed(self, itask, event_time, message):
     """Helper for process_message, handle a failed message."""
     if event_time is None:
         event_time = get_current_time_string()
     itask.set_summary_time('finished', event_time)
     job_d = get_task_job_id(itask.point, itask.tdef.name, itask.submit_num)
     self.job_pool.set_job_time(job_d, 'finished', event_time)
     self.suite_db_mgr.put_update_task_jobs(itask, {
         "run_status": 1,
         "time_run_exit": event_time,
     })
     if (TASK_STATUS_RETRYING not in itask.try_timers
             or itask.try_timers[TASK_STATUS_RETRYING].next() is None):
         # No retry lined up: definitive failure.
         self.pflag = True
         if itask.state.reset(TASK_STATUS_FAILED):
             self.setup_event_handlers(itask, "failed", message)
             self.job_pool.set_job_state(job_d, TASK_STATUS_FAILED)
         LOG.critical("[%s] -job(%02d) %s", itask, itask.submit_num,
                      "failed")
     elif itask.state.reset(TASK_STATUS_RETRYING):
         delay_msg = "retrying in %s" % (
             itask.try_timers[TASK_STATUS_RETRYING].delay_timeout_as_str())
         if itask.state.is_held:
             delay_msg = "held (%s)" % delay_msg
         msg = "failed, %s" % (delay_msg)
         LOG.info("[%s] -job(%02d) %s", itask, itask.submit_num, msg)
         itask.set_summary_message(msg)
         self.setup_event_handlers(itask, "retry",
                                   "%s, %s" % (self.JOB_FAILED, delay_msg))
     self._reset_job_timers(itask)
Example 8
def remote_clean(reg, platform_names, timeout):
    """Run subprocesses to clean workflows on remote install targets
    (skip localhost), given a set of platform names to look up.

    Args:
        reg (str): Workflow name.
        platform_names (list): List of platform names to look up in the global
            config, in order to determine the install targets to clean on.
        timeout (str): Number of seconds to wait before cancelling.
    """
    try:
        install_targets_map = (
            get_install_target_to_platforms_map(platform_names))
    except PlatformLookupError as exc:
        raise PlatformLookupError(
            "Cannot clean on remote platforms as the workflow database is "
            f"out of date/inconsistent with the global config - {exc}")

    pool = []
    for target, platforms in install_targets_map.items():
        if target == get_localhost_install_target():
            continue
        shuffle(platforms)
        LOG.info(
            f"Cleaning on install target: {platforms[0]['install target']}")
        # Issue ssh command:
        pool.append(
            (_remote_clean_cmd(reg, platforms[0], timeout), target, platforms)
        )
    failed_targets = []
    # Handle subproc pool results almost concurrently:
    while pool:
        for proc, target, platforms in pool:
            ret_code = proc.poll()
            if ret_code is None:  # proc still running
                continue
            pool.remove((proc, target, platforms))
            out, err = (f.decode() for f in proc.communicate())
            if out:
                LOG.debug(out)
            if ret_code:
                # Try again using the next platform for this install target:
                this_platform = platforms.pop(0)
                excn = TaskRemoteMgmtError(
                    TaskRemoteMgmtError.MSG_TIDY, this_platform['name'],
                    " ".join(proc.args), ret_code, out, err)
                LOG.debug(excn)
                if platforms:
                    pool.append(
                        (_remote_clean_cmd(reg, platforms[0], timeout),
                         target, platforms)
                    )
                else:  # Exhausted list of platforms
                    failed_targets.append(target)
            elif err:
                LOG.debug(err)
        time.sleep(0.2)
    if failed_targets:
        raise CylcError(
            f"Could not clean on install targets: {', '.join(failed_targets)}")
Example 9
    def rsync_255_fail(ctx, platform=None) -> bool:
        """Tests context for rsync failing to communicate with a host.

        If there has been a failure caused by rsync being unable to connect
        try a test of ssh connectivity. Necessary because loss of connectivity
        may cause different rsync failures depending on version, and some of
        the failures may be caused by other problems.
        """
        rsync_255_fail = False
        platform_rsync_cmd = (platform['rsync command']
                              if platform is not None else 'rsync')
        rsync_cmd = shlex.split(platform_rsync_cmd)
        if (ctx.cmd[0] == rsync_cmd[0] and ctx.ret_code not in [0, 255]
                and is_remote_host(ctx.host)):
            ssh_cmd = (platform['ssh command']
                       if platform is not None else 'ssh')
            ssh_test_cmd = shlex.split(f'{ssh_cmd} {ctx.host} true')
            LOG.info(
                f'testing connectivity for {ctx.host} using {ssh_test_cmd}')
            ssh_test = run(ssh_test_cmd, capture_output=True)
            if ssh_test.returncode == 255:
                rsync_255_fail = True
        elif (ctx.cmd[0] == rsync_cmd[0] and ctx.ret_code == 255):
            rsync_255_fail = True
        return rsync_255_fail
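
Note: a minimal sketch of the ssh connectivity probe this method performs, assuming an ssh client is on PATH; example.invalid is a placeholder host that will fail to resolve.

import shlex
from subprocess import run

host = 'example.invalid'  # placeholder; substitute a real host to test
ssh_test = run(shlex.split(f'ssh {host} true'), capture_output=True)
# OpenSSH exits with 255 on connection/usage errors, so a 255 here points
# at connectivity rather than at the remote 'true' command failing.
if ssh_test.returncode == 255:
    print(f'cannot reach {host} over ssh')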
Example 10
    def clear_broadcast(self,
                        point_strings=None,
                        namespaces=None,
                        cancel_settings=None):
        """Clear broadcasts globally, or for listed namespaces and/or points.

        Return a tuple (modified_settings, bad_options), where:
        * modified_settings is similar to the return value of the "put" method,
          but for removed broadcasts.
        * bad_options is a dict in the form:
              {"point_strings": ["20020202", ..."], ...}
          The dict is only populated if there are options not associated with
          previous broadcasts. The keys can be:
          * point_strings: a list of bad point strings.
          * namespaces: a list of bad namespaces.
          * cancel: a list of tuples. Each tuple contains the keys of a bad
            setting.
        """
        # If cancel_settings defined, only clear specific broadcasts
        cancel_keys_list = self._settings_to_keys_list(cancel_settings)

        # Clear broadcasts
        modified_settings = []
        with self.lock:
            for point_string, point_string_settings in self.broadcasts.items():
                if point_strings and point_string not in point_strings:
                    continue
                for namespace, namespace_settings in (
                        point_string_settings.items()):
                    if namespaces and namespace not in namespaces:
                        continue
                    stuff_stack = [([], namespace_settings)]
                    while stuff_stack:
                        keys, stuff = stuff_stack.pop()
                        for key, value in stuff.items():
                            if isinstance(value, dict):
                                stuff_stack.append((keys + [key], value))
                            elif (not cancel_keys_list
                                  or keys + [key] in cancel_keys_list):
                                stuff[key] = None
                                setting = {key: value}
                                for rkey in reversed(keys):
                                    setting = {rkey: setting}
                                modified_settings.append(
                                    (point_string, namespace, setting))

        # Prune any empty branches
        bad_options = self._get_bad_options(self._prune(), point_strings,
                                            namespaces, cancel_keys_list)

        # Log the broadcast
        self.suite_db_mgr.put_broadcast(modified_settings, is_cancel=True)
        LOG.info(get_broadcast_change_report(modified_settings,
                                             is_cancel=True))
        if bad_options:
            LOG.error(get_broadcast_bad_options_report(bad_options))
        if modified_settings:
            self.data_store_mgr.delta_broadcast()
        return modified_settings, bad_options
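
Note: the nested broadcast settings are walked with an explicit stack rather than recursion. A standalone sketch of that traversal over a made-up settings dict:

# Collect (key_path, value) pairs from a nested dict using an explicit stack,
# mirroring the stuff_stack loop above; the sample settings are made up.
settings = {'environment': {'FOO': '1'}, 'script': 'true'}
stack = [([], settings)]
flattened = []
while stack:
    keys, node = stack.pop()
    for key, value in node.items():
        if isinstance(value, dict):
            stack.append((keys + [key], value))
        else:
            flattened.append((keys + [key], value))
print(flattened)  # [(['script'], 'true'), (['environment', 'FOO'], '1')]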
Example 11
 def _run_event_handlers_callback(proc_ctx):
     """Callback on completion of a workflow event handler."""
     if proc_ctx.ret_code:
         msg = '%s EVENT HANDLER FAILED' % proc_ctx.cmd_key[1]
         LOG.error(str(proc_ctx))
         LOG.error(msg)
     else:
         LOG.info(str(proc_ctx))
Example 12
 def insert_db_job(self, row_idx, row):
     """Load job element from DB post restart."""
     if row_idx == 0:
         LOG.info("LOADING job data")
     (point_string, name, status, submit_num, time_submit, time_run,
      time_run_exit, batch_sys_name, batch_sys_job_id, platform_name) = row
     if status not in JOB_STATUS_SET:
         return
     t_id = f'{self.workflow_id}{ID_DELIM}{point_string}{ID_DELIM}{name}'
     j_id = f'{t_id}{ID_DELIM}{submit_num}'
     try:
         tdef = self.schd.config.get_taskdef(name)
         j_owner = self.schd.owner
         if platform_name:
             j_host = get_host_from_platform(get_platform(platform_name))
         else:
             j_host = self.schd.host
         j_buf = PbJob(
             stamp=f'{j_id}@{time()}',
             id=j_id,
             submit_num=submit_num,
             state=status,
             task_proxy=t_id,
             submitted_time=time_submit,
             started_time=time_run,
             finished_time=time_run_exit,
             batch_sys_name=batch_sys_name,
             batch_sys_job_id=batch_sys_job_id,
             host=j_host,
             owner=j_owner,
             name=name,
             cycle_point=point_string,
         )
         # Add in log files.
         j_buf.job_log_dir = get_task_job_log(self.schd.suite, point_string,
                                              name, submit_num)
         overrides = self.schd.task_events_mgr.broadcast_mgr.get_broadcast(
             TaskID.get(name, point_string))
         if overrides:
             rtconfig = pdeepcopy(tdef.rtconfig)
             poverride(rtconfig, overrides, prepend=True)
         else:
             rtconfig = tdef.rtconfig
         j_buf.extra_logs.extend([
             os.path.expanduser(os.path.expandvars(log_file))
             for log_file in rtconfig['extra log files']
         ])
     except SuiteConfigError:
         LOG.exception(
             ('ignoring job %s from the suite run database\n'
              '(its task definition has probably been deleted).') % j_id)
     except Exception:
         LOG.exception('could not load job %s' % j_id)
     else:
         self.added[j_id] = j_buf
         self.task_jobs.setdefault(t_id, set()).add(j_id)
         self.updates_pending = True
Example 13
    def clear_broadcast(
            self, point_strings=None, namespaces=None, cancel_settings=None):
        """Clear broadcasts globally, or for listed namespaces and/or points.

        Return a tuple (modified_settings, bad_options), where:
        * modified_settings is similar to the return value of the "put" method,
          but for removed broadcasts.
        * bad_options is a dict in the form:
              {"point_strings": ["20020202", ..."], ...}
          The dict is only populated if there are options not associated with
          previous broadcasts. The keys can be:
          * point_strings: a list of bad point strings.
          * namespaces: a list of bad namespaces.
          * cancel: a list of tuples. Each tuple contains the keys of a bad
            setting.
        """
        # If cancel_settings defined, only clear specific broadcasts
        cancel_keys_list = self._settings_to_keys_list(cancel_settings)

        # Clear broadcasts
        modified_settings = []
        with self.lock:
            for point_string, point_string_settings in self.broadcasts.items():
                if point_strings and point_string not in point_strings:
                    continue
                for namespace, namespace_settings in (
                        point_string_settings.items()):
                    if namespaces and namespace not in namespaces:
                        continue
                    stuff_stack = [([], namespace_settings)]
                    while stuff_stack:
                        keys, stuff = stuff_stack.pop()
                        for key, value in stuff.items():
                            if isinstance(value, dict):
                                stuff_stack.append((keys + [key], value))
                            elif (not cancel_keys_list or
                                    keys + [key] in cancel_keys_list):
                                stuff[key] = None
                                setting = {key: value}
                                for rkey in reversed(keys):
                                    setting = {rkey: setting}
                                modified_settings.append(
                                    (point_string, namespace, setting))

        # Prune any empty branches
        bad_options = self._get_bad_options(
            self._prune(), point_strings, namespaces, cancel_keys_list)

        # Log the broadcast
        self.suite_db_mgr.put_broadcast(modified_settings, is_cancel=True)
        LOG.info(
            get_broadcast_change_report(modified_settings, is_cancel=True))
        if bad_options:
            LOG.error(get_broadcast_bad_options_report(bad_options))

        return modified_settings, bad_options
Example 14
 def _run_event_handlers_callback(proc_ctx, abort_on_error=False):
     """Callback on completion of a suite event handler."""
     if proc_ctx.ret_code:
         msg = '%s EVENT HANDLER FAILED' % proc_ctx.cmd_key[1]
         LOG.error(str(proc_ctx))
         LOG.error(msg)
         if abort_on_error:
             raise SuiteEventError(msg)
     else:
         LOG.info(str(proc_ctx))
Example 15
        def _authorise(self, *args, user='******', meta=None, **kwargs):
            if not meta:
                meta = {}
            host = meta.get('host', '?')
            prog = meta.get('prog', '?')

            # Hardcoded for now - but much of this functionality can be
            # removed more sweepingly.
            LOG.info('[client-command] %s %s@%s:%s', fcn.__name__, user, host,
                     prog)
            return fcn(self, *args, **kwargs)
Example 16
 def state_reset(self,
                 status=None,
                 is_held=None,
                 is_queued=None,
                 is_runahead=None) -> bool:
     """Set new state and log the change. Return whether it changed."""
     before = str(self)
     if self.state.reset(status, is_held, is_queued, is_runahead):
         LOG.info(f"[{before}] => {self.state}")
         return True
     return False
Example 17
async def auto_restart(scheduler, _):
    """Automatically restart the workflow if configured to do so."""
    current_glbl_cfg = glbl_cfg(cached=False)
    mode = _should_auto_restart(scheduler, current_glbl_cfg)

    if mode:
        LOG.info('The Cylc workflow host will soon become un-available.')
        _set_auto_restart(scheduler,
                          restart_delay=current_glbl_cfg.get(
                              ['scheduler', 'auto restart delay']),
                          mode=mode)
Example 18
def scheduler_cli(parser, options, args, is_restart=False):
    """Implement cylc (run|restart).

    This function should contain all of the command line facing
    functionality of the Scheduler, exit codes, logging, etc.

    The Scheduler itself should be a Python object you can import and
    run in a regular Python session so cannot contain this kind of
    functionality.

    """
    reg = args[0]
    # Check suite is not already running before start of host selection.
    try:
        suite_files.detect_old_contact_file(reg)
    except SuiteServiceFileError as exc:
        sys.exit(exc)

    _check_registration(reg)

    # re-execute on another host if required
    _distribute(options.host, is_restart)

    # print the start message
    if options.no_detach or options.format == 'plain':
        _start_print_blurb()

    # setup the scheduler
    # NOTE: asyncio.run opens an event loop, runs your coro,
    #       then shutdown async generators and closes the event loop
    scheduler = Scheduler(reg, options, is_restart=is_restart)
    asyncio.run(_setup(parser, options, reg, is_restart, scheduler))

    # daemonize if requested
    # NOTE: asyncio event loops cannot persist across daemonization
    #       ensure you have tidied up all threads etc before daemonizing
    if not options.no_detach:
        from cylc.flow.daemonize import daemonize
        daemonize(scheduler)

    # setup loggers
    _open_logs(reg, options.no_detach)

    # run the workflow
    ret = asyncio.run(_run(parser, options, reg, is_restart, scheduler))

    # exit
    # NOTE: we must clean up all asyncio / threading stuff before exiting
    # NOTE: any threads which include sleep statements could cause
    #       sys.exit to hang if not shutdown properly
    LOG.info("DONE")
    _close_logs()
    sys.exit(ret)
Example 19
    def test_value_error_raises_system_exit(
        self,
        mocked_glbl_cfg,
        mocked_get_suite_run_log_name,
    ):
        """Test that a ValueError when writing to a log stream won't result
        in multiple exceptions (what could lead to infinite loop in some
        occasions. Instead, it **must** raise a SystemExit"""
        with tempfile.NamedTemporaryFile() as tf:
            # mock objects used when creating the file handler
            mocked = mock.MagicMock()
            mocked_glbl_cfg.return_value = mocked
            mocked.get.return_value = 100
            mocked_get_suite_run_log_name.return_value = tf.name
            file_handler = TimestampRotatingFileHandler("suiteA", False)
            # next line is important as pytest can have a "Bad file descriptor"
            # due to a FileHandler with default "a" (pytest tries to r/w).
            file_handler.mode = "a+"

            # enable the logger
            LOG.setLevel(logging.INFO)
            LOG.addHandler(file_handler)

            # Disable raising uncaught exceptions in logging, due to file
            # handler using stdin.fileno. See the following links for more.
            # https://github.com/pytest-dev/pytest/issues/2276 &
            # https://github.com/pytest-dev/pytest/issues/1585
            logging.raiseExceptions = False

            # first message will initialize the stream and the handler
            LOG.info("What could go")

            # here we change the stream of the handler
            old_stream = file_handler.stream
            file_handler.stream = mock.MagicMock()
            file_handler.stream.seek = mock.MagicMock()
            # make seek raise a ValueError to simulate a broken stream
            file_handler.stream.seek.side_effect = ValueError

            try:
                # next call will call the emit method and use the mocked stream
                LOG.info("wrong?!")
                self.fail("Exception SystemError was not raised")
            except SystemExit:
                pass
            finally:
                # clean up
                file_handler.stream = old_stream
                # for log_handler in LOG.handlers:
                #     log_handler.close()
                file_handler.close()
                LOG.removeHandler(file_handler)
                logging.raiseExceptions = True
Example 20
    def poll_task_jobs(self, suite, itasks, msg=None):
        """Poll jobs of specified tasks.

        This method uses _poll_task_jobs_callback() and
        _manip_task_jobs_callback() as help/callback methods.

        _poll_task_job_callback() executes one specific job.
        """
        if itasks:
            if msg is not None:
                LOG.info(msg)
            self._run_job_cmd(self.JOBS_POLL, suite, itasks,
                              self._poll_task_jobs_callback)
Example 21
    def satisfy_xclock(self, itask: TaskProxy):
        """Attempt to satisfy itask's clock trigger, if it has one.

        Args:
            itask (TaskProxy): TaskProxy
        """
        label, sig, ctx, satisfied = self._get_xclock(itask)
        if satisfied:
            return
        if wall_clock(*ctx.func_args, **ctx.func_kwargs):
            itask.state.xclock = (label, True)
            self.sat_xclock.append(sig)
            LOG.info('clock xtrigger satisfied: %s = %s' % (label, str(ctx)))
Example 22
    def load_xtrigger_for_restart(self, row_idx: int, row: Tuple[str, str]):
        """Load satisfied xtrigger results from suite DB.

        Args:
            row_idx (int): row index (used for logging)
            row (Tuple[str, str]): tuple with the signature and results (json)
        Raises:
            ValueError: if the row cannot be parsed as JSON
        """
        if row_idx == 0:
            LOG.info("LOADING satisfied xtriggers")
        sig, results = row
        self.sat_xtrig[sig] = json.loads(results)
Example 23
    def put_broadcast(self,
                      point_strings=None,
                      namespaces=None,
                      settings=None):
        """Add new broadcast settings (server side interface).

        Return a tuple (modified_settings, bad_options) where:
          modified_settings is a list of modified settings in the form:
            [("20200202", "foo", {"script": "true"}), ...]
          bad_options is as described in the docstring for self.clear().
        """
        modified_settings = []
        bad_point_strings = []
        bad_namespaces = []

        with self.lock:
            for setting in settings:
                for point_string in point_strings:
                    # Standardise the point and check its validity.
                    bad_point = False
                    try:
                        point_string = standardise_point_string(point_string)
                    except PointParsingError:
                        if point_string != '*':
                            bad_point_strings.append(point_string)
                            bad_point = True
                    if not bad_point and point_string not in self.broadcasts:
                        self.broadcasts[point_string] = {}
                    for namespace in namespaces:
                        if namespace not in self.linearized_ancestors:
                            bad_namespaces.append(namespace)
                        elif not bad_point:
                            if namespace not in self.broadcasts[point_string]:
                                self.broadcasts[point_string][namespace] = {}
                            addict(self.broadcasts[point_string][namespace],
                                   setting)
                            modified_settings.append(
                                (point_string, namespace, setting))

        # Log the broadcast
        self.suite_db_mgr.put_broadcast(modified_settings)
        LOG.info(get_broadcast_change_report(modified_settings))

        bad_options = {}
        if bad_point_strings:
            bad_options["point_strings"] = bad_point_strings
        if bad_namespaces:
            bad_options["namespaces"] = bad_namespaces
        if modified_settings:
            self.data_store_mgr.delta_broadcast()
        return modified_settings, bad_options
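
Note: each setting is merged into the nested broadcasts dict by addict. A small sketch of that kind of recursive merge, with a local merge() helper standing in for Cylc's addict:

def merge(target: dict, new: dict) -> None:
    """Recursively merge `new` into `target` (a stand-in for cylc's addict)."""
    for key, value in new.items():
        if isinstance(value, dict):
            merge(target.setdefault(key, {}), value)
        else:
            target[key] = value


broadcasts = {}
namespace_settings = broadcasts.setdefault('20200202', {}).setdefault('foo', {})
merge(namespace_settings, {'environment': {'FOO': '1'}})
merge(namespace_settings, {'script': 'true'})
print(broadcasts)
# {'20200202': {'foo': {'environment': {'FOO': '1'}, 'script': 'true'}}}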
Example 24
    def call_xtriggers_async(self, itask: TaskProxy):
        """Call itask's xtrigger functions via the process pool...

        ...if previous call not still in-process and retry period is up.


        Args:
            itask: task proxy to check.
        """
        for label, sig, ctx, _ in self._get_xtrigs(itask, unsat_only=True):
            if sig.startswith("wall_clock"):
                # Special case: quick synchronous clock check.
                if 'absolute_as_seconds' not in ctx.func_kwargs:
                    ctx.func_kwargs.update(
                        {
                            'point_as_seconds': itask.get_point_as_seconds()
                        }
                    )
                if wall_clock(*ctx.func_args, **ctx.func_kwargs):
                    itask.state.xtriggers[label] = True
                    self.sat_xtrig[sig] = {}
                    self.data_store_mgr.delta_task_xtrigger(sig, True)
                    LOG.info('xtrigger satisfied: %s = %s', label, sig)
                continue
            # General case: potentially slow asynchronous function call.
            if sig in self.sat_xtrig:
                if not itask.state.xtriggers[label]:
                    itask.state.xtriggers[label] = True
                    res = {}
                    for key, val in self.sat_xtrig[sig].items():
                        res["%s_%s" % (label, key)] = val
                    if res:
                        xtrigger_env = [{'environment': {key: val}} for
                                        key, val in res.items()]
                        self.broadcast_mgr.put_broadcast(
                            [str(itask.point)],
                            [itask.tdef.name],
                            xtrigger_env
                        )
                continue
            if sig in self.active:
                # Already waiting on this result.
                continue
            now = time()
            if sig in self.t_next_call and now < self.t_next_call[sig]:
                # Too soon to call this one again.
                continue
            self.t_next_call[sig] = now + ctx.intvl
            # Queue to the process pool, and record as active.
            self.active.append(sig)
            self.proc_pool.put_command(ctx, self.callback)
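
Note: the t_next_call bookkeeping is a simple per-signature rate limit. A standalone sketch of the same throttle, with a made-up interval and signature:

from time import time

t_next_call = {}   # signature -> earliest time it may be queued again
INTERVAL = 10.0    # seconds between calls for one signature (made-up value)


def maybe_queue(sig: str) -> bool:
    """Return True if `sig` may be (re)queued now, booking the next slot."""
    now = time()
    if sig in t_next_call and now < t_next_call[sig]:
        return False  # too soon to call this one again
    t_next_call[sig] = now + INTERVAL
    return True


print(maybe_queue('my_xtrigger(a=1)'))  # True  - first call goes through
print(maybe_queue('my_xtrigger(a=1)'))  # False - throttled for INTERVAL secs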
Example 25
def remove_dir_and_target(path: Union[Path, str]) -> None:
    """Delete a directory tree (i.e. including contents), as well as the
    target directory tree if the specified path is a symlink.

    Args:
        path: the absolute path of the directory to delete.
    """
    if not os.path.isabs(path):
        raise ValueError('Path must be absolute')
    if os.path.exists(path) and not os.path.isdir(path):
        raise NotADirectoryError(path)
    if os.path.islink(path):
        if os.path.exists(path):
            target = os.path.realpath(path)
            LOG.info(
                f'Removing symlink target directory: ({path} ->) {target}'
            )
            rmtree(target, onerror=handle_rmtree_err)
            LOG.info(f'Removing symlink: {path}')
        else:
            LOG.info(f'Removing broken symlink: {path}')
        os.remove(path)
    elif not os.path.exists(path):
        raise FileNotFoundError(path)
    else:
        LOG.info(f'Removing directory: {path}')
        rmtree(path, onerror=handle_rmtree_err)
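
Note: a self-contained sketch of the symlink-aware removal on a throwaway layout under a temporary directory (assumes a filesystem where symlink creation is allowed):

import os
import tempfile
from pathlib import Path
from shutil import rmtree

tmp = Path(tempfile.mkdtemp())
target = tmp / 'real'
(target / 'sub').mkdir(parents=True)
link = tmp / 'link'
link.symlink_to(target)

# Remove the symlink target tree first, then the symlink itself, matching
# the "existing symlink" branch above.
rmtree(os.path.realpath(link))
os.remove(link)
print(link.exists(), target.exists())  # False False
tmp.rmdir()  # tidy up the scratch directory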
Example 26
def main(parser, options, suite, event_msg, event_id):
    suite = os.path.normpath(suite)
    LOG.info('Send to suite %s: "%s" (%s)', suite, event_msg, event_id)
    pclient = get_client(suite, timeout=options.comms_timeout)

    max_n_tries = int(options.max_n_tries)
    retry_intvl_secs = float(options.retry_intvl_secs)

    mutation_kwargs = {
        'request_string': MUTATION,
        'variables': {
            'wFlows': [suite],
            'eventMsg': event_msg,
            'eventId': event_id,
        }
    }

    for i_try in range(max_n_tries):
        try:
            pclient('graphql', mutation_kwargs)
        except ClientError as exc:
            LOG.exception(exc)
            LOG.info(MSG_SEND_FAILED, i_try + 1, max_n_tries)
            if i_try == max_n_tries - 1:  # final attempt
                raise CylcError('send failed')
            LOG.info(MSG_SEND_RETRY, retry_intvl_secs, options.comms_timeout)
            sleep(retry_intvl_secs)
        else:
            if i_try > 0:
                LOG.info(MSG_SEND_SUCCEED, i_try + 1, max_n_tries)
            break
Example 27
def main(parser, options, suite, event_msg, event_id):
    LOG.info('Send to suite %s: "%s" (%s)', suite, event_msg, event_id)

    pclient = SuiteRuntimeClient(suite, options.owner, options.host,
                                 options.port)

    max_n_tries = int(options.max_n_tries)
    retry_intvl_secs = float(options.retry_intvl_secs)

    for i_try in range(max_n_tries):
        try:
            pclient(
                'put_ext_trigger',
                {'message': event_msg, 'id': event_id},
                timeout=options.comms_timeout)
        except ClientError as exc:
            LOG.exception(exc)
            LOG.info(MSG_SEND_FAILED, i_try + 1, max_n_tries)
            if i_try == max_n_tries - 1:  # final attempt
                raise CylcError('send failed')
            else:
                LOG.info(MSG_SEND_RETRY, retry_intvl_secs,
                         options.comms_timeout)
                sleep(retry_intvl_secs)
        else:
            if i_try > 0:
                LOG.info(MSG_SEND_SUCCEED, i_try + 1, max_n_tries)
            break
Example 28
def main(parser: COP, options: 'Values', workflow: str, event_msg: str,
         event_id: str) -> None:
    workflow, _ = parse_reg(workflow)
    LOG.info('Send to workflow %s: "%s" (%s)', workflow, event_msg, event_id)
    pclient = get_client(workflow, timeout=options.comms_timeout)

    max_n_tries = int(options.max_n_tries)
    retry_intvl_secs = float(options.retry_intvl_secs)

    mutation_kwargs = {
        'request_string': MUTATION,
        'variables': {
            'wFlows': [workflow],
            'eventMsg': event_msg,
            'eventId': event_id,
        }
    }

    for i_try in range(max_n_tries):
        try:
            pclient('graphql', mutation_kwargs)
        except ClientError as exc:
            LOG.exception(exc)
            LOG.info(MSG_SEND_FAILED, i_try + 1, max_n_tries)
            if i_try == max_n_tries - 1:  # final attempt
                raise CylcError('send failed')
            LOG.info(MSG_SEND_RETRY, retry_intvl_secs, options.comms_timeout)
            sleep(retry_intvl_secs)
        else:
            if i_try > 0:
                LOG.info(MSG_SEND_SUCCEED, i_try + 1, max_n_tries)
            break
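
Note: the main() variants above share the same bounded retry loop. A generic, self-contained sketch of that pattern, where send() is a placeholder that fails twice and then succeeds:

import logging
from time import sleep

logging.basicConfig(level=logging.INFO)
LOG = logging.getLogger(__name__)
MAX_N_TRIES = 3
RETRY_INTVL_SECS = 1.0
_calls = {'n': 0}


def send():
    """Placeholder for the client call: fail twice, then succeed."""
    _calls['n'] += 1
    if _calls['n'] < 3:
        raise ConnectionError('suite not reachable')


for i_try in range(MAX_N_TRIES):
    try:
        send()
    except ConnectionError as exc:
        LOG.exception(exc)
        if i_try == MAX_N_TRIES - 1:  # final attempt
            raise RuntimeError('send failed') from exc
        sleep(RETRY_INTVL_SECS)
    else:
        if i_try > 0:
            LOG.info('sent on attempt %d of %d', i_try + 1, MAX_N_TRIES)
        break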
Example 29
    def test_value_error_raises_system_exit(self, mocked_glbl_cfg):
        """Test that a ValueError when writing to a log stream won't result
        in multiple exceptions (what could lead to infinite loop in some
        occasions. Instead, it **must** raise a SystemExit"""
        with tempfile.NamedTemporaryFile() as tf:
            # mock objects used when creating the file handler
            mocked = mock.MagicMock()
            mocked_glbl_cfg.return_value = mocked
            mocked.get_derived_host_item.return_value = tf.name
            mocked.get.return_value = 100
            file_handler = TimestampRotatingFileHandler("suiteA", False)
            # next line is important as pytest can have a "Bad file descriptor"
            # due to a FileHandler with default "a" (pytest tries to r/w).
            file_handler.mode = "a+"

            # enable the logger
            LOG.setLevel(logging.INFO)
            LOG.addHandler(file_handler)

            # Disable raising uncaught exceptions in logging, due to file
            # handler using stdin.fileno. See the following links for more.
            # https://github.com/pytest-dev/pytest/issues/2276 &
            # https://github.com/pytest-dev/pytest/issues/1585
            logging.raiseExceptions = False

            # first message will initialize the stream and the handler
            LOG.info("What could go")

            # here we change the stream of the handler
            old_stream = file_handler.stream
            file_handler.stream = mock.MagicMock()
            file_handler.stream.seek = mock.MagicMock()
            # make seek raise a ValueError to simulate a broken stream
            file_handler.stream.seek.side_effect = ValueError

            try:
                # next call will call the emit method and use the mocked stream
                LOG.info("wrong?!")
                self.fail("Exception SystemError was not raised")
            except SystemExit:
                pass
            finally:
                # clean up
                file_handler.stream = old_stream
                # for log_handler in LOG.handlers:
                #     log_handler.close()
                file_handler.close()
                LOG.removeHandler(file_handler)
                logging.raiseExceptions = True
Example 30
File: server.py Project: cylc/cylc
        def _authorise(self, *args, user='******', meta=None, **kwargs):
            if not meta:
                meta = {}
            host = meta.get('host', '?')
            prog = meta.get('prog', '?')

            usr_priv_level = self._get_priv_level(user)
            if usr_priv_level < req_priv_level:
                LOG.warning(
                    "[client-connect] DENIED (privilege '%s' < '%s') %s@%s:%s",
                    usr_priv_level, req_priv_level, user, host, prog)
                raise Exception('Authorisation failure')
            LOG.info(
                '[client-command] %s %s@%s:%s', fcn.__name__, user, host, prog)
            return fcn(self, *args, **kwargs)
Example 31
def run_reftest(config, ctx):
    """Run reference test at shutdown."""
    reffilename = config.get_ref_log_name()
    curfilename = get_workflow_test_log_name(ctx.workflow)
    ref = _load_reflog(reffilename)
    cur = _load_reflog(curfilename)
    if ref == cur:
        LOG.info('WORKFLOW REFERENCE TEST PASSED')
    else:
        exc = WorkflowEventError(
            'WORKFLOW REFERENCE TEST FAILED\n'
            'triggering is NOT consistent with the reference log:\n%s\n' %
            '\n'.join(unified_diff(ref, cur, 'reference', 'this run')))
        LOG.exception(exc)
        raise exc
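
Note: a self-contained sketch of the unified_diff comparison used here, with made-up trigger lines; lineterm='' only keeps the printed output tidy:

from difflib import unified_diff

ref = ['1/foo -triggered off []', '1/bar -triggered off [1/foo]']
cur = ['1/foo -triggered off []', '1/bar -triggered off [1/baz]']
if ref != cur:
    diff = unified_diff(ref, cur, 'reference', 'this run', lineterm='')
    print('\n'.join(diff))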
Example 32
        def _authorise(self, *args, user='******', meta=None, **kwargs):
            if not meta:
                meta = {}
            host = meta.get('host', '?')
            prog = meta.get('prog', '?')

            usr_priv_level = self._get_priv_level(user)
            if usr_priv_level < req_priv_level:
                LOG.warning(
                    "[client-connect] DENIED (privilege '%s' < '%s') %s@%s:%s",
                    usr_priv_level, req_priv_level, user, host, prog)
                raise Exception('Authorisation failure')
            LOG.info('[client-command] %s %s@%s:%s', fcn.__name__, user, host,
                     prog)
            return fcn(self, *args, **kwargs)
Example 33
    def check_task_jobs(self, suite, task_pool):
        """Check submission and execution timeout and polling timers.

        Poll tasks that have timed out and/or have reached next polling time.
        """
        now = time()
        poll_tasks = set()
        for itask in task_pool.get_tasks():
            if self.task_events_mgr.check_job_time(itask, now):
                poll_tasks.add(itask)
                if itask.poll_timer.delay is not None:
                    LOG.info('[%s] -poll now, (next in %s)', itask,
                             itask.poll_timer.delay_timeout_as_str())
        if poll_tasks:
            self.poll_task_jobs(suite, poll_tasks)
Example 34
    def put_broadcast(
            self, point_strings=None, namespaces=None, settings=None):
        """Add new broadcast settings (server side interface).

        Return a tuple (modified_settings, bad_options) where:
          modified_settings is a list of modified settings in the form:
            [("20200202", "foo", {"script": "true"}), ...]
          bad_options is as described in the docstring for self.clear().
        """
        modified_settings = []
        bad_point_strings = []
        bad_namespaces = []

        with self.lock:
            for setting in settings:
                for point_string in point_strings:
                    # Standardise the point and check its validity.
                    bad_point = False
                    try:
                        point_string = standardise_point_string(point_string)
                    except PointParsingError:
                        if point_string != '*':
                            bad_point_strings.append(point_string)
                            bad_point = True
                    if not bad_point and point_string not in self.broadcasts:
                        self.broadcasts[point_string] = {}
                    for namespace in namespaces:
                        if namespace not in self.linearized_ancestors:
                            bad_namespaces.append(namespace)
                        elif not bad_point:
                            if namespace not in self.broadcasts[point_string]:
                                self.broadcasts[point_string][namespace] = {}
                            self._addict(
                                self.broadcasts[point_string][namespace],
                                setting)
                            modified_settings.append(
                                (point_string, namespace, setting))

        # Log the broadcast
        self.suite_db_mgr.put_broadcast(modified_settings)
        LOG.info(get_broadcast_change_report(modified_settings))

        bad_options = {}
        if bad_point_strings:
            bad_options["point_strings"] = bad_point_strings
        if bad_namespaces:
            bad_options["namespaces"] = bad_namespaces
        return modified_settings, bad_options
Example 35
    def upgrade_retry_state(self):
        """Replace the retry state with xtriggers.

        * Change *retrying tasks to waiting
        * Add the required xtrigger

        Note:
            The retry status can be safely removed as it is really a display
            state; the retry logic revolves around the TaskActionTimer.

        From:
            cylc<8
        To:
            cylc>=8
        PR:
            #3423

        Returns:
            list - (cycle, name, status) tuples of all retrying tasks.

        """
        conn = self.connect()
        all_tasks = []

        for table in [self.TABLE_TASK_POOL_CHECKPOINTS, self.TABLE_TASK_POOL]:
            tasks = list(
                conn.execute(rf'''
                    SELECT
                        cycle, name, status
                    FROM
                        {table}
                    WHERE
                        status IN ('retrying', 'submit-retrying')
                '''))
            if tasks:
                LOG.info(f'Upgrade retrying tasks in table {table}')
            all_tasks.extend(tasks)
            conn.executemany(
                rf'''
                    UPDATE
                        {table}
                    SET
                        status='{TASK_STATUS_WAITING}'
                    WHERE
                        cycle==?
                        and name==?
                        and status==?
                ''', tasks)
        conn.commit()
        return all_tasks
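
Note: a standalone sketch of the same select-then-bulk-update pattern with executemany, against an in-memory SQLite database; the table and rows are illustrative:

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE task_pool (cycle TEXT, name TEXT, status TEXT)')
conn.executemany(
    'INSERT INTO task_pool VALUES (?, ?, ?)',
    [('1', 'foo', 'retrying'), ('1', 'bar', 'waiting')])

# Select the rows to upgrade, then rewrite their status in bulk.
tasks = list(conn.execute(
    "SELECT cycle, name, status FROM task_pool"
    " WHERE status IN ('retrying', 'submit-retrying')"))
conn.executemany(
    "UPDATE task_pool SET status='waiting'"
    " WHERE cycle==? AND name==? AND status==?", tasks)
conn.commit()
print(list(conn.execute('SELECT * FROM task_pool')))
# [('1', 'foo', 'waiting'), ('1', 'bar', 'waiting')]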
Example 36
    def check_task_jobs(self, suite, task_pool):
        """Check submission and execution timeout and polling timers.

        Poll tasks that have timed out and/or have reached next polling time.
        """
        now = time()
        poll_tasks = set()
        for itask in task_pool.get_tasks():
            if self.task_events_mgr.check_job_time(itask, now):
                poll_tasks.add(itask)
                if itask.poll_timer.delay is not None:
                    LOG.info(
                        '[%s] -poll now, (next in %s)',
                        itask, itask.poll_timer.delay_timeout_as_str())
        if poll_tasks:
            self.poll_task_jobs(suite, poll_tasks)
Example 37
    def verify_triggering(self):
        new = self.get_triggered(self.new_loglines)
        ref = self.get_triggered(self.ref_loglines)

        if len(new) == 0:
            raise LogAnalyserError(
                "new log contains no triggering info.")

        if len(ref) == 0:
            raise LogAnalyserError(
                "reference log contains no triggering info.")

        new.sort()
        ref.sort()

        if new != ref:
            diff = unified_diff(new, ref, 'this run', 'reference log')
            raise LogAnalyserError(
                "triggering is NOT consistent with the reference log:" +
                '\n' + '\n'.join(diff) + '\n')
        else:
            LOG.info(
                "LogAnalyser: triggering is consistent with the reference log")
Example 38
 def load_db_broadcast_states(self, row_idx, row):
     """Load broadcast variables from runtime DB broadcast states row."""
     if row_idx == 0:
         LOG.info("LOADING broadcast states")
     point, namespace, key, value = row
     sections = []
     cur_key = key
     if "]" in cur_key:
         sections = self.REC_SECTION.findall(cur_key)
         cur_key = cur_key.rsplit(r"]", 1)[-1]
     with self.lock:
         self.broadcasts.setdefault(point, {})
         self.broadcasts[point].setdefault(namespace, {})
         dict_ = self.broadcasts[point][namespace]
         for section in sections:
             dict_.setdefault(section, {})
             dict_ = dict_[section]
         dict_[cur_key] = value
     LOG.info(CHANGE_FMT.strip() % {
         "change": CHANGE_PREFIX_SET,
         "point": point,
         "namespace": namespace,
         "key": key,
         "value": value})
Example 39
 def _run_event_mail_callback(proc_ctx):
     """Callback the mail command for notification of a suite event."""
     if proc_ctx.ret_code:
         LOG.warning(str(proc_ctx))
     else:
         LOG.info(str(proc_ctx))
Example 40
    def submit_task_jobs(self, suite, itasks, is_simulation=False):
        """Prepare and submit task jobs.

        Submit tasks where possible. Ignore tasks that are waiting for host
        select command to complete, or tasks that are waiting for remote
        initialisation. Bad host select command, error writing to a job file or
        bad remote initialisation will cause a bad task - leading to submission
        failure.

        This method uses prep_submit_task_job() as helper.

        Return (list): list of tasks that attempted submission.
        """
        if is_simulation:
            return self._simulation_submit_task_jobs(itasks)

        # Prepare tasks for job submission
        prepared_tasks, bad_tasks = self.prep_submit_task_jobs(suite, itasks)

        # Reset consumed host selection results
        self.task_remote_mgr.remote_host_select_reset()

        if not prepared_tasks:
            return bad_tasks

        # Group task jobs by (host, owner)
        auth_itasks = {}  # {(host, owner): [itask, ...], ...}
        for itask in prepared_tasks:
            auth_itasks.setdefault((itask.task_host, itask.task_owner), [])
            auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
        # Submit task jobs for each (host, owner) group
        done_tasks = bad_tasks
        for (host, owner), itasks in sorted(auth_itasks.items()):
            is_init = self.task_remote_mgr.remote_init(host, owner)
            if is_init is None:
                # Remote is waiting to be initialised
                for itask in itasks:
                    itask.set_summary_message(self.REMOTE_INIT_MSG)
                continue
            # Ensure that localhost background/at jobs are recorded as running
            # on the host name of the current suite host, rather than just
            # "localhost". On suite restart on a different suite host, this
            # allows the restart logic to correctly poll the status of the
            # background/at jobs that may still be running on the previous
            # suite host.
            if (
                self.batch_sys_mgr.is_job_local_to_host(
                    itask.summary['batch_sys_name']) and
                not is_remote_host(host)
            ):
                owner_at_host = get_host()
            else:
                owner_at_host = host
            # Persist
            if owner:
                owner_at_host = owner + '@' + owner_at_host
            now_str = get_current_time_string()
            done_tasks.extend(itasks)
            for itask in itasks:
                # Log and persist
                LOG.info(
                    '[%s] -submit-num=%02d, owner@host=%s',
                    itask, itask.submit_num, owner_at_host)
                self.suite_db_mgr.put_insert_task_jobs(itask, {
                    'is_manual_submit': itask.is_manual_submit,
                    'try_num': itask.get_try_num(),
                    'time_submit': now_str,
                    'user_at_host': owner_at_host,
                    'batch_sys_name': itask.summary['batch_sys_name'],
                })
                itask.is_manual_submit = False
            if is_init == REMOTE_INIT_FAILED:
                # Remote has failed to initialise
                # Set submit-failed for all affected tasks
                for itask in itasks:
                    itask.local_job_file_path = None  # reset for retry
                    log_task_job_activity(
                        SubProcContext(
                            self.JOBS_SUBMIT,
                            '(init %s)' % owner_at_host,
                            err=REMOTE_INIT_FAILED,
                            ret_code=1),
                        suite, itask.point, itask.tdef.name)
                    self.task_events_mgr.process_message(
                        itask, CRITICAL,
                        self.task_events_mgr.EVENT_SUBMIT_FAILED)
                continue
            # Build the "cylc jobs-submit" command
            cmd = ['cylc', self.JOBS_SUBMIT]
            if LOG.isEnabledFor(DEBUG):
                cmd.append('--debug')
            if get_utc_mode():
                cmd.append('--utc-mode')
            remote_mode = False
            kwargs = {}
            for key, value, test_func in [
                    ('host', host, is_remote_host),
                    ('user', owner, is_remote_user)]:
                if test_func(value):
                    cmd.append('--%s=%s' % (key, value))
                    remote_mode = True
                    kwargs[key] = value
            if remote_mode:
                cmd.append('--remote-mode')
            cmd.append('--')
            cmd.append(glbl_cfg().get_derived_host_item(
                suite, 'suite job log directory', host, owner))
            # Chop itasks into a series of shorter lists if it's very big
            # to prevent overloading of stdout and stderr pipes.
            itasks = sorted(itasks, key=lambda itask: itask.identity)
            chunk_size = len(itasks) // ((len(itasks) // 100) + 1) + 1
            itasks_batches = [
                itasks[i:i + chunk_size] for i in range(0,
                                                        len(itasks),
                                                        chunk_size)]
            LOG.debug(
                '%s ... # will invoke in batches, sizes=%s',
                cmd, [len(b) for b in itasks_batches])
            for i, itasks_batch in enumerate(itasks_batches):
                stdin_files = []
                job_log_dirs = []
                for itask in itasks_batch:
                    if remote_mode:
                        stdin_files.append(
                            get_task_job_job_log(
                                suite, itask.point, itask.tdef.name,
                                itask.submit_num))
                    job_log_dirs.append(get_task_job_id(
                        itask.point, itask.tdef.name, itask.submit_num))
                    # The job file is now (about to be) used: reset the file
                    # write flag so that subsequent manual retrigger will
                    # generate a new job file.
                    itask.local_job_file_path = None
                    itask.state.reset_state(TASK_STATUS_READY)
                    if itask.state.outputs.has_custom_triggers():
                        self.suite_db_mgr.put_update_task_outputs(itask)
                self.proc_pool.put_command(
                    SubProcContext(
                        self.JOBS_SUBMIT,
                        cmd + job_log_dirs,
                        stdin_files=stdin_files,
                        job_log_dirs=job_log_dirs,
                        **kwargs
                    ),
                    self._submit_task_jobs_callback, [suite, itasks_batch])
        return done_tasks
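
Note: the batching arithmetic above keeps each 'cylc jobs-submit' invocation to roughly 100 job log directories or fewer. A standalone sketch of that chunking with a dummy list:

items = list(range(250))  # dummy stand-ins for the prepared itasks
chunk_size = len(items) // ((len(items) // 100) + 1) + 1
batches = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
print([len(b) for b in batches])  # [84, 84, 82] - every batch stays <= 100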