Example 1
 def list_suites(self, regfilter=None):
     """Return a filtered list of valid suite registrations."""
     rec_regfilter = None
     if regfilter:
         try:
             rec_regfilter = re.compile(regfilter)
         except re.error as exc:
             raise ValueError("%s: %s" % (regfilter, exc))
     run_d = glbl_cfg().get_host_item('run directory')
     results = []
     for dirpath, dnames, _ in os.walk(run_d, followlinks=True):
         # Always descend for top directory, but
         # don't descend further if it has a .service/ dir
         if dirpath != run_d and self.DIR_BASE_SRV in dnames:
             dnames[:] = []
         # Choose only suites with .service and matching filter
         reg = os.path.relpath(dirpath, run_d)
         path = os.path.join(dirpath, self.DIR_BASE_SRV)
         if (not self._locate_item(self.FILE_BASE_SOURCE, path) or
                 rec_regfilter and not rec_regfilter.search(reg)):
             continue
         try:
             results.append([
                 reg,
                 self.get_suite_source_dir(reg),
                 self.get_suite_title(reg)])
         except (IOError, SuiteServiceFileError) as exc:
             LOG.error('%s: %s', reg, exc)
     return results
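A minimal sketch of the os.walk pruning idiom used above: assigning to dnames[:] in place is what stops the walk from descending further. The marker name below mirrors DIR_BASE_SRV, but the function itself is illustrative, not part of the module.

import os

def find_service_dirs(root, marker='.service'):
    """Yield directories under root that own a marker subdirectory."""
    for dirpath, dnames, _ in os.walk(root, followlinks=True):
        # Always descend from the top directory, but prune the walk as
        # soon as a lower directory owns the marker: mutating dnames in
        # place is how os.walk callers control descent.
        if dirpath != root and marker in dnames:
            dnames[:] = []
            yield os.path.relpath(dirpath, root)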
Example 2
 def _manip_task_jobs_callback(self,
                               ctx,
                               suite,
                               itasks,
                               summary_callback,
                               more_callbacks=None):
     """Callback when submit/poll/kill tasks command exits."""
     if ctx.ret_code:
         LOG.error(ctx)
     else:
         LOG.debug(ctx)
     # A dict for easy reference of (CYCLE, NAME, SUBMIT_NUM) -> TaskProxy
     #
     # Note for "reload": A TaskProxy instance may be replaced on reload, so
     # the "itasks" list may not reference the TaskProxy objects that
     # replace the old ones. The .reload_successor attribute provides the
     # link(s) for us to get to the latest replacement.
     #
     # Note for "kill": It is possible for a job to trigger its trap and
     # report back to the suite before this logic is called. If so, the task
     # will no longer be TASK_STATUS_SUBMITTED or TASK_STATUS_RUNNING, and
     # its output line will be ignored here.
     tasks = {}
     for itask in itasks:
         while itask.reload_successor is not None:
             itask = itask.reload_successor
         if itask.point is not None and itask.submit_num:
             submit_num = "%02d" % (itask.submit_num)
             tasks[(str(itask.point), itask.tdef.name, submit_num)] = itask
     handlers = [(self.batch_sys_mgr.OUT_PREFIX_SUMMARY, summary_callback)]
     if more_callbacks:
         for prefix, callback in more_callbacks.items():
             handlers.append((prefix, callback))
     out = ctx.out
     if not out:
         out = ""
     bad_tasks = dict(tasks)
     for line in out.splitlines(True):
         for prefix, callback in handlers:
             if line.startswith(prefix):
                 line = line[len(prefix):].strip()
                 try:
                     path = line.split("|", 2)[1]  # timestamp, path, status
                     point, name, submit_num = path.split(os.sep, 2)
                     if prefix == self.batch_sys_mgr.OUT_PREFIX_SUMMARY:
                         del bad_tasks[(point, name, submit_num)]
                     itask = tasks[(point, name, submit_num)]
                     callback(suite, itask, ctx, line)
                 except (LookupError, ValueError, KeyError) as exc:
                     LOG.warning('Unhandled %s output: %s', ctx.cmd_key,
                                 line)
                     LOG.exception(exc)
     # Task jobs that are in the original command but did not get a status
     # in the output. Handle as failures.
     for key, itask in sorted(bad_tasks.items()):
         line = ("|".join([ctx.timestamp, os.sep.join(key), "1"]) + "\n")
         summary_callback(suite, itask, ctx, line)
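A reduced sketch of the dispatch pattern above: each callback claims output lines by prefix, and any task still left in the copied dict afterwards never reported a status, so it is synthesised as a failure. The line format here is an assumption modelled on the "timestamp, path, status" comment in the code.

def dispatch_output(out, tasks, handlers, fail_callback):
    """Route prefixed output lines to callbacks; fail the leftovers."""
    remaining = dict(tasks)  # shallow copy; survivors got no status line
    for line in (out or "").splitlines():
        for prefix, callback in handlers:
            if line.startswith(prefix):
                payload = line[len(prefix):].strip()
                parts = payload.split("|", 2)  # timestamp|key|status
                if len(parts) > 1 and parts[1] in tasks:
                    remaining.pop(parts[1], None)
                    callback(tasks[parts[1]], payload)
    # Tasks named in the command but absent from the output: treat as
    # failures, mirroring the bad_tasks loop above.
    for key, task in sorted(remaining.items()):
        fail_callback(task, f"{key}|1")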
Example 3
    def clear_broadcast(
            self, point_strings=None, namespaces=None, cancel_settings=None):
        """Clear broadcasts globally, or for listed namespaces and/or points.

        Return a tuple (modified_settings, bad_options), where:
        * modified_settings is similar to the return value of the "put" method,
          but for removed broadcasts.
        * bad_options is a dict in the form:
              {"point_strings": ["20020202", ..."], ...}
          The dict is only populated if there are options not associated with
          previous broadcasts. The keys can be:
          * point_strings: a list of bad point strings.
          * namespaces: a list of bad namespaces.
          * cancel: a list of tuples. Each tuple contains the keys of a bad
            setting.
        """
        # If cancel_settings defined, only clear specific broadcasts
        cancel_keys_list = self._settings_to_keys_list(cancel_settings)

        # Clear broadcasts
        modified_settings = []
        with self.lock:
            for point_string, point_string_settings in self.broadcasts.items():
                if point_strings and point_string not in point_strings:
                    continue
                for namespace, namespace_settings in (
                        point_string_settings.items()):
                    if namespaces and namespace not in namespaces:
                        continue
                    stuff_stack = [([], namespace_settings)]
                    while stuff_stack:
                        keys, stuff = stuff_stack.pop()
                        for key, value in stuff.items():
                            if isinstance(value, dict):
                                stuff_stack.append((keys + [key], value))
                            elif (not cancel_keys_list or
                                    keys + [key] in cancel_keys_list):
                                stuff[key] = None
                                setting = {key: value}
                                for rkey in reversed(keys):
                                    setting = {rkey: setting}
                                modified_settings.append(
                                    (point_string, namespace, setting))

        # Prune any empty branches
        bad_options = self._get_bad_options(
            self._prune(), point_strings, namespaces, cancel_keys_list)

        # Log the broadcast
        self.suite_db_mgr.put_broadcast(modified_settings, is_cancel=True)
        LOG.info(
            get_broadcast_change_report(modified_settings, is_cancel=True))
        if bad_options:
            LOG.error(get_broadcast_bad_options_report(bad_options))

        return modified_settings, bad_options
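The stuff_stack loop above is an iterative depth-first walk over an arbitrarily nested settings dict; for each cleared leaf it rebuilds a minimal nested dict recording what was removed. A standalone sketch of the same traversal:

def clear_leaves(settings, cancel_keys_list=None):
    """Null out matching leaves, returning each removed setting."""
    removed = []
    stack = [([], settings)]
    while stack:
        keys, node = stack.pop()
        for key, value in node.items():
            if isinstance(value, dict):
                stack.append((keys + [key], value))
            elif not cancel_keys_list or keys + [key] in cancel_keys_list:
                node[key] = None
                # Re-wrap the leaf in single-key dicts, innermost first.
                setting = {key: value}
                for rkey in reversed(keys):
                    setting = {rkey: setting}
                removed.append(setting)
    return removed

# clear_leaves({'environment': {'FOO': '1'}, 'script': 'true'})
# -> [{'script': 'true'}, {'environment': {'FOO': '1'}}]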
Example 4
 def _run_event_handlers_callback(proc_ctx, abort_on_error=False):
     """Callback on completion of a workflow event handler."""
     if proc_ctx.ret_code:
         msg = '%s EVENT HANDLER FAILED' % proc_ctx.cmd_key[1]
         LOG.error(str(proc_ctx))
         LOG.error(msg)
         if abort_on_error:
             raise WorkflowEventError(msg)
     else:
         LOG.info(str(proc_ctx))
Example 5
 def _run_event_handlers_callback(proc_ctx, abort_on_error=False):
     """Callback on completion of a suite event handler."""
     if proc_ctx.ret_code:
         msg = '%s EVENT HANDLER FAILED' % proc_ctx.cmd_key[1]
         LOG.error(str(proc_ctx))
         LOG.error(msg)
         if abort_on_error:
             raise SuiteEventError(msg)
     else:
         LOG.info(str(proc_ctx))
Example 6
    def publish(self, items):
        """Publish topics.

        Args:
            items (iterable): [(topic, data, serializer)]

        """
        try:
            self.loop.run_until_complete(gather_coros(self.send_multi, items))
        except Exception as exc:
            LOG.error('publish: %s', exc)
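gather_coros here fans self.send_multi out over the items concurrently before run_until_complete blocks on the result. A plausible reduction of that helper, written as an assumption rather than Cylc's actual implementation:

import asyncio

async def gather_coros(coro_func, items):
    """Assumed shape of the helper: run coro_func over items concurrently."""
    return await asyncio.gather(*(coro_func(*item) for item in items))

async def send_multi(topic, data, serializer=None):
    """Hypothetical stand-in for the real publisher method."""
    print(topic, data, serializer)

# asyncio.run(gather_coros(send_multi, [(b'topic', {'x': 1}, None)]))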
Example 7
 def _subshell_eval_callback(self, proc_ctx, cmd_str):
     """Callback when subshell eval command exits"""
     self.ready = True
     if proc_ctx.ret_code == 0 and proc_ctx.out:
         self.remote_command_map[cmd_str] = proc_ctx.out.splitlines()[0]
     else:
         # Bad status
         LOG.error(proc_ctx)
         self.remote_command_map[cmd_str] = TaskRemoteMgmtError(
             TaskRemoteMgmtError.MSG_SELECT, (cmd_str, None), cmd_str,
             proc_ctx.ret_code, proc_ctx.out, proc_ctx.err)
Example 8
    async def publish(self, items):
        """Publish topics.

        Args:
            items (iterable): [(topic, data, serializer)]

        """
        try:
            await gather_coros(self.send_multi, items)
        except Exception as exc:
            LOG.error('publish: %s', exc)
Example 9
 def _run_event_custom_handlers(self, config, ctx):
     """Helper for "run_event_handlers", custom event handlers."""
     # Look for event handlers
     # 1. Handlers for specific event
     # 2. General handlers
     handlers = self.get_events_conf(config, '%s handler' % ctx.event)
     if not handlers and (ctx.event in self.get_events_conf(
             config, 'handler events', [])):
         handlers = self.get_events_conf(config, 'handlers')
     if not handlers:
         return
     for i, handler in enumerate(handlers):
         cmd_key = ('%s-%02d' % (self.SUITE_EVENT_HANDLER, i), ctx.event)
         # Handler command may be a string for substitution
         abort_on_error = self.get_events_conf(
             config, 'abort if %s handler fails' % ctx.event)
         try:
             handler_data = {
                 'event': quote(ctx.event),
                 'message': quote(ctx.reason),
                 'suite': quote(ctx.suite),
                 'suite_uuid': quote(str(ctx.uuid_str)),
             }
             if config.cfg['meta']:
                 for key, value in config.cfg['meta'].items():
                     if key == "URL":
                         handler_data["suite_url"] = quote(value)
                     handler_data[key] = quote(value)
             cmd = handler % (handler_data)
         except KeyError as exc:
             message = "%s bad template: %s" % (cmd_key, exc)
             LOG.error(message)
             if abort_on_error:
                 raise SuiteEventError(message)
             continue
         if cmd == handler:
             # Nothing substituted, assume classic interface
             cmd = "%s '%s' '%s' '%s'" % (handler, ctx.event, ctx.suite,
                                          ctx.reason)
         proc_ctx = SubProcContext(cmd_key,
                                   cmd,
                                   env=dict(os.environ),
                                   shell=True)
         if abort_on_error or self.proc_pool.closed:
             # Run command in foreground if abort on failure is set or if
             # process pool is closed
             self.proc_pool.run_command(proc_ctx)
             self._run_event_handlers_callback(
                 proc_ctx, abort_on_error=abort_on_error)
         else:
             # Run command using process pool otherwise
             self.proc_pool.put_command(proc_ctx,
                                        self._run_event_handlers_callback)
Example 10
 def set_job_state(self, job_d, status):
     """Set job state."""
     update_time = time()
     point, name, sub_num = self.parse_job_item(job_d)
     j_id = (f'{self.workflow_id}{ID_DELIM}{point}'
             f'{ID_DELIM}{name}{ID_DELIM}{sub_num}')
     if status in JOB_STATUSES_ALL:
         j_delta = PbJob(stamp=f'{j_id}@{update_time}', state=status)
         self.updates.setdefault(j_id, PbJob(id=j_id)).MergeFrom(j_delta)
         self.updates_pending = True
     else:
         LOG.error(f'Unable to set {j_id} state field to {status}')
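The setdefault-plus-MergeFrom idiom above accumulates partial updates ("deltas") into a single pending entry per job id. PbJob is generated protobuf code, so the dict-based analogue below only sketches the accumulation, with update() standing in for MergeFrom's overwrite-scalars behaviour.

from time import time

updates = {}

def merge_delta(j_id, **fields):
    """Fold a partial update into the pending entry for j_id."""
    delta = {'stamp': f'{j_id}@{time()}', **fields}
    # Later deltas overwrite scalar fields; fields a delta leaves
    # unset survive from earlier merges.
    updates.setdefault(j_id, {'id': j_id}).update(delta)

merge_delta('wf|1|foo|01', state='submitted')
merge_delta('wf|1|foo|01', state='running')
# updates['wf|1|foo|01'] now holds the id plus the latest stamp and state.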
Example 11
 def _remote_host_select_callback(self, proc_ctx, cmd_str):
     """Callback when host select command exits"""
     self.ready = True
     if proc_ctx.ret_code == 0 and proc_ctx.out:
         # Good status
         LOG.debug(proc_ctx)
         self.remote_host_str_map[cmd_str] = proc_ctx.out.splitlines()[0]
     else:
         # Bad status
         LOG.error(proc_ctx)
         self.remote_host_str_map[cmd_str] = TaskRemoteMgmtError(
             TaskRemoteMgmtError.MSG_SELECT, (cmd_str, None), cmd_str,
             proc_ctx.ret_code, proc_ctx.out, proc_ctx.err)
Example 12
 def _subshell_eval_callback(self, proc_ctx, cmd_str):
     """Callback when subshell eval command exits"""
     self.ready = True
     if proc_ctx.ret_code == 0 and proc_ctx.out:
         self.remote_command_map[cmd_str] = proc_ctx.out.splitlines()[0]
     else:
         # Bad status
         LOG.error(proc_ctx)
         self.remote_command_map[cmd_str] = PlatformError(
             PlatformError.MSG_SELECT,
             None,
             ctx=proc_ctx,
         )
Example 13
 def add_job_msg(self, job_d, msg):
     """Add message to job."""
     update_time = time()
     point, name, sub_num = self.parse_job_item(job_d)
     j_id = (f'{self.workflow_id}{ID_DELIM}{point}'
             f'{ID_DELIM}{name}{ID_DELIM}{sub_num}')
     try:
         j_delta = PbJob(stamp=f'{j_id}@{update_time}')
         j_delta.messages.append(msg)
         self.updates.setdefault(j_id, PbJob(id=j_id)).MergeFrom(j_delta)
         self.updates_pending = True
     except TypeError as exc:
         LOG.error(f'Unable to append to {j_id} message field: {str(exc)}')
Example 14
 def set_job_attr(self, job_d, attr_key, attr_val):
     """Set job attribute."""
     update_time = time()
     point, name, sub_num = self.parse_job_item(job_d)
     j_id = (f'{self.workflow_id}{ID_DELIM}{point}'
             f'{ID_DELIM}{name}{ID_DELIM}{sub_num}')
     try:
         j_delta = PbJob(stamp=f'{j_id}@{update_time}')
         setattr(j_delta, attr_key, attr_val)
         self.updates.setdefault(j_id, PbJob(id=j_id)).MergeFrom(j_delta)
         self.updates_pending = True
     except (TypeError, AttributeError) as exc:
         LOG.error(f'Unable to set {j_id} data field: {str(exc)}')
Example 15
    def _remote_init_callback(self, proc_ctx, platform, tmphandle, curve_auth,
                              client_pub_key_dir):
        """Callback when "cylc remote-init" exits.

        Write public key for install target into client public key
        directory.
        Set remote_init_map status to REMOTE_INIT_DONE on success, which
        in turn will trigger file installation to start.
        Set remote_init_map status to REMOTE_INIT_FAILED on error.

        """
        with suppress(OSError):  # E.g. ignore bad unlink, etc
            tmphandle.close()
        install_target = platform['install target']
        if proc_ctx.ret_code == 0 and "KEYSTART" in proc_ctx.out:
            regex_result = re.search('KEYSTART((.|\n|\r)*)KEYEND',
                                     proc_ctx.out)
            key = regex_result.group(1)
            workflow_srv_dir = get_workflow_srv_dir(self.workflow)
            public_key = KeyInfo(KeyType.PUBLIC,
                                 KeyOwner.CLIENT,
                                 workflow_srv_dir=workflow_srv_dir,
                                 install_target=install_target)
            old_umask = os.umask(0o177)
            with open(public_key.full_key_path, 'w',
                      encoding='utf8') as text_file:
                text_file.write(key)
            os.umask(old_umask)
            # configure_curve must be called every time certificates are
            # added or removed, in order to update the Authenticator's
            # state.
            curve_auth.configure_curve(domain='*',
                                       location=(client_pub_key_dir))
            self.remote_init_map[install_target] = REMOTE_INIT_DONE
            self.ready = True
            return
        # Bad status
        LOG.error(
            PlatformError(
                PlatformError.MSG_INIT,
                platform['name'],
                cmd=proc_ctx.cmd,
                ret_code=proc_ctx.ret_code,
                out=proc_ctx.out,
                err=proc_ctx.err,
            ))

        self.remote_init_map[platform['install target']] = REMOTE_INIT_FAILED
        self.ready = True
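A self-contained sketch of the key extraction and permission-restricted write above; the KEYSTART/KEYEND markers are copied from the code, while the function and paths are illustrative.

import os
import re

def extract_and_store_key(output, dest):
    """Pull the KEYSTART...KEYEND payload out and write it mode 0o600."""
    match = re.search('KEYSTART((.|\n|\r)*)KEYEND', output)
    if match is None:
        return False
    old_umask = os.umask(0o177)  # mask group/other bits: file gets rw-------
    try:
        with open(dest, 'w', encoding='utf8') as handle:
            handle.write(match.group(1))
    finally:
        os.umask(old_umask)
    return True

# extract_and_store_key('KEYSTART\nsecret\nKEYEND', 'client.key')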
Example 16
 def _run_event_custom_handlers(self, schd, template_variables, event):
     """Helper for "run_event_handlers", custom event handlers."""
     # Look for event handlers
     # 1. Handlers for specific event
     # 2. General handlers
     config = schd.config
     handlers = self.get_events_conf(config, '%s handlers' % event)
     if not handlers and (
         event in
         self.get_events_conf(config, 'handler events', [])
     ):
         handlers = self.get_events_conf(config, 'handlers')
     if not handlers:
         return
     for i, handler in enumerate(handlers):
         cmd_key = ('%s-%02d' % (self.WORKFLOW_EVENT_HANDLER, i), event)
         try:
             cmd = handler % (template_variables)
         except KeyError as exc:
             message = f'{cmd_key} bad template: {handler}\n{exc}'
             LOG.error(message)
             continue
         if cmd == handler:
             # Nothing substituted, assume classic interface
             cmd = (
                 f"%(handler)s"
                 f" '%({EventData.Event.value})s'"
                 f" '%({EventData.Workflow.value})s'"
                 f" '%({EventData.Message.value})s'"
             ) % (
                 {'handler': handler, **template_variables}
             )
         proc_ctx = SubProcContext(
             cmd_key,
             cmd,
             env=dict(os.environ),
             shell=True  # nosec (designed to run user defined code)
         )
         if self.proc_pool.closed:
             # Run command in foreground if the process pool is closed
             self.proc_pool.run_command(proc_ctx)
             self._run_event_handlers_callback(proc_ctx)
         else:
             # Run command using process pool otherwise
             self.proc_pool.put_command(
                 proc_ctx, callback=self._run_event_handlers_callback)
Example 17
    def set_job_time(self, job_d, event_key, time_str=None):
        """Set an event time in job pool object.

        Set values of both event_key + '_time' and event_key + '_time_string'.
        """
        update_time = time()
        point, name, sub_num = self.parse_job_item(job_d)
        j_id = (f'{self.workflow_id}{ID_DELIM}{point}'
                f'{ID_DELIM}{name}{ID_DELIM}{sub_num}')
        try:
            j_delta = PbJob(stamp=f'{j_id}@{update_time}')
            time_attr = f'{event_key}_time'
            setattr(j_delta, time_attr, time_str)
            self.updates.setdefault(j_id, PbJob(id=j_id)).MergeFrom(j_delta)
            self.updates_pending = True
        except (TypeError, AttributeError) as exc:
            LOG.error(f'Unable to set {j_id} {time_attr} field: {str(exc)}')
Example 18
def _killpg(proc, signal):
    """Kill a process group."""
    try:
        os.killpg(proc.pid, signal)
    except ProcessLookupError:
        # process group has already exited
        return False
    except PermissionError:
        # process group may contain zombie processes which will result in
        # PermissionError on some systems, not sure what happens on others
        #
        # we could go through the processes in the group and call waitpid on
        # them but waitpid is blocking and this would be a messy solution for a
        # problem that shouldn't happen (it's really a bug in the Cylc subproc)
        LOG.error(f'Could not kill process group: {proc.pid}'
                  f'\nCommand: {" ".join(proc.args)}')
        return False
    return True
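A usage sketch for _killpg. The child has to lead its own process group for proc.pid to double as a group id; start_new_session=True below is an assumption about how the caller spawns it, not something this module shows.

import signal
import subprocess

# Start the child in a new session so proc.pid is also its process group id.
proc = subprocess.Popen(['sleep', '60'], start_new_session=True)
if _killpg(proc, signal.SIGTERM):
    proc.wait()  # reap the child so it does not linger as a zombie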
Example 19
    def file_install(self, platform):
        """Install required files on the remote install target.

        Included by default in the file installation:
            Files:
                .service/server.key  (required for ZMQ authentication)
            Directories:
                app/
                bin/
                etc/
                lib/
        """
        install_target = platform['install target']
        self.remote_init_map[install_target] = REMOTE_FILE_INSTALL_IN_PROGRESS
        src_path = get_workflow_run_dir(self.workflow)
        dst_path = get_remote_workflow_run_dir(self.workflow)
        try:
            cmd, host = construct_rsync_over_ssh_cmd(src_path,
                                                     dst_path,
                                                     platform,
                                                     self.rsync_includes,
                                                     bad_hosts=self.bad_hosts)
            ctx = SubProcContext('file-install', cmd, host)
        except NoHostsError as exc:
            LOG.error(
                PlatformError(
                    f'{PlatformError.MSG_INIT}\n{exc}',
                    platform['name'],
                ))
            self.remote_init_map[
                platform['install target']] = REMOTE_FILE_INSTALL_FAILED
            self.bad_hosts -= set(platform['hosts'])
            self.ready = True
        else:
            log_platform_event('file install', platform, host)
            self.proc_pool.put_command(
                ctx,
                bad_hosts=self.bad_hosts,
                callback=self._file_install_callback,
                callback_args=[platform, install_target],
                callback_255=self._file_install_callback_255,
            )
Example 20
 def _prep_submit_task_job_error(self, suite, itask, dry_run, action, exc):
     """Helper for self._prep_submit_task_job. On error."""
     LOG.debug("submit_num %s" % itask.submit_num)
     LOG.debug(traceback.format_exc())
     LOG.error(exc)
     log_task_job_activity(
         SubProcContext(self.JOBS_SUBMIT, action, err=exc, ret_code=1),
         suite, itask.point, itask.tdef.name, submit_num=itask.submit_num)
     if not dry_run:
         # Persist
         self.suite_db_mgr.put_insert_task_jobs(itask, {
             'is_manual_submit': itask.is_manual_submit,
             'try_num': itask.get_try_num(),
             'time_submit': get_current_time_string(),
             'batch_sys_name': itask.summary.get('batch_sys_name'),
         })
         itask.is_manual_submit = False
         self.task_events_mgr.process_message(
             itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED)
Example 21
    def _process_message_submit_failed(self, itask, event_time):
        """Helper for process_message, handle a submit-failed message.

        Return True if no retries (hence go to the submit-failed state).
        """
        no_retries = False
        LOG.error('[%s] -%s', itask, self.EVENT_SUBMIT_FAILED)
        if event_time is None:
            event_time = get_current_time_string()
        self.suite_db_mgr.put_update_task_jobs(itask, {
            "time_submit_exit": event_time,
            "submit_status": 1,
        })
        job_d = get_task_job_id(itask.point, itask.tdef.name, itask.submit_num)
        self.job_pool.set_job_state(job_d, TASK_STATUS_SUBMIT_FAILED)
        itask.summary['submit_method_id'] = None
        self.pflag = True
        if (
                TimerFlags.SUBMISSION_RETRY not in itask.try_timers
                or itask.try_timers[TimerFlags.SUBMISSION_RETRY].next() is None
        ):
            # No submission retry lined up: definitive failure.
            # See github #476.
            no_retries = True
            if itask.state.reset(TASK_STATUS_SUBMIT_FAILED):
                self.setup_event_handlers(
                    itask, self.EVENT_SUBMIT_FAILED,
                    f'job {self.EVENT_SUBMIT_FAILED}')
        else:
            # There is a submission retry lined up.
            timer = itask.try_timers[TimerFlags.SUBMISSION_RETRY]
            self._retry_task(itask, timer.timeout, submit_retry=True)
            delay_msg = f"submit-retrying in {timer.delay_timeout_as_str()}"
            if itask.state.is_held:
                delay_msg = f"held ({delay_msg})"
            msg = "%s, %s" % (self.EVENT_SUBMIT_FAILED, delay_msg)
            LOG.info("[%s] -job(%02d) %s", itask, itask.submit_num, msg)
            itask.set_summary_message(msg)
            self.setup_event_handlers(
                itask, self.EVENT_SUBMIT_RETRY,
                f"job {self.EVENT_SUBMIT_FAILED}, {delay_msg}")
        self._reset_job_timers(itask)
        return no_retries
Example 22
 def load(self):
     """Load or reload configuration from files."""
     self.sparse.clear()
     self.dense.clear()
     LOG.debug("Loading site/user global config files")
     conf_path_str = os.getenv("CYLC_CONF_PATH")
     if conf_path_str is None:
         # CYLC_CONF_PATH not defined, use default locations.
         for conf_dir_1, conf_dir_2, conf_type in [
                 (self.SITE_CONF_DIR, self.SITE_CONF_DIR_OLD,
                  upgrader.SITE_CONFIG),
                 (self.USER_CONF_DIR_1, self.USER_CONF_DIR_2,
                  upgrader.USER_CONFIG)]:
             fname1 = os.path.join(conf_dir_1, self.CONF_BASE)
             fname2 = os.path.join(conf_dir_2, self.CONF_BASE)
             if os.access(fname1, os.F_OK | os.R_OK):
                 fname = fname1
             elif os.access(fname2, os.F_OK | os.R_OK):
                 fname = fname2
             else:
                 continue
             try:
                 self.loadcfg(fname, conf_type)
             except ParsecError as exc:
                 if conf_type == upgrader.SITE_CONFIG:
                     # Warn on bad site file (users can't fix it).
                     LOG.warning(
                         'ignoring bad %s %s:\n%s', conf_type, fname, exc)
                 else:
                     # Abort on bad user file (users can fix it).
                     LOG.error('bad %s %s', conf_type, fname)
                     raise
                 break
     elif conf_path_str:
         # CYLC_CONF_PATH defined with a value
         for path in conf_path_str.split(os.pathsep):
             fname = os.path.join(path, self.CONF_BASE)
             if os.access(fname, os.F_OK | os.R_OK):
                 self.loadcfg(fname, upgrader.USER_CONFIG)
     # (OK if no global.rc is found, just use system defaults).
     self.transform()
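The three-way branch on CYLC_CONF_PATH above (unset, set, set-but-empty) reduces to the sketch below; the default directories are placeholders for the SITE_CONF_DIR/USER_CONF_DIR attributes.

import os

def candidate_config_files(basename='global.rc'):
    """Yield readable config files, honouring CYLC_CONF_PATH."""
    conf_path_str = os.getenv('CYLC_CONF_PATH')
    if conf_path_str is None:
        # Unset: fall back to the default site then user locations.
        search_dirs = ['/etc/cylc', os.path.expanduser('~/.cylc')]
    elif conf_path_str:
        # Set and non-empty: an explicit os.pathsep-separated override.
        search_dirs = conf_path_str.split(os.pathsep)
    else:
        # Set but empty: load nothing and rely on system defaults.
        search_dirs = []
    for path in search_dirs:
        fname = os.path.join(path, basename)
        if os.access(fname, os.F_OK | os.R_OK):
            yield fname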
Example 23
    def load(self):
        """Load or reload configuration from files."""
        self.sparse.clear()
        self.dense.clear()
        LOG.debug("Loading site/user config files")
        conf_path_str = os.getenv("CYLC_CONF_PATH")
        if conf_path_str:
            # Explicit config file override.
            fname = os.path.join(conf_path_str, self.CONF_BASENAME)
            self._load(fname, upgrader.USER_CONFIG)
        elif conf_path_str is None:
            # Use default locations.
            for conf_type, conf_dir in self.conf_dir_hierarchy:
                fname = os.path.join(conf_dir, self.CONF_BASENAME)
                try:
                    self._load(fname, conf_type)
                except ParsecError:
                    LOG.error(f'bad {conf_type} {fname}')
                    raise

        self._set_default_editors()
Example 24
 def _remote_init_callback(self, proc_ctx, host, owner, tmphandle):
     """Callback when "cylc remote-init" exits"""
     self.ready = True
     try:
         tmphandle.close()
     except OSError:  # E.g. ignore bad unlink, etc
         pass
     if proc_ctx.ret_code == 0:
         for status in (REMOTE_INIT_DONE, REMOTE_INIT_NOT_REQUIRED):
             if status in proc_ctx.out:
                 # Good status
                 LOG.debug(proc_ctx)
                 self.remote_init_map[(host, owner)] = status
                 return
     # Bad status
     LOG.error(TaskRemoteMgmtError(
         TaskRemoteMgmtError.MSG_INIT,
         (host, owner), ' '.join(quote(item) for item in proc_ctx.cmd),
         proc_ctx.ret_code, proc_ctx.out, proc_ctx.err))
     LOG.error(proc_ctx)
     self.remote_init_map[(host, owner)] = REMOTE_INIT_FAILED
Example 25
def log_task_job_activity(ctx, suite, point, name, submit_num=None):
    """Log an activity for a task job."""
    ctx_str = str(ctx)
    if not ctx_str:
        return
    if isinstance(ctx.cmd_key, tuple):  # An event handler
        submit_num = ctx.cmd_key[-1]
    job_activity_log = get_task_job_activity_log(suite, point, name,
                                                 submit_num)
    try:
        with open(job_activity_log, "ab") as handle:
            handle.write((ctx_str + '\n').encode())
    except IOError as exc:
        # This happens when there is no job directory, e.g. if job host
        # selection command causes a submission failure, there will be no job
        # directory. In this case, just send the information to the suite log.
        LOG.exception(exc)
        LOG.info(ctx_str)
    if ctx.cmd and ctx.ret_code:
        LOG.error(ctx_str)
    elif ctx.cmd:
        LOG.debug(ctx_str)
Example 26
 def _process_message_submit_failed(self, itask, event_time):
     """Helper for process_message, handle a submit-failed message."""
     LOG.error('[%s] -%s', itask, self.EVENT_SUBMIT_FAILED)
     if event_time is None:
         event_time = get_current_time_string()
     self.suite_db_mgr.put_update_task_jobs(itask, {
         "time_submit_exit": event_time,
         "submit_status": 1,
     })
     job_d = get_task_job_id(itask.point, itask.tdef.name, itask.submit_num)
     self.job_pool.set_job_attr(job_d, 'batch_sys_job_id', None)
     itask.summary['submit_method_id'] = None
     self.pflag = True
     if (TASK_STATUS_SUBMIT_RETRYING not in itask.try_timers or
             itask.try_timers[TASK_STATUS_SUBMIT_RETRYING].next() is None):
         # No submission retry lined up: definitive failure.
         # See github #476.
         if itask.state.reset_state(TASK_STATUS_SUBMIT_FAILED):
             self.setup_event_handlers(itask, self.EVENT_SUBMIT_FAILED,
                                       'job %s' % self.EVENT_SUBMIT_FAILED)
             self.job_pool.set_job_state(job_d, TASK_STATUS_SUBMIT_FAILED)
     elif itask.state.reset_state(
             TASK_STATUS_SUBMIT_RETRYING,
             respect_hold_swap=True,
     ):
         # There is a submission retry lined up.
         timer = itask.try_timers[TASK_STATUS_SUBMIT_RETRYING]
         delay_msg = "submit-retrying in %s" % timer.delay_timeout_as_str()
         if itask.state.status == TASK_STATUS_HELD:
             delay_msg = "%s (%s)" % (TASK_STATUS_HELD, delay_msg)
         msg = "%s, %s" % (self.EVENT_SUBMIT_FAILED, delay_msg)
         LOG.info("[%s] -job(%02d) %s", itask, itask.submit_num, msg)
         itask.set_summary_message(msg)
         self.setup_event_handlers(
             itask, self.EVENT_SUBMIT_RETRY,
             "job %s, %s" % (self.EVENT_SUBMIT_FAILED, delay_msg))
         self.job_pool.set_job_state(job_d, TASK_STATUS_SUBMIT_RETRYING)
     self._reset_job_timers(itask)
Example 27
 def _job_logs_retrieval_callback(self, proc_ctx, schd_ctx):
     """Call back when log job retrieval completes."""
     if proc_ctx.ret_code:
         LOG.error(proc_ctx)
     else:
         LOG.debug(proc_ctx)
     for id_key in proc_ctx.cmd_kwargs["id_keys"]:
         key1, point, name, submit_num = id_key
         try:
             # All completed jobs are expected to have a "job.out".
             fnames = [JOB_LOG_OUT]
             try:
                 if key1[1] not in 'succeeded':
                     fnames.append(JOB_LOG_ERR)
             except TypeError:
                 pass
             fname_oks = {}
             for fname in fnames:
                 fname_oks[fname] = os.path.exists(
                     get_task_job_log(schd_ctx.suite, point, name,
                                      submit_num, fname))
             # All expected paths must exist to record a good attempt
             log_ctx = SubProcContext((key1, submit_num), None)
             if all(fname_oks.values()):
                 log_ctx.ret_code = 0
                 del self.event_timers[id_key]
             else:
                 log_ctx.ret_code = 1
                 log_ctx.err = "File(s) not retrieved:"
                 for fname, exist_ok in sorted(fname_oks.items()):
                     if not exist_ok:
                         log_ctx.err += " %s" % fname
                 self.event_timers[id_key].unset_waiting()
             log_task_job_activity(log_ctx, schd_ctx.suite, point, name,
                                   submit_num)
         except KeyError as exc:
             LOG.exception(exc)
Example 28
    def _file_install_callback(self, ctx, platform, install_target):
        """Callback when file installation exits.

        Sets remote_init_map to REMOTE_FILE_INSTALL_DONE on success and to
        REMOTE_FILE_INSTALL_FAILED on error.
         """
        if ctx.out:
            RSYNC_LOG.info('File installation information for '
                           f'{install_target}:\n{ctx.out}')
        if ctx.ret_code == 0:
            # Both file installation and remote init success
            LOG.debug(f"File installation complete for {install_target}")
            self.remote_init_map[install_target] = REMOTE_FILE_INSTALL_DONE
            self.ready = True
            return
        else:
            self.remote_init_map[install_target] = REMOTE_FILE_INSTALL_FAILED
            LOG.error(
                PlatformError(
                    PlatformError.MSG_INIT,
                    platform['name'],
                    ctx=ctx,
                ))
            self.ready = True
Example 29
    def _setup_custom_event_handlers(self, itask, event, message):
        """Set up custom task event handlers."""
        handlers = self._get_events_conf(itask, event + ' handler')
        if (handlers is None and event in self._get_events_conf(
                itask, 'handler events', [])):
            handlers = self._get_events_conf(itask, 'handlers')
        if handlers is None:
            return
        retry_delays = self._get_events_conf(itask, 'handler retry delays')
        if not retry_delays:
            retry_delays = [0]
        # There can be multiple custom event handlers
        for i, handler in enumerate(handlers):
            if event in self.NON_UNIQUE_EVENTS:
                key1 = ('%s-%02d' % (self.HANDLER_CUSTOM, i), '%s-%d' %
                        (event, itask.non_unique_events.get(event, 1)))
            else:
                key1 = ('%s-%02d' % (self.HANDLER_CUSTOM, i), event)
            id_key = (key1, str(itask.point), itask.tdef.name,
                      itask.submit_num)
            if id_key in self.event_timers:
                continue
            # Note: user@host may not always be set for a submit number, e.g.
            # on late event or if host select command fails. Use null string to
            # prevent issues in this case.
            platform_n = itask.summary['platforms_used'].get(
                itask.submit_num, '')
            # Custom event handler can be a command template string
            # or a command that takes 4 arguments (classic interface)
            # Note quote() fails on None, need str(None).
            try:
                handler_data = {
                    EventData.BatchSysJobID.value:
                    quote(str(itask.summary['submit_method_id'])),
                    EventData.BatchSysName.value:
                    quote(str(itask.summary['batch_sys_name'])),
                    EventData.CyclePoint.value:
                    quote(str(itask.point)),
                    EventData.Event.value:
                    quote(event),
                    EventData.FinishTime.value:
                    quote(str(itask.summary['finished_time_string'])),
                    EventData.ID.value:
                    quote(itask.identity),
                    EventData.Message.value:
                    quote(message),
                    EventData.TaskName.value:
                    quote(itask.tdef.name),
                    EventData.PlatformName.value:
                    quote(platform_n),
                    EventData.StartTime.value:
                    quote(str(itask.summary['started_time_string'])),
                    EventData.SubmitNum.value:
                    itask.submit_num,
                    EventData.SubmitTime.value:
                    quote(str(itask.summary['submitted_time_string'])),
                    EventData.Suite.value:
                    quote(self.suite),
                    EventData.SuiteUUID.value:
                    quote(str(self.uuid_str)),
                    EventData.TryNum.value:
                    itask.get_try_num(),
                    # task and suite metadata
                    **get_event_handler_data(itask.tdef.rtconfig, self.suite_cfg)
                }
                cmd = handler % (handler_data)
            except KeyError as exc:
                message = "%s/%s/%02d %s bad template: %s" % (
                    itask.point, itask.tdef.name, itask.submit_num, key1, exc)
                LOG.error(message)
                continue

            if cmd == handler:
                # Nothing substituted, assume classic interface
                cmd = "%s '%s' '%s' '%s' '%s'" % (handler, event, self.suite,
                                                  itask.identity, message)
            LOG.debug("[%s] -Queueing %s handler: %s", itask, event, cmd)
            self.event_timers[id_key] = (TaskActionTimer(
                CustomTaskEventHandlerContext(
                    key1,
                    self.HANDLER_CUSTOM,
                    cmd,
                ), retry_delays))
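The cmd == handler test above is how one code path serves both handler styles: if %-substitution leaves the string unchanged there were no placeholders, so the handler is assumed to take positional arguments instead. In miniature, with a trimmed-down template dict:

from shlex import quote

def build_handler_cmd(handler, event, suite, ident, message):
    """%-template if placeholders matched, else the classic argv form."""
    data = {
        'event': quote(event),
        'suite': quote(suite),
        'id': quote(ident),
        'message': quote(message),
    }
    try:
        cmd = handler % data
    except KeyError:
        return None  # bad template: the caller logs the error and skips
    if cmd == handler:
        # Nothing substituted: classic positional interface.
        cmd = "%s '%s' '%s' '%s' '%s'" % (
            handler, event, suite, ident, message)
    return cmd

# build_handler_cmd(
#     "notify --event=%(event)s", 'failed', 'demo', '1/foo', 'oops')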
Example 30
    def _remote_init_callback(
            self, proc_ctx, platform, tmphandle,
            curve_auth, client_pub_key_dir):
        """Callback when "cylc remote-init" exits"""
        self.ready = True
        try:
            tmphandle.close()
        except OSError:  # E.g. ignore bad unlink, etc
            pass
        self.install_target = platform['install target']
        if proc_ctx.ret_code == 0:
            if REMOTE_INIT_DONE in proc_ctx.out:
                src_path = get_suite_run_dir(self.suite)
                dst_path = get_remote_suite_run_dir(platform, self.suite)
                try:
                    process = procopen(construct_rsync_over_ssh_cmd(
                        src_path,
                        dst_path,
                        platform,
                        self.rsync_includes),
                        stdoutpipe=True,
                        stderrpipe=True,
                        universal_newlines=True)

                    out, err = process.communicate(timeout=600)
                    install_target = platform['install target']
                    if out:
                        RSYNC_LOG.info(
                            'File installation information for '
                            f'{install_target}:\n {out}')
                    if err:
                        LOG.error(
                            'File installation error on '
                            f'{install_target}:\n {err}')
                except Exception as ex:
                    LOG.error(f"Problem during rsync: {ex}")
                    self.remote_init_map[self.install_target] = (
                        REMOTE_INIT_FAILED)
                    return
            if "KEYSTART" in proc_ctx.out:
                regex_result = re.search(
                    'KEYSTART((.|\n|\r)*)KEYEND', proc_ctx.out)
                key = regex_result.group(1)
                suite_srv_dir = get_suite_srv_dir(self.suite)
                public_key = KeyInfo(
                    KeyType.PUBLIC,
                    KeyOwner.CLIENT,
                    suite_srv_dir=suite_srv_dir,
                    install_target=self.install_target
                )
                old_umask = os.umask(0o177)
                with open(
                        public_key.full_key_path,
                        'w', encoding='utf8') as text_file:
                    text_file.write(key)
                os.umask(old_umask)
                # configure_curve must be called every time certificates are
                # added or removed, in order to update the Authenticator's
                # state.
                curve_auth.configure_curve(
                    domain='*', location=(client_pub_key_dir))
            for status in (REMOTE_INIT_DONE, REMOTE_INIT_NOT_REQUIRED):
                if status in proc_ctx.out:
                    # Good status
                    LOG.debug(proc_ctx)
                    self.remote_init_map[self.install_target] = status
                    return
        # Bad status
        LOG.error(TaskRemoteMgmtError(
            TaskRemoteMgmtError.MSG_INIT,
            platform['install target'], ' '.join(
                quote(item) for item in proc_ctx.cmd),
            proc_ctx.ret_code, proc_ctx.out, proc_ctx.err))
        LOG.error(proc_ctx)
        self.remote_init_map[platform['install target']] = REMOTE_INIT_FAILED
Example 31
    def remote_init(self, platform: Dict[str, Any],
                    curve_auth: 'ThreadAuthenticator',
                    client_pub_key_dir: str) -> None:
        """Initialise a remote host if necessary.

        Call "cylc remote-init" to install workflow items to remote:
            ".service/contact": For TCP task communication
            "python/": if source exists

        Args:
            platform: A dict containing settings relating to platform used in
                this remote installation.
            curve_auth: The ZMQ authenticator.
            client_pub_key_dir: Client public key directory, used by the
                ZMQ authenticator.

        """
        install_target = platform['install target']
        if install_target == get_localhost_install_target():
            self.remote_init_map[install_target] = REMOTE_FILE_INSTALL_DONE
            return

        # Set status of install target to in progress while waiting for remote
        # initialisation to finish
        self.remote_init_map[install_target] = REMOTE_INIT_IN_PROGRESS

        # Determine what items to install
        comms_meth: CommsMeth = CommsMeth(platform['communication method'])
        items = self._remote_init_items(comms_meth)

        # Create a TAR archive with the service files,
        # so they can be sent later via SSH's STDIN to the task remote.
        tmphandle = self.proc_pool.get_temporary_file()
        tarhandle = tarfile.open(fileobj=tmphandle, mode='w')
        for path, arcname in items:
            tarhandle.add(path, arcname=arcname)
        tarhandle.close()
        tmphandle.seek(0)
        # Build the remote-init command to be run over ssh
        cmd = ['remote-init']
        cmd.extend(verbosity_to_opts(cylc.flow.flags.verbosity))
        cmd.append(str(install_target))
        cmd.append(get_remote_workflow_run_dir(self.workflow))
        dirs_to_symlink = get_dirs_to_symlink(install_target, self.workflow)
        for key, value in dirs_to_symlink.items():
            if value is not None:
                cmd.append(f"{key}={quote(value)} ")
        # Create the ssh command
        try:
            host = get_host_from_platform(platform, bad_hosts=self.bad_hosts)
        except NoHostsError as exc:
            LOG.error(
                PlatformError(
                    f'{PlatformError.MSG_INIT}\n{exc}',
                    platform['name'],
                ))
            self.remote_init_map[
                platform['install target']] = REMOTE_INIT_FAILED
            self.bad_hosts -= set(platform['hosts'])
            self.ready = True
        else:
            log_platform_event('remote init', platform, host)
            cmd = construct_ssh_cmd(cmd, platform, host)
            self.proc_pool.put_command(
                SubProcContext('remote-init',
                               cmd,
                               stdin_files=[tmphandle],
                               host=host),
                bad_hosts=self.bad_hosts,
                callback=self._remote_init_callback,
                callback_args=[
                    platform, tmphandle, curve_auth, client_pub_key_dir
                ],
                callback_255=self._remote_init_callback_255,
                callback_255_args=[platform])
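A sketch of the archive-over-stdin handshake used above: the service files are tarred into a temporary file, rewound, and fed to the remote command's standard input. The remote command and paths are placeholders.

import subprocess
import tarfile
import tempfile

def stream_tar_to_remote(items, remote_cmd):
    """Tar (path, arcname) pairs and pipe the archive via stdin."""
    tmphandle = tempfile.TemporaryFile()
    with tarfile.open(fileobj=tmphandle, mode='w') as tarhandle:
        for path, arcname in items:
            tarhandle.add(path, arcname=arcname)
    tmphandle.seek(0)  # rewind so the subprocess reads from the start
    return subprocess.run(remote_cmd, stdin=tmphandle, check=True)

# stream_tar_to_remote(
#     [('.service/contact', 'contact')],
#     ['ssh', 'remote-host', 'tar', '-x'])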
Example 32
def _test_callback_255(ctx, foo=''):
    """Very Simple test callback function"""
    LOG.error(f'255 callback called.{foo}')
Example 33
    def _setup_custom_event_handlers(self, itask, event, message):
        """Set up custom task event handlers."""
        handlers = self._get_events_conf(itask, event + ' handler')
        if (handlers is None and event in self._get_events_conf(
                itask, 'handler events', [])):
            handlers = self._get_events_conf(itask, 'handlers')
        if handlers is None:
            return
        retry_delays = self._get_events_conf(
            itask, 'handler retry delays',
            self.get_host_conf(itask, "task event handler retry delays"))
        if not retry_delays:
            retry_delays = [0]
        # There can be multiple custom event handlers
        for i, handler in enumerate(handlers):
            if event in self.NON_UNIQUE_EVENTS:
                key1 = ('%s-%02d' % (self.HANDLER_CUSTOM, i), '%s-%d' %
                        (event, itask.non_unique_events.get(event, 1)))
            else:
                key1 = ('%s-%02d' % (self.HANDLER_CUSTOM, i), event)
            id_key = (key1, str(itask.point), itask.tdef.name,
                      itask.submit_num)
            if id_key in self.event_timers:
                continue
            # Note: user@host may not always be set for a submit number, e.g.
            # on late event or if host select command fails. Use null string to
            # prevent issues in this case.
            user_at_host = itask.summary['job_hosts'].get(itask.submit_num, '')
            if user_at_host and '@' not in user_at_host:
                # (only has 'user@' on the front if user is not suite owner).
                user_at_host = '%s@%s' % (get_user(), user_at_host)
            # Custom event handler can be a command template string
            # or a command that takes 4 arguments (classic interface)
            # Note quote() fails on None, need str(None).
            try:
                handler_data = {
                    "event":
                    quote(event),
                    "suite":
                    quote(self.suite),
                    'suite_uuid':
                    quote(str(self.uuid_str)),
                    "point":
                    quote(str(itask.point)),
                    "name":
                    quote(itask.tdef.name),
                    "submit_num":
                    itask.submit_num,
                    "try_num":
                    itask.get_try_num(),
                    "id":
                    quote(itask.identity),
                    "message":
                    quote(message),
                    "batch_sys_name":
                    quote(str(itask.summary['batch_sys_name'])),
                    "batch_sys_job_id":
                    quote(str(itask.summary['submit_method_id'])),
                    "submit_time":
                    quote(str(itask.summary['submitted_time_string'])),
                    "start_time":
                    quote(str(itask.summary['started_time_string'])),
                    "finish_time":
                    quote(str(itask.summary['finished_time_string'])),
                    "user@host":
                    quote(user_at_host)
                }

                if self.suite_cfg:
                    for key, value in self.suite_cfg.items():
                        if key == "URL":
                            handler_data["suite_url"] = quote(value)
                        else:
                            handler_data["suite_" + key] = quote(value)

                if itask.tdef.rtconfig['meta']:
                    for key, value in itask.tdef.rtconfig['meta'].items():
                        if key == "URL":
                            handler_data["task_url"] = quote(value)
                        handler_data[key] = quote(value)

                cmd = handler % (handler_data)
            except KeyError as exc:
                message = "%s/%s/%02d %s bad template: %s" % (
                    itask.point, itask.tdef.name, itask.submit_num, key1, exc)
                LOG.error(message)
                continue

            if cmd == handler:
                # Nothing substituted, assume classic interface
                cmd = "%s '%s' '%s' '%s' '%s'" % (handler, event, self.suite,
                                                  itask.identity, message)
            LOG.debug("[%s] -Queueing %s handler: %s", itask, event, cmd)
            self.event_timers[id_key] = (TaskActionTimer(
                CustomTaskEventHandlerContext(
                    key1,
                    self.HANDLER_CUSTOM,
                    cmd,
                ), retry_delays))