Example #1
 def _process_message_submit_failed(self, itask, event_time):
     """Helper for process_message, handle a submit-failed message."""
     LOG.error(self.EVENT_SUBMIT_FAILED, itask=itask)
     if event_time is None:
         event_time = get_current_time_string()
     self.suite_db_mgr.put_update_task_jobs(
         itask, {
             "time_submit_exit": get_current_time_string(),
             "submit_status": 1,
         })
     itask.summary['submit_method_id'] = None
     if (TASK_STATUS_SUBMIT_RETRYING not in itask.try_timers or
             itask.try_timers[TASK_STATUS_SUBMIT_RETRYING].next() is None):
         # No submission retry lined up: definitive failure.
         self.pflag = True
         # See github #476.
         if itask.state.reset_state(TASK_STATUS_SUBMIT_FAILED):
             self.setup_event_handlers(itask, self.EVENT_SUBMIT_FAILED,
                                       'job %s' % self.EVENT_SUBMIT_FAILED)
     else:
         # There is a submission retry lined up.
         timer = itask.try_timers[TASK_STATUS_SUBMIT_RETRYING]
         delay_msg = "submit-retrying in %s" % timer.delay_timeout_as_str()
         msg = "%s, %s" % (self.EVENT_SUBMIT_FAILED, delay_msg)
         LOG.info("job(%02d) %s" % (itask.submit_num, msg), itask=itask)
         itask.summary['latest_message'] = msg
         if itask.state.reset_state(TASK_STATUS_SUBMIT_RETRYING):
             self.setup_event_handlers(
                 itask, self.EVENT_SUBMIT_RETRY,
                 "job %s, %s" % (self.EVENT_SUBMIT_FAILED, delay_msg))
     self._reset_job_timers(itask)
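
The branch above hinges on the try-timer contract: next() yields the upcoming retry delay and returns None once all delays are used up, which is what marks the failure as definitive. A minimal stand-in illustrating that contract (a hypothetical RetryTimer, not the real cylc TaskActionTimer API):

class RetryTimer(object):
    """Hypothetical stand-in: walk a list of retry delays, then return None."""

    def __init__(self, delays):
        self.delays = list(delays)
        self.num = 0

    def next(self):
        if self.num >= len(self.delays):
            return None  # delays exhausted: no retry lined up
        delay = self.delays[self.num]
        self.num += 1
        return delay

timer = RetryTimer([30.0, 60.0])
print([timer.next(), timer.next(), timer.next()])  # [30.0, 60.0, None]
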
Example #2
 def _prep_submit_task_job_error(self, suite, itask, dry_run, action, exc):
     """Helper for self._prep_submit_task_job. On error."""
     LOG.debug("submit_num %s" % itask.submit_num)
     LOG.debug(traceback.format_exc())
     LOG.error(exc)
     log_task_job_activity(SuiteProcContext(self.JOBS_SUBMIT,
                                            action,
                                            err=exc,
                                            ret_code=1),
                           suite,
                           itask.point,
                           itask.tdef.name,
                           submit_num=itask.submit_num)
     if not dry_run:
         # Persist
         self.suite_db_mgr.put_insert_task_jobs(
             itask, {
                 'is_manual_submit': itask.is_manual_submit,
                 'try_num': itask.get_try_num(),
                 'time_submit': get_current_time_string(),
                 'batch_sys_name': itask.summary.get('batch_sys_name'),
             })
         itask.is_manual_submit = False
         self.task_events_mgr.process_message(
             itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED)
Example #3
    def _prep_submit_task_job(self, suite, itask, dry_run):
        """Prepare a task job submission.

        Return itask on a good preparation.

        """
        if itask.local_job_file_path and not dry_run:
            return itask

        try:
            job_conf = self._prep_submit_task_job_impl(suite, itask)
            local_job_file_path = self.task_events_mgr.get_task_job_log(
                suite, itask.point, itask.tdef.name, itask.submit_num,
                self.JOB_FILE_BASE)
            self.job_file_writer.write(local_job_file_path, job_conf)
        except Exception as exc:
            # Could be a bad command template.
            LOG.error(traceback.format_exc())
            self.task_events_mgr.log_task_job_activity(
                SuiteProcContext(
                    self.JOBS_SUBMIT,
                    '(prepare job file)', err=exc, ret_code=1),
                suite, itask.point, itask.tdef.name)
            if not dry_run:
                self.task_events_mgr.process_message(
                    itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED,
                    self.poll_task_jobs)
            return
Example #4
 def _process_message_submit_failed(self, itask, event_time):
     """Helper for process_message, handle a submit-failed message."""
     LOG.error(self.EVENT_SUBMIT_FAILED, itask=itask)
     if event_time is None:
         event_time = get_current_time_string()
     self.suite_db_mgr.put_update_task_jobs(itask, {
         "time_submit_exit": get_current_time_string(),
         "submit_status": 1,
     })
     itask.summary['submit_method_id'] = None
     if (TASK_STATUS_SUBMIT_RETRYING not in itask.try_timers or
             itask.try_timers[TASK_STATUS_SUBMIT_RETRYING].next() is None):
         # No submission retry lined up: definitive failure.
         self.pflag = True
         # See github #476.
         if itask.state.reset_state(TASK_STATUS_SUBMIT_FAILED):
             self.setup_event_handlers(
                 itask, self.EVENT_SUBMIT_FAILED,
                 'job %s' % self.EVENT_SUBMIT_FAILED)
     else:
         # There is a submission retry lined up.
         timer = itask.try_timers[TASK_STATUS_SUBMIT_RETRYING]
         delay_msg = "submit-retrying in %s" % timer.delay_timeout_as_str()
         msg = "%s, %s" % (self.EVENT_SUBMIT_FAILED, delay_msg)
         LOG.info("job(%02d) %s" % (itask.submit_num, msg), itask=itask)
         itask.summary['latest_message'] = msg
         if itask.state.reset_state(TASK_STATUS_SUBMIT_RETRYING):
             self.setup_event_handlers(
                 itask, self.EVENT_SUBMIT_RETRY,
                 "job %s, %s" % (self.EVENT_SUBMIT_FAILED, delay_msg))
     self._reset_job_timers(itask)
Example #5
    def _prep_submit_task_job(self, suite, itask, dry_run):
        """Prepare a task job submission.

        Return itask on a good preparation.

        """
        if itask.local_job_file_path and not dry_run:
            return itask

        try:
            job_conf = self._prep_submit_task_job_impl(suite, itask)
            local_job_file_path = self.task_events_mgr.get_task_job_log(
                suite, itask.point, itask.tdef.name, itask.submit_num,
                self.JOB_FILE_BASE)
            self.job_file_writer.write(local_job_file_path, job_conf)
        except Exception as exc:
            # Could be a bad command template.
            ERR.error(traceback.format_exc())
            LOG.error(traceback.format_exc())
            self.task_events_mgr.log_task_job_activity(
                SuiteProcContext(
                    self.JOBS_SUBMIT,
                    '(prepare job file)', err=exc, ret_code=1),
                suite, itask.point, itask.tdef.name)
            if not dry_run:
                self.task_events_mgr.process_message(
                    itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED)
            return
Example #6
    def clear_broadcast(self,
                        point_strings=None,
                        namespaces=None,
                        cancel_settings=None):
        """Clear broadcasts globally, or for listed namespaces and/or points.

        Return a tuple (modified_settings, bad_options), where:
        * modified_settings is similar to the return value of the "put" method,
          but for removed broadcasts.
        * bad_options is a dict in the form:
              {"point_strings": ["20020202", ..."], ...}
          The dict is only populated if there are options not associated with
          previous broadcasts. The keys can be:
          * point_strings: a list of bad point strings.
          * namespaces: a list of bad namespaces.
          * cancel: a list of tuples. Each tuple contains the keys of a bad
            setting.
        """
        # If cancel_settings defined, only clear specific broadcasts
        cancel_keys_list = self._settings_to_keys_list(cancel_settings)

        # Clear broadcasts
        modified_settings = []
        with self.lock:
            for point_string, point_string_settings in self.broadcasts.items():
                if point_strings and point_string not in point_strings:
                    continue
                for namespace, namespace_settings in (
                        point_string_settings.items()):
                    if namespaces and namespace not in namespaces:
                        continue
                    stuff_stack = [([], namespace_settings)]
                    while stuff_stack:
                        keys, stuff = stuff_stack.pop()
                        for key, value in stuff.items():
                            if isinstance(value, dict):
                                stuff_stack.append((keys + [key], value))
                            elif (not cancel_keys_list
                                  or keys + [key] in cancel_keys_list):
                                stuff[key] = None
                                setting = {key: value}
                                for rkey in reversed(keys):
                                    setting = {rkey: setting}
                                modified_settings.append(
                                    (point_string, namespace, setting))

        # Prune any empty branches
        bad_options = self._get_bad_options(self._prune(), point_strings,
                                            namespaces, cancel_keys_list)

        # Log the broadcast
        self.suite_db_mgr.put_broadcast(modified_settings, is_cancel=True)
        LOG.info(get_broadcast_change_report(modified_settings,
                                             is_cancel=True))
        if bad_options:
            LOG.error(get_broadcast_bad_options_report(bad_options))

        return (modified_settings, bad_options)
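
The core of clear_broadcast above is the stack-based walk that flattens the nested broadcasts mapping into (point_string, namespace, setting) records while blanking the cleared leaves in place. A standalone sketch of just that walk, with invented data (the real broadcasts structure may carry more nesting levels):

# Illustrative broadcast data: cycle point -> namespace -> settings.
broadcasts = {
    "20100101T0000Z": {
        "model": {"environment": {"FOO": "1"}, "script": "run-model"},
    },
}
modified_settings = []
for point_string, point_settings in broadcasts.items():
    for namespace, namespace_settings in point_settings.items():
        stuff_stack = [([], namespace_settings)]
        while stuff_stack:
            keys, stuff = stuff_stack.pop()
            for key, value in stuff.items():
                if isinstance(value, dict):
                    stuff_stack.append((keys + [key], value))
                else:
                    stuff[key] = None  # leaf cleared in place
                    setting = {key: value}
                    for rkey in reversed(keys):
                        setting = {rkey: setting}
                    modified_settings.append(
                        (point_string, namespace, setting))

print(modified_settings)
# e.g. [('20100101T0000Z', 'model', {'script': 'run-model'}),
#       ('20100101T0000Z', 'model', {'environment': {'FOO': '1'}})]
# (order may vary with dict iteration order)
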
Example #7
 def _run_event_handlers_callback(proc_ctx, abort_on_error=False):
     """Callback on completion of a suite event handler."""
     if proc_ctx.ret_code:
         msg = '%s EVENT HANDLER FAILED' % proc_ctx.cmd_key[1]
         LOG.error(str(proc_ctx))
         LOG.error(msg)
         if abort_on_error:
             raise SuiteEventError(msg)
     else:
         LOG.info(str(proc_ctx))
Example #8
 def _run_event_handlers_callback(self, proc_ctx, abort_on_error=False):
     """Callback on completion of a suite event handler."""
     if proc_ctx.ret_code:
         msg = '%s EVENT HANDLER FAILED' % proc_ctx.cmd_key[1]
         LOG.error(str(proc_ctx))
         ERR.error(msg)
         if abort_on_error:
             raise SuiteEventError(msg)
     else:
         LOG.info(str(proc_ctx))
Example #9
 def _run_event_custom_handlers(self, config, ctx):
     """Helper for "run_event_handlers", custom event handlers."""
     # Look for event handlers
     # 1. Handlers for specific event
     # 2. General handlers
     handlers = self.get_events_conf(config, '%s handler' % ctx.event)
     if not handlers and (ctx.event in self.get_events_conf(
             config, 'handler events', [])):
         handlers = self.get_events_conf(config, 'handlers')
     if not handlers:
         return
     for i, handler in enumerate(handlers):
         cmd_key = ('%s-%02d' % (self.SUITE_EVENT_HANDLER, i), ctx.event)
         # Handler command may be a string for substitution
         abort_on_error = self.get_events_conf(
             config, 'abort if %s handler fails' % ctx.event)
         try:
             handler_data = {
                 'event': quote(ctx.event),
                 'suite': quote(ctx.suite),
                 'message': quote(ctx.reason),
             }
             if config.cfg['meta']:
                 for key, value in config.cfg['meta'].items():
                     if key == "URL":
                         handler_data["suite_url"] = quote(value)
                     handler_data[key] = quote(value)
             cmd = handler % (handler_data)
         except KeyError as exc:
             message = "%s bad template: %s" % (cmd_key, exc)
             LOG.error(message)
             if abort_on_error:
                 raise SuiteEventError(message)
             continue
         if cmd == handler:
             # Nothing substituted, assume classic interface
             cmd = "%s '%s' '%s' '%s'" % (handler, ctx.event, ctx.suite,
                                          ctx.reason)
         proc_ctx = SuiteProcContext(cmd_key,
                                     cmd,
                                     env=dict(os.environ),
                                     shell=True)
         if abort_on_error or self.proc_pool.is_closed():
             # Run command in foreground if abort on failure is set or if
             # process pool is closed
             self.proc_pool.run_command(proc_ctx)
             self._run_event_handlers_callback(
                 proc_ctx, abort_on_error=abort_on_error)
         else:
             # Run command using process pool otherwise
             self.proc_pool.put_command(proc_ctx,
                                        self._run_event_handlers_callback)
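
Two handler interfaces coexist above: a %-template substituted against handler_data, and a classic call that gets the event, suite and message appended as quoted arguments when nothing was substituted. A hedged sketch of that dispatch, with made-up handler strings:

from pipes import quote  # Python 2; use shlex.quote on Python 3

handler_data = {
    "event": quote("startup"),
    "suite": quote("my.suite"),
    "message": quote("suite starting"),
}
for handler in ("notify-event --event=%(event)s --suite=%(suite)s",
                "mail-on-event.sh"):
    cmd = handler % handler_data
    if cmd == handler:
        # Nothing substituted, assume classic interface
        cmd = "%s '%s' '%s' '%s'" % (handler, "startup", "my.suite",
                                     "suite starting")
    print(cmd)
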
Example #10
 def _remote_host_select_callback(self, proc_ctx, cmd_str):
     """Callback when host select command exits"""
     self.ready = True
     if proc_ctx.ret_code == 0 and proc_ctx.out:
         # Good status
         LOG.debug(proc_ctx)
         self.remote_host_str_map[cmd_str] = proc_ctx.out.splitlines()[0]
     else:
         # Bad status
         LOG.error(proc_ctx)
         self.remote_host_str_map[cmd_str] = TaskRemoteMgmtError(
             TaskRemoteMgmtError.MSG_SELECT, (cmd_str, None), cmd_str,
             proc_ctx.ret_code, proc_ctx.out, proc_ctx.err)
Example #11
 def _remote_host_select_callback(self, proc_ctx, cmd_str):
     """Callback when host select command exits"""
     self.ready = True
     if proc_ctx.ret_code == 0 and proc_ctx.out:
         # Good status
         LOG.debug(proc_ctx)
         self.remote_host_str_map[cmd_str] = proc_ctx.out.splitlines()[0]
     else:
         # Bad status
         LOG.error(proc_ctx)
         self.remote_host_str_map[cmd_str] = TaskRemoteMgmtError(
             TaskRemoteMgmtError.MSG_SELECT, (cmd_str, None), cmd_str,
             proc_ctx.ret_code, proc_ctx.out, proc_ctx.err)
Example #12
 def _run_event_custom_handlers(self, config, ctx):
     """Helper for "run_event_handlers", custom event handlers."""
     # Look for event handlers
     # 1. Handlers for specific event
     # 2. General handlers
     handlers = self.get_events_conf(config, '%s handler' % ctx.event)
     if not handlers and (
             ctx.event in
             self.get_events_conf(config, 'handler events', [])):
         handlers = self.get_events_conf(config, 'handlers')
     if not handlers:
         return
     for i, handler in enumerate(handlers):
         cmd_key = ('%s-%02d' % (self.SUITE_EVENT_HANDLER, i), ctx.event)
         # Handler command may be a string for substitution
         abort_on_error = self.get_events_conf(
             config, 'abort if %s handler fails' % ctx.event)
         try:
             handler_data = {
                 'event': quote(ctx.event),
                 'suite': quote(ctx.suite),
                 'message': quote(ctx.reason),
             }
             if config.cfg['meta']:
                 for key, value in config.cfg['meta'].items():
                     if key == "URL":
                         handler_data["suite_url"] = quote(value)
                     handler_data[key] = quote(value)
             cmd = handler % (handler_data)
         except KeyError as exc:
             message = "%s bad template: %s" % (cmd_key, exc)
             LOG.error(message)
             if abort_on_error:
                 raise SuiteEventError(message)
             continue
         if cmd == handler:
             # Nothing substituted, assume classic interface
             cmd = "%s '%s' '%s' '%s'" % (
                 handler, ctx.event, ctx.suite, ctx.reason)
         proc_ctx = SuiteProcContext(
             cmd_key, cmd, env=dict(os.environ), shell=True)
         if abort_on_error or self.proc_pool.closed:
             # Run command in foreground if abort on failure is set or if
             # process pool is closed
             self.proc_pool.run_command(proc_ctx)
             self._run_event_handlers_callback(
                 proc_ctx, abort_on_error=abort_on_error)
         else:
             # Run command using process pool otherwise
             self.proc_pool.put_command(
                 proc_ctx, self._run_event_handlers_callback)
Example #13
 def _manip_task_jobs_callback(ctx,
                               suite,
                               itasks,
                               summary_callback,
                               more_callbacks=None):
     """Callback when submit/poll/kill tasks command exits."""
     if ctx.ret_code:
         LOG.error(ctx)
     else:
         LOG.debug(ctx)
     tasks = {}
     # Note for "kill": It is possible for a job to trigger its trap and
     # report back to the suite before this logic is called. If so, the task
     # will no longer be TASK_STATUS_SUBMITTED or TASK_STATUS_RUNNING, and
     # its output line will be ignored here.
     for itask in itasks:
         if itask.point is not None and itask.submit_num:
             submit_num = "%02d" % (itask.submit_num)
             tasks[(str(itask.point), itask.tdef.name, submit_num)] = itask
     handlers = [(BatchSysManager.OUT_PREFIX_SUMMARY, summary_callback)]
     if more_callbacks:
         for prefix, callback in more_callbacks.items():
             handlers.append((prefix, callback))
     out = ctx.out
     if not out:
         out = ""
         # Something is very wrong here
         # Fallback to use "job_log_dirs" list to report the problem
         job_log_dirs = ctx.cmd_kwargs.get("job_log_dirs", [])
         for job_log_dir in job_log_dirs:
             point, name, submit_num = job_log_dir.split(os.sep, 2)
             itask = tasks[(point, name, submit_num)]
             out += (BatchSysManager.OUT_PREFIX_SUMMARY +
                     "|".join([ctx.timestamp, job_log_dir, "1"]) + "\n")
     for line in out.splitlines(True):
         for prefix, callback in handlers:
             if line.startswith(prefix):
                 line = line[len(prefix):].strip()
                 try:
                     path = line.split("|", 2)[1]  # timestamp, path, status
                     point, name, submit_num = path.split(os.sep, 2)
                     itask = tasks[(point, name, submit_num)]
                     callback(suite, itask, ctx, line)
                 except (KeyError, ValueError):
                     if cylc.flags.debug:
                         LOG.warning('Unhandled %s output: %s' %
                                     (ctx.cmd_key, line))
                         LOG.warning(traceback.format_exc())
Example #14
 def _manip_task_jobs_callback(
         ctx, suite, itasks, summary_callback, more_callbacks=None):
     """Callback when submit/poll/kill tasks command exits."""
     if ctx.ret_code:
         LOG.error(ctx)
     else:
         LOG.debug(ctx)
     tasks = {}
     # Note for "kill": It is possible for a job to trigger its trap and
     # report back to the suite before this logic is called. If so, the task
     # will no longer be TASK_STATUS_SUBMITTED or TASK_STATUS_RUNNING, and
     # its output line will be ignored here.
     for itask in itasks:
         if itask.point is not None and itask.submit_num:
             submit_num = "%02d" % (itask.submit_num)
             tasks[(str(itask.point), itask.tdef.name, submit_num)] = itask
     handlers = [(BatchSysManager.OUT_PREFIX_SUMMARY, summary_callback)]
     if more_callbacks:
         for prefix, callback in more_callbacks.items():
             handlers.append((prefix, callback))
     out = ctx.out
     if not out:
         out = ""
         # Something is very wrong here
         # Fallback to use "job_log_dirs" list to report the problem
         job_log_dirs = ctx.cmd_kwargs.get("job_log_dirs", [])
         for job_log_dir in job_log_dirs:
             point, name, submit_num = job_log_dir.split(os.sep, 2)
             itask = tasks[(point, name, submit_num)]
             out += (BatchSysManager.OUT_PREFIX_SUMMARY +
                     "|".join([ctx.timestamp, job_log_dir, "1"]) + "\n")
     for line in out.splitlines(True):
         for prefix, callback in handlers:
             if line.startswith(prefix):
                 line = line[len(prefix):].strip()
                 try:
                     path = line.split("|", 2)[1]  # timestamp, path, status
                     point, name, submit_num = path.split(os.sep, 2)
                     itask = tasks[(point, name, submit_num)]
                     callback(suite, itask, ctx, line)
                 except (KeyError, ValueError):
                     if cylc.flags.debug:
                         LOG.warning('Unhandled %s output: %s' % (
                             ctx.cmd_key, line))
                         LOG.warning(traceback.format_exc())
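
Each batch-system output line parsed above is expected to look like "<prefix><timestamp>|<point/name/submit_num>|<status>". A sketch of that parsing on a single line, assuming a POSIX os.sep and an invented prefix value standing in for BatchSysManager.OUT_PREFIX_SUMMARY:

import os

OUT_PREFIX_SUMMARY = "SUMMARY:"  # illustrative stand-in, not the real constant
line = OUT_PREFIX_SUMMARY + "2038-01-19T03:14:07Z|20100101T0000Z/model/01|0\n"
if line.startswith(OUT_PREFIX_SUMMARY):
    line = line[len(OUT_PREFIX_SUMMARY):].strip()
    path = line.split("|", 2)[1]  # timestamp, path, status
    point, name, submit_num = path.split(os.sep, 2)
    print((point, name, submit_num))  # ('20100101T0000Z', 'model', '01')
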
Example #15
 def _remote_init_callback(self, proc_ctx, host, owner, tmphandle):
     """Callback when "cylc remote-init" exits"""
     self.ready = True
     tmphandle.close()
     if proc_ctx.ret_code == 0:
         for status in (REMOTE_INIT_DONE, REMOTE_INIT_NOT_REQUIRED):
             if status in proc_ctx.out:
                 # Good status
                 LOG.debug(proc_ctx)
                 self.remote_init_map[(host, owner)] = status
                 return
     # Bad status
     LOG.error(TaskRemoteMgmtError(
         TaskRemoteMgmtError.MSG_INIT,
         (host, owner), ' '.join(quote(item) for item in proc_ctx.cmd),
         proc_ctx.ret_code, proc_ctx.out, proc_ctx.err))
     LOG.error(proc_ctx)
     self.remote_init_map[(host, owner)] = REMOTE_INIT_FAILED
Example #16
 def _remote_init_callback(self, proc_ctx, host, owner, tmphandle):
     """Callback when "cylc remote-init" exits"""
     self.ready = True
     tmphandle.close()
     if proc_ctx.ret_code == 0:
         for status in (REMOTE_INIT_DONE, REMOTE_INIT_NOT_REQUIRED):
             if status in proc_ctx.out:
                 # Good status
                 LOG.debug(proc_ctx)
                 self.remote_init_map[(host, owner)] = status
                 return
     # Bad status
     LOG.error(
         TaskRemoteMgmtError(TaskRemoteMgmtError.MSG_INIT, (host, owner),
                             ' '.join(quote(item) for item in proc_ctx.cmd),
                             proc_ctx.ret_code, proc_ctx.out, proc_ctx.err))
     LOG.error(proc_ctx)
     self.remote_init_map[(host, owner)] = REMOTE_INIT_FAILED
Example #17
 def log_task_job_activity(self, ctx, suite, point, name, submit_num=NN):
     """Log an activity for a task job."""
     ctx_str = str(ctx)
     if not ctx_str:
         return
     if isinstance(ctx.cmd_key, tuple):  # An event handler
         submit_num = ctx.cmd_key[-1]
     job_activity_log = self.get_task_job_activity_log(
         suite, point, name, submit_num)
     try:
         with open(job_activity_log, "ab") as handle:
             handle.write(ctx_str + '\n')
     except IOError as exc:
         LOG.warning("%s: write failed\n%s" % (job_activity_log, exc))
     if ctx.cmd and ctx.ret_code:
         LOG.error(ctx_str)
     elif ctx.cmd:
         LOG.debug(ctx_str)
Example #18
 def _prep_submit_task_job_error(self, suite, itask, dry_run, action, exc):
     """Helper for self._prep_submit_task_job. On error."""
     LOG.debug("submit_num %s" % itask.submit_num)
     LOG.debug(traceback.format_exc())
     LOG.error(exc)
     log_task_job_activity(
         SuiteProcContext(self.JOBS_SUBMIT, action, err=exc, ret_code=1),
         suite, itask.point, itask.tdef.name, submit_num=itask.submit_num)
     if not dry_run:
         # Persist
         self.suite_db_mgr.put_insert_task_jobs(itask, {
             'is_manual_submit': itask.is_manual_submit,
             'try_num': itask.get_try_num(),
             'time_submit': get_current_time_string(),
             'batch_sys_name': itask.summary.get('batch_sys_name'),
         })
         itask.is_manual_submit = False
         self.task_events_mgr.process_message(
             itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED)
Example #19
def log_task_job_activity(ctx, suite, point, name, submit_num=None):
    """Log an activity for a task job."""
    ctx_str = str(ctx)
    if not ctx_str:
        return
    if isinstance(ctx.cmd_key, tuple):  # An event handler
        submit_num = ctx.cmd_key[-1]
    job_activity_log = get_task_job_activity_log(
        suite, point, name, submit_num)
    try:
        with open(job_activity_log, "ab") as handle:
            handle.write(ctx_str + '\n')
    except IOError as exc:
        # This happens when there is no job directory, e.g. if job host
        # selection command causes a submission failure, there will be no job
        # directory. In this case, just send the information to the suite log.
        LOG.debug(exc)
        LOG.info(ctx_str)
    if ctx.cmd and ctx.ret_code:
        LOG.error(ctx_str)
    elif ctx.cmd:
        LOG.debug(ctx_str)
Example #20
def log_task_job_activity(ctx, suite, point, name, submit_num=None):
    """Log an activity for a task job."""
    ctx_str = str(ctx)
    if not ctx_str:
        return
    if isinstance(ctx.cmd_key, tuple):  # An event handler
        submit_num = ctx.cmd_key[-1]
    job_activity_log = get_task_job_activity_log(suite, point, name,
                                                 submit_num)
    try:
        with open(job_activity_log, "ab") as handle:
            handle.write(ctx_str + '\n')
    except IOError as exc:
        # This happens when there is no job directory, e.g. if job host
        # selection command causes a submission failure, there will be no job
        # directory. In this case, just send the information to the suite log.
        LOG.debug(exc)
        LOG.info(ctx_str)
    if ctx.cmd and ctx.ret_code:
        LOG.error(ctx_str)
    elif ctx.cmd:
        LOG.debug(ctx_str)
Example #21
    def _setup_custom_event_handlers(self, itask, event, message):
        """Set up custom task event handlers."""
        handlers = self._get_events_conf(itask, event + ' handler')
        if (handlers is None and
                event in self._get_events_conf(itask, 'handler events', [])):
            handlers = self._get_events_conf(itask, 'handlers')
        if handlers is None:
            return
        retry_delays = self._get_events_conf(
            itask,
            'handler retry delays',
            self.get_host_conf(itask, "task event handler retry delays"))
        if not retry_delays:
            retry_delays = [0]
        # There can be multiple custom event handlers
        for i, handler in enumerate(handlers):
            key1 = ("%s-%02d" % (self.HANDLER_CUSTOM, i), event)
            id_key = (
                key1, str(itask.point), itask.tdef.name, itask.submit_num)
            if id_key in self.event_timers:
                continue
            # Note: user@host may not always be set for a submit number, e.g.
            # on late event or if host select command fails. Use null string to
            # prevent issues in this case.
            user_at_host = itask.summary['job_hosts'].get(itask.submit_num, '')
            if user_at_host and '@' not in user_at_host:
                # (only has 'user@' on the front if user is not suite owner).
                user_at_host = '%s@%s' % (get_user(), user_at_host)
            # Custom event handler can be a command template string
            # or a command that takes 4 arguments (classic interface)
            # Note quote() fails on None, need str(None).
            try:
                handler_data = {
                    "event": quote(event),
                    "suite": quote(self.suite),
                    "point": quote(str(itask.point)),
                    "name": quote(itask.tdef.name),
                    "submit_num": itask.submit_num,
                    "id": quote(itask.identity),
                    "message": quote(message),
                    "batch_sys_name": quote(
                        str(itask.summary['batch_sys_name'])),
                    "batch_sys_job_id": quote(
                        str(itask.summary['submit_method_id'])),
                    "submit_time": quote(
                        str(itask.summary['submitted_time_string'])),
                    "start_time": quote(
                        str(itask.summary['started_time_string'])),
                    "finish_time": quote(
                        str(itask.summary['finished_time_string'])),
                    "user@host": quote(user_at_host)
                }

                if self.suite_cfg:
                    for key, value in self.suite_cfg.items():
                        if key == "URL":
                            handler_data["suite_url"] = quote(value)
                        else:
                            handler_data["suite_" + key] = quote(value)

                if itask.tdef.rtconfig['meta']:
                    for key, value in itask.tdef.rtconfig['meta'].items():
                        if key == "URL":
                            handler_data["task_url"] = quote(value)
                        handler_data[key] = quote(value)

                cmd = handler % (handler_data)
            except KeyError as exc:
                message = "%s/%s/%02d %s bad template: %s" % (
                    itask.point, itask.tdef.name, itask.submit_num, key1, exc)
                LOG.error(message)
                continue

            if cmd == handler:
                # Nothing substituted, assume classic interface
                cmd = "%s '%s' '%s' '%s' '%s'" % (
                    handler, event, self.suite, itask.identity, message)
            LOG.debug("Queueing %s handler: %s" % (event, cmd), itask=itask)
            self.event_timers[id_key] = (
                TaskActionTimer(
                    CustomTaskEventHandlerContext(
                        key1,
                        self.HANDLER_CUSTOM,
                        cmd,
                    ),
                    retry_delays))
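
The suite_cfg and task meta loops above map keys into handler_data slightly differently: suite keys get a "suite_" prefix (with "URL" becoming "suite_url"), while task meta keys are exposed under their own names and "URL" additionally produces "task_url". A small sketch with invented meta dicts:

from pipes import quote  # Python 2; use shlex.quote on Python 3

handler_data = {}
suite_cfg = {"URL": "http://example.com/suite", "title": "demo suite"}
for key, value in suite_cfg.items():
    if key == "URL":
        handler_data["suite_url"] = quote(value)
    else:
        handler_data["suite_" + key] = quote(value)

task_meta = {"URL": "http://example.com/task", "title": "demo task"}
for key, value in task_meta.items():
    if key == "URL":
        handler_data["task_url"] = quote(value)
    handler_data[key] = quote(value)  # no "else": the raw key is kept too

print(sorted(handler_data))
# ['URL', 'suite_title', 'suite_url', 'task_url', 'title']
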
Example #22
    def _setup_custom_event_handlers(self, itask, event, message):
        """Set up custom task event handlers."""
        handlers = self._get_events_conf(itask, event + ' handler')
        if (handlers is None and event in self._get_events_conf(
                itask, 'handler events', [])):
            handlers = self._get_events_conf(itask, 'handlers')
        if handlers is None:
            return
        retry_delays = self._get_events_conf(
            itask, 'handler retry delays',
            self.get_host_conf(itask, "task event handler retry delays"))
        if not retry_delays:
            retry_delays = [0]
        # There can be multiple custom event handlers
        for i, handler in enumerate(handlers):
            key1 = ("%s-%02d" % (self.HANDLER_CUSTOM, i), event)
            id_key = (key1, str(itask.point), itask.tdef.name,
                      itask.submit_num)
            if id_key in self.event_timers:
                continue
            # Note: user@host may not always be set for a submit number, e.g.
            # on late event or if host select command fails. Use null string to
            # prevent issues in this case.
            user_at_host = itask.summary['job_hosts'].get(itask.submit_num, '')
            if user_at_host and '@' not in user_at_host:
                # (only has 'user@' on the front if user is not suite owner).
                user_at_host = '%s@%s' % (get_user(), user_at_host)
            # Custom event handler can be a command template string
            # or a command that takes 4 arguments (classic interface)
            # Note quote() fails on None, need str(None).
            try:
                handler_data = {
                    "event": quote(event),
                    "suite": quote(self.suite),
                    "point": quote(str(itask.point)),
                    "name": quote(itask.tdef.name),
                    "submit_num": itask.submit_num,
                    "id": quote(itask.identity),
                    "message": quote(message),
                    "batch_sys_name": quote(
                        str(itask.summary['batch_sys_name'])),
                    "batch_sys_job_id": quote(
                        str(itask.summary['submit_method_id'])),
                    "submit_time": quote(
                        str(itask.summary['submitted_time_string'])),
                    "start_time": quote(
                        str(itask.summary['started_time_string'])),
                    "finish_time": quote(
                        str(itask.summary['finished_time_string'])),
                    "user@host": quote(user_at_host)
                }

                if self.suite_cfg:
                    for key, value in self.suite_cfg.items():
                        if key == "URL":
                            handler_data["suite_url"] = quote(value)
                        else:
                            handler_data["suite_" + key] = quote(value)

                if itask.tdef.rtconfig['meta']:
                    for key, value in itask.tdef.rtconfig['meta'].items():
                        if key == "URL":
                            handler_data["task_url"] = quote(value)
                        handler_data[key] = quote(value)

                cmd = handler % (handler_data)
            except KeyError as exc:
                message = "%s/%s/%02d %s bad template: %s" % (
                    itask.point, itask.tdef.name, itask.submit_num, key1, exc)
                LOG.error(message)
                continue

            if cmd == handler:
                # Nothing substituted, assume classic interface
                cmd = "%s '%s' '%s' '%s' '%s'" % (handler, event, self.suite,
                                                  itask.identity, message)
            LOG.debug("Queueing %s handler: %s" % (event, cmd), itask=itask)
            self.event_timers[id_key] = (TaskActionTimer(
                CustomTaskEventHandlerContext(
                    key1,
                    self.HANDLER_CUSTOM,
                    cmd,
                ), retry_delays))
Example #23
    def clear(self, point_strings=None, namespaces=None, cancel_settings=None):
        """Clear settings globally, or for listed namespaces and/or points.

        Return a tuple (modified_settings, bad_options), where:
        * modified_settings is similar to the return value of the "put" method,
          but for removed settings.
        * bad_options is a dict in the form:
              {"point_strings": ["20020202", ..."], ...}
          The dict is only populated if there are options not associated with
          previous broadcasts. The keys can be:
          * point_strings: a list of bad point strings.
          * namespaces: a list of bad namespaces.
          * cancel: a list of tuples. Each tuple contains the keys of a bad
            setting.
        """

        if hasattr(cherrypy.request, "json"):
            point_strings = (
                cherrypy.request.json.get("point_strings", point_strings))
            namespaces = (
                cherrypy.request.json.get("namespaces", namespaces))
            cancel_settings = (
                cherrypy.request.json.get("cancel_settings", cancel_settings))
            point_strings = unicode_encode(point_strings)
            namespaces = unicode_encode(namespaces)
            cancel_settings = unicode_encode(cancel_settings)
        # If cancel_settings defined, only clear specific settings
        cancel_keys_list = self._settings_to_keys_list(cancel_settings)

        # Clear settings
        modified_settings = []
        with self.lock:
            for point_string, point_string_settings in self.settings.items():
                if point_strings and point_string not in point_strings:
                    continue
                for namespace, namespace_settings in (
                        point_string_settings.items()):
                    if namespaces and namespace not in namespaces:
                        continue
                    stuff_stack = [([], namespace_settings)]
                    while stuff_stack:
                        keys, stuff = stuff_stack.pop()
                        for key, value in stuff.items():
                            if isinstance(value, dict):
                                stuff_stack.append((keys + [key], value))
                            elif (not cancel_keys_list or
                                    keys + [key] in cancel_keys_list):
                                stuff[key] = None
                                setting = {key: value}
                                for rkey in reversed(keys):
                                    setting = {rkey: setting}
                                modified_settings.append(
                                    (point_string, namespace, setting))

        # Prune any empty branches
        bad_options = self._get_bad_options(
            self._prune(), point_strings, namespaces, cancel_keys_list)

        # Log the broadcast
        self._append_db_queue(modified_settings, is_cancel=True)
        LOG.info(
            get_broadcast_change_report(modified_settings, is_cancel=True))
        if bad_options:
            LOG.error(get_broadcast_bad_options_report(bad_options))

        return (modified_settings, bad_options)
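
cancel_keys_list above is assumed to be the flattened form of cancel_settings: each nested cancel dict reduced to lists of keys down to the leaf, so that membership checks like keys + [key] in cancel_keys_list work. A sketch of that flattening (illustrative only; the real _settings_to_keys_list may differ in detail):

def settings_to_keys_list(settings):
    """Flatten nested setting dicts into lists of keys (illustrative)."""
    keys_list = []
    for setting in settings or []:
        stack = [([], setting)]
        while stack:
            keys, item = stack.pop()
            for key, value in item.items():
                if isinstance(value, dict):
                    stack.append((keys + [key], value))
                else:
                    keys_list.append(keys + [key])
    return keys_list

print(settings_to_keys_list([{"environment": {"FOO": "1"}}, {"script": ""}]))
# [['environment', 'FOO'], ['script']]
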