Code Example #1
File: task_events_mgr.py  Project: jonnyhtw/cylc
 def _process_message_submit_failed(self, itask, event_time):
     """Helper for process_message, handle a submit-failed message."""
     LOG.error(self.EVENT_SUBMIT_FAILED, itask=itask)
     if event_time is None:
         event_time = get_current_time_string()
     self.suite_db_mgr.put_update_task_jobs(itask, {
         "time_submit_exit": get_current_time_string(),
         "submit_status": 1,
     })
     itask.summary['submit_method_id'] = None
     if (TASK_STATUS_SUBMIT_RETRYING not in itask.try_timers or
             itask.try_timers[TASK_STATUS_SUBMIT_RETRYING].next() is None):
         # No submission retry lined up: definitive failure.
         self.pflag = True
         # See github #476.
         if itask.state.reset_state(TASK_STATUS_SUBMIT_FAILED):
             self.setup_event_handlers(
                 itask, self.EVENT_SUBMIT_FAILED,
                 'job %s' % self.EVENT_SUBMIT_FAILED)
     else:
         # There is a submission retry lined up.
         timer = itask.try_timers[TASK_STATUS_SUBMIT_RETRYING]
         delay_msg = "submit-retrying in %s" % timer.delay_timeout_as_str()
         msg = "%s, %s" % (self.EVENT_SUBMIT_FAILED, delay_msg)
         LOG.info("job(%02d) %s" % (itask.submit_num, msg), itask=itask)
         itask.summary['latest_message'] = msg
         if itask.state.reset_state(TASK_STATUS_SUBMIT_RETRYING):
             self.setup_event_handlers(
                 itask, self.EVENT_SUBMIT_RETRY,
                 "job %s, %s" % (self.EVENT_SUBMIT_FAILED, delay_msg))
     self._reset_job_timers(itask)
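
Note: every example on this page calls get_current_time_string(). As rough orientation, here is a minimal, self-contained stand-in that mimics the call signatures seen below (display_sub_seconds, override_use_utc, use_basic_format) and returns an ISO 8601 string. It is an illustrative sketch only, not cylc's wall_clock implementation, and the real function's exact output format may differ.

# Illustrative stand-in only -- NOT cylc's implementation. The argument names
# mirror the calls seen in the examples on this page (Python 3 sketch).
from datetime import datetime, timezone


def get_current_time_string(display_sub_seconds=False, override_use_utc=None,
                            use_basic_format=False):
    """Return the current time as an ISO 8601 date-time string (sketch)."""
    use_utc = bool(override_use_utc)  # real function consults a global default
    now = datetime.now(timezone.utc) if use_utc else datetime.now()
    if use_basic_format:
        date_time_format = "%Y%m%dT%H%M%S"      # e.g. 20240101T120000
    else:
        date_time_format = "%Y-%m-%dT%H:%M:%S"  # e.g. 2024-01-01T12:00:00
    if display_sub_seconds:
        date_time_format += ".%f"
    time_string = now.strftime(date_time_format)
    if use_utc:
        time_string += "Z"
    return time_string


if __name__ == "__main__":
    # Mirrors calls seen on this page, e.g. the SuiteLog filename stamps.
    print(get_current_time_string())
    print(get_current_time_string(display_sub_seconds=True))
    print(get_current_time_string(True, True, True).replace('.', '-'))
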
Code Example #2
File: task_events_mgr.py  Project: ahuang11/cylc
 def _process_message_submit_failed(self, itask, event_time):
     """Helper for process_message, handle a submit-failed message."""
     LOG.error(self.EVENT_SUBMIT_FAILED, itask=itask)
     if event_time is None:
         event_time = get_current_time_string()
     self.suite_db_mgr.put_update_task_jobs(
         itask, {
             "time_submit_exit": get_current_time_string(),
             "submit_status": 1,
         })
     itask.summary['submit_method_id'] = None
     if (TASK_STATUS_SUBMIT_RETRYING not in itask.try_timers or
             itask.try_timers[TASK_STATUS_SUBMIT_RETRYING].next() is None):
         # No submission retry lined up: definitive failure.
         self.pflag = True
         # See github #476.
         if itask.state.reset_state(TASK_STATUS_SUBMIT_FAILED):
             self.setup_event_handlers(itask, self.EVENT_SUBMIT_FAILED,
                                       'job %s' % self.EVENT_SUBMIT_FAILED)
     else:
         # There is a submission retry lined up.
         timer = itask.try_timers[TASK_STATUS_SUBMIT_RETRYING]
         delay_msg = "submit-retrying in %s" % timer.delay_timeout_as_str()
         msg = "%s, %s" % (self.EVENT_SUBMIT_FAILED, delay_msg)
         LOG.info("job(%02d) %s" % (itask.submit_num, msg), itask=itask)
         itask.summary['latest_message'] = msg
         if itask.state.reset_state(TASK_STATUS_SUBMIT_RETRYING):
             self.setup_event_handlers(
                 itask, self.EVENT_SUBMIT_RETRY,
                 "job %s, %s" % (self.EVENT_SUBMIT_FAILED, delay_msg))
     self._reset_job_timers(itask)
Code Example #3
    def _send_by_remote_port(self, messages):
        """Send message by talking to the daemon (remote?) port."""
        from cylc.network.httpclient import (SuiteRuntimeServiceClient,
                                             ClientError, ClientInfoError)

        # Convert time/duration into appropriate units
        retry_intvl = float(
            self.env_map.get(SuiteSrvFilesManager.KEY_TASK_MSG_RETRY_INTVL,
                             self.MSG_RETRY_INTVL))
        max_tries = int(
            self.env_map.get(SuiteSrvFilesManager.KEY_TASK_MSG_MAX_TRIES,
                             self.MSG_MAX_TRIES))

        client = SuiteRuntimeServiceClient(
            self.suite,
            owner=self.env_map.get(SuiteSrvFilesManager.KEY_OWNER),
            host=self.env_map.get(SuiteSrvFilesManager.KEY_HOST),
            port=self.env_map.get(SuiteSrvFilesManager.KEY_PORT),
            timeout=float(
                self.env_map.get(SuiteSrvFilesManager.KEY_TASK_MSG_TIMEOUT,
                                 self.MSG_TIMEOUT)),
            comms_protocol=self.env_map.get(
                SuiteSrvFilesManager.KEY_COMMS_PROTOCOL))
        for i in range(1, max_tries + 1):  # 1..max_tries inclusive
            try:
                for message in messages:
                    client.put_message(self.task_id, self.severity, message)
            except ClientError as exc:
                sys.stderr.write(
                    "%s WARNING - Send message: try %s of %s failed: %s\n" %
                    (get_current_time_string(), i, max_tries, exc))
                # Break if:
                # * Exhausted number of tries.
                # * Contact info file not found, suite probably not running.
                #   Don't bother with retry, suite restart will poll any way.
                if i >= max_tries or isinstance(exc, ClientInfoError):
                    # Issue a warning and let the task carry on
                    sys.stderr.write("%s WARNING - MESSAGE SEND FAILED\n" %
                                     (get_current_time_string()))
                    break
                else:
                    sys.stderr.write(
                        "   retry in %s seconds, timeout is %s\n" %
                        (retry_intvl, client.timeout))
                    sleep(retry_intvl)
                    # Reset in case contact info or passphrase change
                    client.host = None
                    client.port = None
                    client.auth = None
            else:
                if i > 1:
                    # Continue to write to STDERR, so users can easily see that
                    # it has recovered from previous failures.
                    sys.stderr.write(
                        "%s INFO - Send message: try %s of %s succeeded\n" %
                        (get_current_time_string(), i, max_tries))
                break
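
The send loop above follows a common retry pattern: attempt the send up to max_tries times, sleep retry_intvl seconds between failed attempts, and on final failure only warn so the task itself can carry on. Below is a generic sketch of that pattern with invented names; it does not use cylc's SuiteRuntimeServiceClient API.

# Generic retry sketch; names are invented and no cylc client is involved.
import sys
from time import sleep


def send_with_retries(send, messages, max_tries=7, retry_intvl=5.0):
    """Return True if every message was sent within max_tries attempts."""
    for i in range(1, max_tries + 1):  # 1..max_tries inclusive
        try:
            for message in messages:
                send(message)
        except IOError as exc:  # stand-in for the client's error class
            sys.stderr.write(
                "WARNING - Send message: try %s of %s failed: %s\n" % (
                    i, max_tries, exc))
            if i >= max_tries:
                # Exhausted the retries: warn and give up.
                sys.stderr.write("WARNING - MESSAGE SEND FAILED\n")
            else:
                sleep(retry_intvl)
        else:
            return True
    return False
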
Code Example #4
def _run_command(ctx):
    """Execute a shell command and capture its output and exit status."""

    if cylc.flags.debug:
        if ctx.cmd_kwargs.get('shell'):
            OUT.debug(ctx.cmd)
        else:
            OUT.debug("%s\n" %
                      ' '.join([quote(cmd_str) for cmd_str in ctx.cmd]))

    if (SuiteProcPool.STOP_JOB_SUBMISSION.value
            and ctx.cmd_key == SuiteProcPool.JOBS_SUBMIT):
        ctx.err = "job submission skipped (suite stopping)"
        ctx.ret_code = SuiteProcPool.JOB_SKIPPED_FLAG
        ctx.timestamp = get_current_time_string()
        return ctx

    try:
        stdin_file = None
        if ctx.cmd_kwargs.get('stdin_file_paths'):
            stdin_file = TemporaryFile()
            for file_path in ctx.cmd_kwargs['stdin_file_paths']:
                for line in open(file_path):
                    stdin_file.write(line)
            stdin_file.seek(0)
        elif ctx.cmd_kwargs.get('stdin_str'):
            stdin_file = PIPE
        proc = Popen(ctx.cmd,
                     stdin=stdin_file,
                     stdout=PIPE,
                     stderr=PIPE,
                     env=ctx.cmd_kwargs.get('env'),
                     shell=ctx.cmd_kwargs.get('shell'))
    except IOError as exc:
        if cylc.flags.debug:
            traceback.print_exc()
        ctx.ret_code = 1
        ctx.err = str(exc)
    except OSError as exc:
        if exc.filename is None:
            exc.filename = ctx.cmd[0]
        if cylc.flags.debug:
            traceback.print_exc()
        ctx.ret_code = 1
        ctx.err = str(exc)
    else:
        ctx.out, ctx.err = proc.communicate(ctx.cmd_kwargs.get('stdin_str'))
        ctx.ret_code = proc.wait()

    ctx.timestamp = get_current_time_string()
    return ctx
Code Example #5
File: mp_pool.py  Project: oliver-sanders/cylc
def _run_command(ctx):
    """Execute a shell command and capture its output and exit status."""

    if cylc.flags.debug:
        if ctx.cmd_kwargs.get('shell'):
            OUT.debug(ctx.cmd)
        else:
            OUT.debug(
                "%s\n" % ' '.join([quote(cmd_str) for cmd_str in ctx.cmd]))

    if (SuiteProcPool.STOP_JOB_SUBMISSION.value and
            ctx.cmd_key == SuiteProcPool.JOBS_SUBMIT):
        ctx.err = "job submission skipped (suite stopping)"
        ctx.ret_code = SuiteProcPool.JOB_SKIPPED_FLAG
        ctx.timestamp = get_current_time_string()
        return ctx

    try:
        stdin_file = None
        if ctx.cmd_kwargs.get('stdin_file_paths'):
            stdin_file = TemporaryFile()
            for file_path in ctx.cmd_kwargs['stdin_file_paths']:
                for line in open(file_path):
                    stdin_file.write(line)
            stdin_file.seek(0)
        elif ctx.cmd_kwargs.get('stdin_str'):
            stdin_file = PIPE
        proc = Popen(
            ctx.cmd, stdin=stdin_file, stdout=PIPE, stderr=PIPE,
            env=ctx.cmd_kwargs.get('env'), shell=ctx.cmd_kwargs.get('shell'))
    except IOError as exc:
        if cylc.flags.debug:
            traceback.print_exc()
        ctx.ret_code = 1
        ctx.err = str(exc)
    except OSError as exc:
        if exc.filename is None:
            exc.filename = ctx.cmd[0]
        if cylc.flags.debug:
            traceback.print_exc()
        ctx.ret_code = 1
        ctx.err = str(exc)
    else:
        ctx.out, ctx.err = proc.communicate(ctx.cmd_kwargs.get('stdin_str'))
        ctx.ret_code = proc.wait()

    ctx.timestamp = get_current_time_string()
    return ctx
Code Example #6
File: task_events_mgr.py  Project: dpmatthews/cylc
 def _process_message_failed(self, itask, event_time, message):
     """Helper for process_message, handle a failed message."""
     if event_time is None:
         event_time = get_current_time_string()
     itask.set_summary_time('finished', event_time)
     self.suite_db_mgr.put_update_task_jobs(itask, {
         "run_status": 1,
         "time_run_exit": event_time,
     })
     if (TASK_STATUS_RETRYING not in itask.try_timers or
             itask.try_timers[TASK_STATUS_RETRYING].next() is None):
         # No retry lined up: definitive failure.
         self.pflag = True
         if itask.state.reset_state(TASK_STATUS_FAILED):
             self.setup_event_handlers(itask, "failed", message)
         LOG.critical(
             "[%s] -job(%02d) %s", itask, itask.submit_num, "failed")
     else:
         # There is a retry lined up
         delay_msg = "retrying in %s" % (
             itask.try_timers[TASK_STATUS_RETRYING].delay_timeout_as_str())
         msg = "failed, %s" % (delay_msg)
         LOG.info("[%s] -job(%02d) %s", itask, itask.submit_num, msg)
         itask.set_summary_message(msg)
         if itask.state.reset_state(TASK_STATUS_RETRYING):
             self.setup_event_handlers(
                 itask, "retry", "%s, %s" % (self.JOB_FAILED, delay_msg))
     self._reset_job_timers(itask)
Code Example #7
File: batch_sys_manager.py  Project: arjclark/cylc
 def _filter_submit_output(self, st_file_path, batch_sys, out, err):
     """Filter submit command output, if relevant."""
     job_id = None
     rec_id = None  # only set when the handler defines a submit output regex
     if hasattr(batch_sys, "REC_ID_FROM_SUBMIT_ERR"):
         text = err
         rec_id = batch_sys.REC_ID_FROM_SUBMIT_ERR
     elif hasattr(batch_sys, "REC_ID_FROM_SUBMIT_OUT"):
         text = out
         rec_id = batch_sys.REC_ID_FROM_SUBMIT_OUT
     if rec_id:
         for line in str(text).splitlines():
             match = rec_id.match(line)
             if match:
                 job_id = match.group("id")
                 job_status_file = open(st_file_path, "a")
                 job_status_file.write("%s=%s\n" % (
                     self.CYLC_BATCH_SYS_JOB_ID, job_id))
                 job_status_file.write("%s=%s\n" % (
                     self.CYLC_BATCH_SYS_JOB_SUBMIT_TIME,
                     get_current_time_string()))
                 job_status_file.close()
                 break
     if hasattr(batch_sys, "filter_submit_output"):
         out, err = batch_sys.filter_submit_output(out, err)
     return out, err, job_id
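
_filter_submit_output() depends on each batch system handler optionally exposing a compiled regular expression, REC_ID_FROM_SUBMIT_OUT or REC_ID_FROM_SUBMIT_ERR, whose named group "id" captures the job ID printed by the submit command. The handler class and pattern below are invented purely to illustrate that contract; only the named-group convention comes from the code above.

# Hypothetical handler for illustration; the class and pattern are invented.
import re


class ExampleBatchSysHandler(object):
    # Would match submit output such as "Submitted batch job 12345".
    REC_ID_FROM_SUBMIT_OUT = re.compile(r"\ASubmitted batch job (?P<id>\d+)")


out = "Submitted batch job 12345\n"
for line in out.splitlines():
    match = ExampleBatchSysHandler.REC_ID_FROM_SUBMIT_OUT.match(line)
    if match:
        print(match.group("id"))  # prints: 12345
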
Code Example #8
File: mp_pool.py  Project: arjclark/cylc
 def _run_command_exit(cls, ctx, callback=None, callback_args=None):
     """Process command completion."""
     ctx.timestamp = get_current_time_string()
     if callable(callback):
         if not callback_args:
             callback_args = []
         callback(ctx, *callback_args)
Code Example #9
File: task_events_mgr.py  Project: ahuang11/cylc
 def _process_message_failed(self, itask, event_time, message):
     """Helper for process_message, handle a failed message."""
     if event_time is None:
         event_time = get_current_time_string()
     itask.set_summary_time('finished', event_time)
     self.suite_db_mgr.put_update_task_jobs(itask, {
         "run_status": 1,
         "time_run_exit": event_time,
     })
     if (TASK_STATUS_RETRYING not in itask.try_timers
             or itask.try_timers[TASK_STATUS_RETRYING].next() is None):
         # No retry lined up: definitive failure.
         self.pflag = True
         if itask.state.reset_state(TASK_STATUS_FAILED):
             self.setup_event_handlers(itask, "failed", message)
         LOG.critical("job(%02d) %s" % (itask.submit_num, "failed"),
                      itask=itask)
     else:
         # There is a retry lined up
         delay_msg = "retrying in %s" % (
             itask.try_timers[TASK_STATUS_RETRYING].delay_timeout_as_str())
         msg = "failed, %s" % (delay_msg)
         LOG.info("job(%02d) %s" % (itask.submit_num, msg), itask=itask)
         itask.summary['latest_message'] = msg
         if itask.state.reset_state(TASK_STATUS_RETRYING):
             self.setup_event_handlers(
                 itask, "retry", "%s, %s" % (self.JOB_FAILED, delay_msg))
     self._reset_job_timers(itask)
Code Example #10
File: rundb.py  Project: oliver-sanders/cylc
    def take_checkpoints(self, event, other_daos=None):
        """Add insert items to *_checkpoints tables.

        Select items in suite_params, broadcast_states and task_pool and
        prepare them for insert into the relevant *_checkpoints tables, and
        prepare an insert of the event and the current time into the
        checkpoint_id table.

        If other_daos is specified, it should be a list of CylcSuiteDAO
        objects.  The logic will prepare insertion of the same items into the
        *_checkpoints tables of these DAOs as well.
        """
        id_ = 1
        for (max_id,) in self.connect().execute(
                "SELECT MAX(id) FROM %(table)s" %
                {"table": self.TABLE_CHECKPOINT_ID}):
            if max_id >= id_:
                id_ = max_id + 1
        daos = [self]
        if other_daos:
            daos.extend(other_daos)
        for dao in daos:
            dao.tables[self.TABLE_CHECKPOINT_ID].add_insert_item([
                id_, get_current_time_string(), event])
        for table_name in [
                self.TABLE_SUITE_PARAMS,
                self.TABLE_BROADCAST_STATES,
                self.TABLE_TASK_POOL]:
            for row in self.connect().execute("SELECT * FROM %s" % table_name):
                for dao in daos:
                    dao.tables[table_name + "_checkpoints"].add_insert_item(
                        [id_] + list(row))
Code Example #11
File: task_job_mgr.py  Project: andymoorhouse/cylc
 def _prep_submit_task_job_error(self, suite, itask, dry_run, action, exc):
     """Helper for self._prep_submit_task_job. On error."""
     LOG.debug("submit_num %s" % itask.submit_num)
     LOG.debug(traceback.format_exc())
     LOG.error(exc)
     log_task_job_activity(SubProcContext(self.JOBS_SUBMIT,
                                          action,
                                          err=exc,
                                          ret_code=1),
                           suite,
                           itask.point,
                           itask.tdef.name,
                           submit_num=itask.submit_num)
     if not dry_run:
         # Persist
         self.suite_db_mgr.put_insert_task_jobs(
             itask, {
                 'is_manual_submit': itask.is_manual_submit,
                 'try_num': itask.get_try_num(),
                 'time_submit': get_current_time_string(),
                 'batch_sys_name': itask.summary.get('batch_sys_name'),
             })
         itask.is_manual_submit = False
         self.task_events_mgr.process_message(
             itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED)
Code Example #12
File: task_job_mgr.py  Project: jonnyhtw/cylc
    def _poll_task_job_callback(self, suite, itask, cmd_ctx, line):
        """Helper for _poll_task_jobs_callback, on one task job."""
        ctx = SuiteProcContext(self.JOBS_POLL, None)
        ctx.out = line
        ctx.ret_code = 0

        items = line.split("|")
        # See cylc.batch_sys_manager.JobPollContext
        try:
            (
                batch_sys_exit_polled, run_status, run_signal,
                time_submit_exit, time_run, time_run_exit
            ) = items[4:10]
        except IndexError:
            itask.summary['latest_message'] = 'poll failed'
            cylc.flags.iflag = True
            ctx.cmd = cmd_ctx.cmd  # print original command on failure
            return
        finally:
            log_task_job_activity(ctx, suite, itask.point, itask.tdef.name)
        flag = self.task_events_mgr.POLLED_FLAG
        if run_status == "1" and run_signal in ["ERR", "EXIT"]:
            # Failed normally
            self.task_events_mgr.process_message(
                itask, INFO, TASK_OUTPUT_FAILED, time_run_exit, flag)
        elif run_status == "1" and batch_sys_exit_polled == "1":
            # Failed by a signal, and no longer in batch system
            self.task_events_mgr.process_message(
                itask, INFO, TASK_OUTPUT_FAILED, time_run_exit, flag)
            self.task_events_mgr.process_message(
                itask, INFO, FAIL_MESSAGE_PREFIX + run_signal, time_run_exit,
                flag)
        elif run_status == "1":
            # The job has terminated, but is still managed by the batch
            # system. Some batch systems may restart a job in this state, so
            # don't mark it as failed yet.
            self.task_events_mgr.process_message(
                itask, INFO, TASK_OUTPUT_STARTED, time_run, flag)
        elif run_status == "0":
            # The job succeeded
            self.task_events_mgr.process_message(
                itask, INFO, TASK_OUTPUT_SUCCEEDED, time_run_exit, flag)
        elif time_run and batch_sys_exit_polled == "1":
            # The job has terminated without executing the error trap
            self.task_events_mgr.process_message(
                itask, INFO, TASK_OUTPUT_FAILED, get_current_time_string(),
                flag)
        elif time_run:
            # The job has started, and is still managed by batch system
            self.task_events_mgr.process_message(
                itask, INFO, TASK_OUTPUT_STARTED, time_run, flag)
        elif batch_sys_exit_polled == "1":
            # The job never ran, and no longer in batch system
            self.task_events_mgr.process_message(
                itask, INFO, self.task_events_mgr.EVENT_SUBMIT_FAILED,
                time_submit_exit, flag)
        else:
            # The job never ran, and is in batch system
            self.task_events_mgr.process_message(
                itask, INFO, TASK_STATUS_SUBMITTED, time_submit_exit, flag)
Code Example #13
File: batch_sys_manager.py  Project: wenlien/cylc
 def _filter_submit_output(self, st_file_path, batch_sys, out, err):
     """Filter submit command output, if relevant."""
     job_id = None
     rec_id = None  # only set when the handler defines a submit output regex
     if hasattr(batch_sys, "REC_ID_FROM_SUBMIT_ERR"):
         text = err
         rec_id = batch_sys.REC_ID_FROM_SUBMIT_ERR
     elif hasattr(batch_sys, "REC_ID_FROM_SUBMIT_OUT"):
         text = out
         rec_id = batch_sys.REC_ID_FROM_SUBMIT_OUT
     if rec_id:
         for line in str(text).splitlines():
             match = rec_id.match(line)
             if match:
                 job_id = match.group("id")
                 job_status_file = open(st_file_path, "a")
                 job_status_file.write("%s=%s\n" % (
                     self.CYLC_BATCH_SYS_JOB_ID, job_id))
                 job_status_file.write("%s=%s\n" % (
                     self.CYLC_BATCH_SYS_JOB_SUBMIT_TIME,
                     get_current_time_string()))
                 job_status_file.close()
                 break
     if hasattr(batch_sys, "filter_submit_output"):
         out, err = batch_sys.filter_submit_output(out, err)
     return out, err, job_id
Code Example #14
File: batch_sys_manager.py  Project: arjclark/cylc
    def jobs_kill(self, job_log_root, job_log_dirs):
        """Kill multiple jobs.

        job_log_root -- The log/job/ sub-directory of the suite.
        job_log_dirs -- A list containing point/name/submit_num for task jobs.

        """
        # Note: The more efficient way to do this is to group the jobs by their
        # batch systems, and call the kill command for each batch system once.
        # However, this will make it more difficult to determine if the kill
        # command for a particular job is successful or not.
        if "$" in job_log_root:
            job_log_root = os.path.expandvars(job_log_root)
        self.configure_suite_run_dir(job_log_root.rsplit(os.sep, 2)[0])
        now = get_current_time_string()
        for job_log_dir in job_log_dirs:
            ret_code, err = self.job_kill(
                os.path.join(job_log_root, job_log_dir, JOB_LOG_STATUS))
            sys.stdout.write("%s%s|%s|%d\n" % (
                self.OUT_PREFIX_SUMMARY, now, job_log_dir, ret_code))
            # Note: Printing STDERR to STDOUT may look a bit strange, but it
            # requires less logic for the suite to parse the output.
            if err.strip():
                for line in err.splitlines(True):
                    if not line.endswith("\n"):
                        line += "\n"
                    sys.stdout.write("%s%s|%s|%s" % (
                        self.OUT_PREFIX_CMD_ERR, now, job_log_dir, line))
Code Example #15
 def put_broadcast(self, modified_settings, is_cancel=False):
     """Put or clear broadcasts in runtime database."""
     now = get_current_time_string(display_sub_seconds=True)
     for broadcast_change in (
             get_broadcast_change_iter(modified_settings, is_cancel)):
         broadcast_change["time"] = now
         self.db_inserts_map[self.TABLE_BROADCAST_EVENTS].append(
             broadcast_change)
         if is_cancel:
             self.db_deletes_map[self.TABLE_BROADCAST_STATES].append({
                 "point": broadcast_change["point"],
                 "namespace": broadcast_change["namespace"],
                 "key": broadcast_change["key"]})
             # Delete statements are currently executed before insert
             # statements, so we should clear out any insert statements that
             # are deleted here.
             # (Not the most efficient logic here, but unless we have a
             # large number of inserts, then this should not be a big
             # concern.)
             inserts = []
             for insert in self.db_inserts_map[self.TABLE_BROADCAST_STATES]:
                 if any(insert[key] != broadcast_change[key]
                         for key in ["point", "namespace", "key"]):
                     inserts.append(insert)
             self.db_inserts_map[self.TABLE_BROADCAST_STATES] = inserts
         else:
             self.db_inserts_map[self.TABLE_BROADCAST_STATES].append({
                 "point": broadcast_change["point"],
                 "namespace": broadcast_change["namespace"],
                 "key": broadcast_change["key"],
                 "value": broadcast_change["value"]})
Code Example #16
File: broadcast.py  Project: chanwilson/cylc
 def _update_db_queue(self):
     """Update the queue to the runtime DB."""
     this_dump = self._get_dump()
     if this_dump != self.prev_dump:
         now = get_current_time_string(display_sub_seconds=True)
         self.settings_queue.append(RecordBroadcastObject(now, this_dump))
         self.prev_dump = this_dump
Code Example #17
 def print_msg(self, msg):
     now = get_current_time_string(override_use_utc=self.utc)
     prefix = 'cylc (' + self.mode + ' - ' + now + '): '
     if self.priority == 'NORMAL':
         print prefix + msg
     else:
         print >> sys.stderr, prefix + self.priority + ' ' + msg
Code Example #18
File: batch_sys_manager.py  Project: arjclark/cylc
    def jobs_submit(self, job_log_root, job_log_dirs, remote_mode=False):
        """Submit multiple jobs.

        job_log_root -- The log/job/ sub-directory of the suite.
        job_log_dirs -- A list containing point/name/submit_num for task jobs.

        """
        if "$" in job_log_root:
            job_log_root = os.path.expandvars(job_log_root)
        self.configure_suite_run_dir(job_log_root.rsplit(os.sep, 2)[0])

        if remote_mode:
            items = self._jobs_submit_prep_by_stdin(job_log_root, job_log_dirs)
        else:
            items = self._jobs_submit_prep_by_args(job_log_root, job_log_dirs)
        now = get_current_time_string()
        for job_log_dir, batch_sys_name, submit_opts in items:
            job_file_path = os.path.join(
                job_log_root, job_log_dir, JOB_LOG_JOB)
            if not batch_sys_name:
                sys.stdout.write("%s%s|%s|1|\n" % (
                    self.OUT_PREFIX_SUMMARY, now, job_log_dir))
                continue
            ret_code, out, err, job_id = self._job_submit_impl(
                job_file_path, batch_sys_name, submit_opts)
            sys.stdout.write("%s%s|%s|%d|%s\n" % (
                self.OUT_PREFIX_SUMMARY, now, job_log_dir, ret_code, job_id))
            for key, value in [("STDERR", err), ("STDOUT", out)]:
                if value is None or not value.strip():
                    continue
                for line in value.splitlines(True):
                    if not line.endswith("\n"):
                        line += "\n"
                    sys.stdout.write("%s%s|%s|[%s] %s" % (
                        self.OUT_PREFIX_COMMAND, now, job_log_dir, key, line))
Code Example #19
 def _run_command_exit(cls, ctx, callback=None, callback_args=None):
     """Process command completion."""
     ctx.timestamp = get_current_time_string()
     if callable(callback):
         if not callback_args:
             callback_args = []
         callback(ctx, *callback_args)
Code Example #20
File: task_state.py  Project: m214089/cylc
 def _set_state(self, status):
     """Set, log and record task status (normal change, not forced - don't
     update task_events table)."""
     if self.status == self.hold_swap:
         self.hold_swap = None
     if status == self.status and self.hold_swap is None:
         return
     o_status, o_hold_swap = self.status, self.hold_swap
     if status == TASK_STATUS_HELD:
         self.hold_swap = self.status
     elif (self.hold_swap == TASK_STATUS_HELD and
             status not in TASK_STATUSES_FINAL):
         self.hold_swap = status
         status = TASK_STATUS_HELD
     elif self.hold_swap:
         self.hold_swap = None
     self.status = status
     self.time_updated = get_current_time_string()
     flags.iflag = True
     # Log
     message = str(o_status)
     if o_hold_swap:
         message += " (%s)" % o_hold_swap
     message += " => %s" % self.status
     if self.hold_swap:
         message += " (%s)" % self.hold_swap
     LOG.debug(message, itask=self.identity)
Code Example #21
File: task_message.py  Project: ScottWales/cylc
 def print_msg( self, msg ):
     now = get_current_time_string(override_use_utc=self.utc)
     prefix = 'cylc (' + self.mode + ' - ' + now + '): '
     if self.priority == 'NORMAL':
         print prefix + msg
     else:
         print >> sys.stderr, prefix + self.priority + ' ' + msg
Code Example #22
File: task_state.py  Project: dpmatthews/cylc
 def _set_state(self, status):
     """Set, log and record task status (normal change, not forced - don't
     update task_events table)."""
     if self.status == self.hold_swap:
         self.hold_swap = None
     if status == self.status and self.hold_swap is None:
         return
     prev_status, prev_hold_swap = self.status, self.hold_swap
     if status == TASK_STATUS_HELD:
         self.hold_swap = self.status
     elif status in TASK_STATUSES_ACTIVE:
         if self.status == TASK_STATUS_HELD:
             self.hold_swap = TASK_STATUS_HELD
     elif (self.hold_swap == TASK_STATUS_HELD and
             status not in TASK_STATUSES_FINAL):
         self.hold_swap = status
         status = TASK_STATUS_HELD
     elif self.hold_swap:
         self.hold_swap = None
     self.status = status
     self.time_updated = get_current_time_string()
     self.is_updated = True
     # Log
     message = str(prev_status)
     if prev_hold_swap:
         message += " (%s)" % prev_hold_swap
     message += " => %s" % self.status
     if self.hold_swap:
         message += " (%s)" % self.hold_swap
     LOG.debug("[%s] -%s", self.identity, message)
     return (prev_status, prev_hold_swap)
Code Example #23
File: task_state.py  Project: wrongkindofdoctor/cylc
 def _set_state(self, status):
     """Set, log and record task status (normal change, not forced - don't
     update task_events table)."""
     if self.status == self.hold_swap:
         self.hold_swap = None
     if status == self.status and self.hold_swap is None:
         return
     prev_status, prev_hold_swap = self.status, self.hold_swap
     if status == TASK_STATUS_HELD:
         self.hold_swap = self.status
     elif status in TASK_STATUSES_ACTIVE:
         if self.status == TASK_STATUS_HELD:
             self.hold_swap = TASK_STATUS_HELD
     elif (self.hold_swap == TASK_STATUS_HELD
           and status not in TASK_STATUSES_FINAL):
         self.hold_swap = status
         status = TASK_STATUS_HELD
     elif self.hold_swap:
         self.hold_swap = None
     self.status = status
     self.time_updated = get_current_time_string()
     self.is_updated = True
     # Log
     message = str(prev_status)
     if prev_hold_swap:
         message += " (%s)" % prev_hold_swap
     message += " => %s" % self.status
     if self.hold_swap:
         message += " (%s)" % self.hold_swap
     LOG.debug("[%s] -%s", self.identity, message)
     return (prev_status, prev_hold_swap)
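
All of the _set_state() variants above log the same human-readable transition string: the previous status, then "=>", then the new status, with the corresponding hold_swap value in parentheses wherever one is set. A minimal sketch of just that formatting, extracted for illustration (the function name is invented):

# Formatting logic only, lifted for illustration; the function name is
# invented and nothing here touches real task state.
def format_transition(prev_status, prev_hold_swap, status, hold_swap):
    message = str(prev_status)
    if prev_hold_swap:
        message += " (%s)" % prev_hold_swap
    message += " => %s" % status
    if hold_swap:
        message += " (%s)" % hold_swap
    return message


# A running task being held swaps "running" into hold_swap:
print(format_transition("running", None, "held", "running"))
# -> running => held (running)
# Example with a previous hold_swap:
print(format_transition("held", "running", "running", None))
# -> held (running) => running
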
Code Example #24
File: batch_sys_manager.py  Project: wenlien/cylc
    def jobs_kill(self, job_log_root, job_log_dirs):
        """Kill multiple jobs.

        job_log_root -- The log/job/ sub-directory of the suite.
        job_log_dirs -- A list containing point/name/submit_num for task jobs.

        """
        # Note: The more efficient way to do this is to group the jobs by their
        # batch systems, and call the kill command for each batch system once.
        # However, this will make it more difficult to determine if the kill
        # command for a particular job is successful or not.
        if "$" in job_log_root:
            job_log_root = os.path.expandvars(job_log_root)
        self.configure_suite_run_dir(job_log_root.rsplit(os.sep, 2)[0])
        now = get_current_time_string()
        for job_log_dir in job_log_dirs:
            ret_code, err = self.job_kill(
                os.path.join(job_log_root, job_log_dir, "job.status"))
            sys.stdout.write("%s%s|%s|%d\n" % (
                self.OUT_PREFIX_SUMMARY, now, job_log_dir, ret_code))
            # Note: Printing STDERR to STDOUT may look a bit strange, but it
            # requires less logic for the suite to parse the output.
            if err.strip():
                for line in err.splitlines(True):
                    if not line.endswith("\n"):
                        line += "\n"
                    sys.stdout.write("%s%s|%s|%s" % (
                        self.OUT_PREFIX_CMD_ERR, now, job_log_dir, line))
Code Example #25
    def take_checkpoints(self, event, other_daos=None):
        """Add insert items to *_checkpoints tables.

        Select items in suite_params, broadcast_states and task_pool and
        prepare them for insert into the relevant *_checkpoints tables, and
        prepare an insert of the event and the current time into the
        checkpoint_id table.

        If other_daos is specified, it should be a list of CylcSuiteDAO
        objects.  The logic will prepare insertion of the same items into the
        *_checkpoints tables of these DAOs as well.
        """
        id_ = 1
        for max_id, in self.connect().execute(
                "SELECT MAX(id) FROM %(table)s" %
                {"table": self.TABLE_CHECKPOINT_ID}):
            if max_id >= id_:
                id_ = max_id + 1
        daos = [self]
        if other_daos:
            daos.extend(other_daos)
        for dao in daos:
            dao.tables[self.TABLE_CHECKPOINT_ID].add_insert_item([
                id_, get_current_time_string(), event])
        for table_name in [
                self.TABLE_SUITE_PARAMS,
                self.TABLE_BROADCAST_STATES,
                self.TABLE_TASK_POOL]:
            for row in self.connect().execute("SELECT * FROM %s" % table_name):
                for dao in daos:
                    dao.tables[table_name + "_checkpoints"].add_insert_item(
                        [id_] + list(row))
Code Example #26
 def _append_db_queue(self, modified_settings, is_cancel=False):
     """Update the queue to the runtime DB."""
     now = get_current_time_string(display_sub_seconds=True)
     for broadcast_change in (
             get_broadcast_change_iter(modified_settings, is_cancel)):
         broadcast_change["time"] = now
         self.db_inserts_map[self.TABLE_BROADCAST_EVENTS].append(
             broadcast_change)
         if is_cancel:
             self.db_deletes_map[self.TABLE_BROADCAST_STATES].append({
                 "point": broadcast_change["point"],
                 "namespace": broadcast_change["namespace"],
                 "key": broadcast_change["key"]})
             # Delete statements are currently executed before insert
             # statements, so we should clear out any insert statements that
             # are deleted here.
             # (Not the most efficient logic here, but unless we have a
             # large number of inserts, then this should not be a big
             # concern.)
             inserts = []
             for insert in self.db_inserts_map[self.TABLE_BROADCAST_STATES]:
                 if any([insert[key] != broadcast_change[key] for key in
                         ["point", "namespace", "key"]]):
                     inserts.append(insert)
             self.db_inserts_map[self.TABLE_BROADCAST_STATES] = inserts
         else:
             self.db_inserts_map[self.TABLE_BROADCAST_STATES].append({
                 "point": broadcast_change["point"],
                 "namespace": broadcast_change["namespace"],
                 "key": broadcast_change["key"],
                 "value": broadcast_change["value"]})
Code Example #27
File: batch_sys_manager.py  Project: wenlien/cylc
    def jobs_submit(self, job_log_root, job_log_dirs, remote_mode=False):
        """Submit multiple jobs.

        job_log_root -- The log/job/ sub-directory of the suite.
        job_log_dirs -- A list containing point/name/submit_num for task jobs.

        """
        if "$" in job_log_root:
            job_log_root = os.path.expandvars(job_log_root)
        self.configure_suite_run_dir(job_log_root.rsplit(os.sep, 2)[0])

        if remote_mode:
            items = self._jobs_submit_prep_by_stdin(job_log_root, job_log_dirs)
        else:
            items = self._jobs_submit_prep_by_args(job_log_root, job_log_dirs)
        now = get_current_time_string()
        for job_log_dir, batch_sys_name, submit_opts in items:
            job_file_path = os.path.join(
                job_log_root, job_log_dir, self.JOB_FILE_BASE)
            if not batch_sys_name:
                sys.stdout.write("%s%s|%s|1|\n" % (
                    self.OUT_PREFIX_SUMMARY, now, job_log_dir))
                continue
            ret_code, out, err, job_id = self._job_submit_impl(
                job_file_path, batch_sys_name, submit_opts)
            sys.stdout.write("%s%s|%s|%d|%s\n" % (
                self.OUT_PREFIX_SUMMARY, now, job_log_dir, ret_code, job_id))
            for key, value in [("STDERR", err), ("STDOUT", out)]:
                if value is None or not value.strip():
                    continue
                for line in value.splitlines(True):
                    if not line.endswith("\n"):
                        line += "\n"
                    sys.stdout.write("%s%s|%s|[%s] %s" % (
                        self.OUT_PREFIX_COMMAND, now, job_log_dir, key, line))
Code Example #28
File: task_events_mgr.py  Project: ahuang11/cylc
 def _db_events_insert(self, itask, event="", message=""):
     """Record an event to the DB."""
     self.suite_db_mgr.put_insert_task_events(
         itask, {
             "time": get_current_time_string(),
             "event": event,
             "message": message
         })
Code Example #29
File: suite_logging.py  Project: m214089/cylc
    def __init__(self, suite, test_params=None):
        if SuiteLog.__INSTANCE:
            raise Exception("Attempting to initiate a second singleton"
                            "instance.")

        self._group = None
        if not test_params:
            self.is_test = False
            self.max_bytes = GLOBAL_CFG.get(
                ['suite logging', 'maximum size in bytes'])
            self.roll_at_startup = GLOBAL_CFG.get(
                ['suite logging', 'roll over at start-up'])
            self.archive_length = GLOBAL_CFG.get(
                ['suite logging', 'rolling archive length'])
        else:
            self.is_test = True
            self.max_bytes = test_params['max_bytes']
            self.roll_at_startup = test_params['roll_at_startup']
            self.archive_length = 4

        # Log paths.
        if test_params:
            self.ldir = test_params['ldir']
        else:
            self.ldir = GLOBAL_CFG.get_derived_host_item(
                suite, 'suite log directory')
        self.log_paths = {}
        self.log_paths[self.LOG] = os.path.join(self.ldir, self.LOG)
        self.log_paths[self.OUT] = os.path.join(self.ldir, self.OUT)
        self.log_paths[self.ERR] = os.path.join(self.ldir, self.ERR)

        # The loggers.
        self.loggers = {}
        self.loggers[self.LOG] = None
        self.loggers[self.OUT] = None
        self.loggers[self.ERR] = None

        # Filename stamp functions.
        if self.is_test:
            self.stamp = lambda: get_current_time_string(True, True, True
                                                         ).replace('.', '-')
        else:
            self.stamp = lambda: get_current_time_string(False, True, True)

        SuiteLog.__INSTANCE = self
Code Example #30
File: suite_logging.py  Project: rbaumert/cylc
    def __init__(self, suite, test_params=None):
        if SuiteLog.__INSTANCE:
            raise Exception("Attempting to initiate a second singleton"
                            "instance.")

        self._group = None
        if not test_params:
            self.is_test = False
            self.max_bytes = GLOBAL_CFG.get(
                ['suite logging', 'maximum size in bytes'])
            self.roll_at_startup = GLOBAL_CFG.get(
                ['suite logging', 'roll over at start-up'])
            self.archive_length = GLOBAL_CFG.get(
                ['suite logging', 'rolling archive length'])
        else:
            self.is_test = True
            self.max_bytes = test_params['max_bytes']
            self.roll_at_startup = test_params['roll_at_startup']
            self.archive_length = 4

        # Log paths.
        if test_params:
            self.ldir = test_params['ldir']
        else:
            self.ldir = GLOBAL_CFG.get_derived_host_item(
                suite, 'suite log directory')
        self.log_paths = {}
        self.log_paths[self.LOG] = os.path.join(self.ldir, self.LOG)
        self.log_paths[self.OUT] = os.path.join(self.ldir, self.OUT)
        self.log_paths[self.ERR] = os.path.join(self.ldir, self.ERR)

        # The loggers.
        self.loggers = {}
        self.loggers[self.LOG] = None
        self.loggers[self.OUT] = None
        self.loggers[self.ERR] = None

        # Filename stamp functions.
        if self.is_test:
            self.stamp = lambda: get_current_time_string(True, True, True
                                                         ).replace('.', '-')
        else:
            self.stamp = lambda: get_current_time_string(False, True, True)

        SuiteLog.__INSTANCE = self
Code Example #31
    def __init__(self, cmd_key, cmd, **cmd_kwargs):
        self.timestamp = get_current_time_string()
        self.cmd_key = cmd_key
        self.cmd = cmd
        self.cmd_kwargs = cmd_kwargs

        self.err = cmd_kwargs.get('err')
        self.ret_code = cmd_kwargs.get('ret_code')
        self.out = cmd_kwargs.get('out')
Code Example #32
File: suite_db_mgr.py  Project: arjclark/cylc
    def put_task_pool(self, pool):
        """Put statements to update the task_pool table in runtime database.

        Update the task_pool table and the task_action_timers table.
        Queue delete (everything) statements to wipe the tables, and queue the
        relevant insert statements for the current tasks in the pool.
        """
        self.db_deletes_map[self.TABLE_TASK_POOL].append({})
        # No need to do:
        # self.db_deletes_map[self.TABLE_TASK_ACTION_TIMERS].append({})
        # Should already be done by self.put_task_event_timers above.
        self.db_deletes_map[self.TABLE_TASK_TIMEOUT_TIMERS].append({})
        for itask in pool.get_all_tasks():
            self.db_inserts_map[self.TABLE_TASK_POOL].append({
                "name": itask.tdef.name,
                "cycle": str(itask.point),
                "spawned": int(itask.has_spawned),
                "status": itask.state.status,
                "hold_swap": itask.state.hold_swap})
            if itask.state.status in itask.timeout_timers:
                self.db_inserts_map[self.TABLE_TASK_TIMEOUT_TIMERS].append({
                    "name": itask.tdef.name,
                    "cycle": str(itask.point),
                    "timeout": itask.timeout_timers[itask.state.status]})
            for ctx_key_0 in ["poll_timers", "try_timers"]:
                for ctx_key_1, timer in getattr(itask, ctx_key_0).items():
                    if timer is None:
                        continue
                    self.db_inserts_map[self.TABLE_TASK_ACTION_TIMERS].append({
                        "name": itask.tdef.name,
                        "cycle": str(itask.point),
                        "ctx_key": json.dumps((ctx_key_0, ctx_key_1)),
                        "ctx": self._namedtuple2json(timer.ctx),
                        "delays": json.dumps(timer.delays),
                        "num": timer.num,
                        "delay": timer.delay,
                        "timeout": timer.timeout})
            if itask.state.time_updated:
                set_args = {
                    "time_updated": itask.state.time_updated,
                    "submit_num": itask.submit_num,
                    "try_num": itask.get_try_num(),
                    "status": itask.state.status}
                where_args = {
                    "cycle": str(itask.point),
                    "name": itask.tdef.name,
                }
                self.db_updates_map.setdefault(self.TABLE_TASK_STATES, [])
                self.db_updates_map[self.TABLE_TASK_STATES].append(
                    (set_args, where_args))
                itask.state.time_updated = None

        self.db_inserts_map[self.TABLE_CHECKPOINT_ID].append({
            # id = -1 for latest
            "id": CylcSuiteDAO.CHECKPOINT_LATEST_ID,
            "time": get_current_time_string(),
            "event": CylcSuiteDAO.CHECKPOINT_LATEST_EVENT})
Code Example #33
File: mp_pool.py  Project: arjclark/cylc
    def __init__(self, cmd_key, cmd, **cmd_kwargs):
        self.timestamp = get_current_time_string()
        self.cmd_key = cmd_key
        self.cmd = cmd
        self.cmd_kwargs = cmd_kwargs

        self.err = cmd_kwargs.get('err')
        self.ret_code = cmd_kwargs.get('ret_code')
        self.out = cmd_kwargs.get('out')
Code Example #34
File: task_message.py  Project: ScottWales/cylc
    def __init__( self, msg=None, priority='NORMAL' ):

        self.msg = msg

        if priority in [ 'NORMAL', 'WARNING', 'CRITICAL' ]:
            self.priority = priority
        else:
            raise Exception( 'Illegal message priority ' + priority )

        # load the environment
        self.env_map = dict(os.environ)

        # set some instance variables
        for attr, key, default in (
                ('suite', 'CYLC_SUITE_NAME', '(CYLC_SUITE_NAME)'),
                ('task_id', 'CYLC_TASK_ID', '(CYLC_TASK_ID)'),
                ('retry_seconds', 'CYLC_TASK_MSG_RETRY_INTVL', '(CYLC_TASK_MSG_RETRY_INTVL)'),
                ('max_tries',     'CYLC_TASK_MSG_MAX_TRIES',   '(CYLC_TASK_MSG_MAX_TRIES)'),
                ('try_timeout',   'CYLC_TASK_MSG_TIMEOUT',     '(CYLC_TASK_MSG_TIMEOUT)'),
                ('owner', 'CYLC_SUITE_OWNER', None),
                ('host', 'CYLC_SUITE_HOST', '(CYLC_SUITE_HOST)'),
                ('port', 'CYLC_SUITE_PORT', '(CYLC_SUITE_PORT)')):
            value = self.env_map.get(key, default)
            setattr(self, attr, value)

        # conversions from string:
        if self.try_timeout == 'None':
            self.try_timeout = None
        try:
            self.retry_seconds = float( self.retry_seconds )
            self.max_tries = int( self.max_tries )
        except ValueError:
            pass

        cylc.flags.verbose = cylc.flags.verbose or self.env_map.get('CYLC_VERBOSE') == 'True'

        # 'scheduler' or 'submit', (or 'raw' if job script run manually)
        self.mode = self.env_map.get( 'CYLC_MODE', 'raw' )

        rd = self.env_map.get( 'CYLC_SUITE_RUN_DIR', '.' )
        self.env_file_path = os.path.join (rd, 'cylc-suite-env' )

        self.utc = self.env_map.get('CYLC_UTC') == 'True'
        # Record the time the messaging system was called and append it
        # to the message, in case the message is delayed in some way.
        self.true_event_time = get_current_time_string(
            override_use_utc=self.utc)

        self.ssh_messaging = (
                self.env_map.get('CYLC_TASK_COMMS_METHOD') == 'ssh' )

        self.polling = (
                self.env_map.get('CYLC_TASK_COMMS_METHOD') == 'poll' )

        self.ssh_login_shell = (
                self.env_map.get('CYLC_TASK_SSH_LOGIN_SHELL') != 'False')
Code Example #35
File: job_logs.py  Project: dmanubens-zz/cylc
 def append_to_log(self, submit_num, log_type, out=None, err=None):
     """Write new command output to the appropriate log file."""
     sub_num = "%02d" % int(submit_num)
     dir_ = os.path.join(self.base_path, sub_num)
     mkdir_p(dir_)
     job_log_handle = open(os.path.join(dir_, "job-activity.log"), "a")
     timestamp = get_current_time_string()
     self._write_to_log(job_log_handle, timestamp, log_type + "-OUT", out)
     self._write_to_log(job_log_handle, timestamp, log_type + "-ERR", err)
     job_log_handle.close()
Code Example #36
File: job_logs.py  Project: dmanubens/cylc
 def append_to_log(self, submit_num, log_type, out=None, err=None):
     """Write new command output to the appropriate log file."""
     sub_num = "%02d" % int(submit_num)
     dir_ = os.path.join(self.base_path, sub_num)
     mkdir_p(dir_)
     job_log_handle = open(os.path.join(dir_, "job-activity.log"), "a")
     timestamp = get_current_time_string()
     self._write_to_log(job_log_handle, timestamp, log_type + "-OUT", out)
     self._write_to_log(job_log_handle, timestamp, log_type + "-ERR", err)
     job_log_handle.close()
Code Example #37
File: batch_sys_manager.py  Project: wenlien/cylc
    def jobs_poll(self, job_log_root, job_log_dirs):
        """Poll multiple jobs.

        job_log_root -- The log/job/ sub-directory of the suite.
        job_log_dirs -- A list containing point/name/submit_num for task jobs.

        """
        if "$" in job_log_root:
            job_log_root = os.path.expandvars(job_log_root)
        self.configure_suite_run_dir(job_log_root.rsplit(os.sep, 2)[0])

        ctx_list = []  # Contexts for all relevant jobs
        ctx_list_by_batch_sys = {}  # {batch_sys_name1: [ctx1, ...], ...}

        for job_log_dir in job_log_dirs:
            ctx = self._jobs_poll_status_files(job_log_root, job_log_dir)
            if ctx is None:
                continue
            ctx_list.append(ctx)

            if not ctx.batch_sys_name or not ctx.batch_sys_job_id:
                # Lost batch system information for some reason.
                # Mark the job as if it is no longer in the batch system.
                ctx.batch_sys_exit_polled = 1
                sys.stderr.write(
                    "%s/job.status: incomplete batch system info\n" % (
                        ctx.job_log_dir))

            # We can trust:
            # * Jobs previously polled to have exited the batch system.
            # * Jobs succeeded or failed with ERR/EXIT.
            if (ctx.batch_sys_exit_polled or ctx.run_status == 0 or
                    ctx.run_signal in ["ERR", "EXIT"]):
                continue

            if ctx.batch_sys_name not in ctx_list_by_batch_sys:
                ctx_list_by_batch_sys[ctx.batch_sys_name] = []
            ctx_list_by_batch_sys[ctx.batch_sys_name].append(ctx)

        for batch_sys_name, my_ctx_list in ctx_list_by_batch_sys.items():
            self._jobs_poll_batch_sys(
                job_log_root, batch_sys_name, my_ctx_list)

        cur_time_str = get_current_time_string()
        for ctx in ctx_list:
            for message in ctx.messages:
                sys.stdout.write("%s%s|%s|%s\n" % (
                    self.OUT_PREFIX_MESSAGE,
                    cur_time_str,
                    ctx.job_log_dir,
                    message))
            sys.stdout.write("%s%s|%s\n" % (
                self.OUT_PREFIX_SUMMARY,
                cur_time_str,
                ctx.get_summary_str()))
Code Example #38
File: batch_sys_manager.py  Project: arjclark/cylc
    def jobs_poll(self, job_log_root, job_log_dirs):
        """Poll multiple jobs.

        job_log_root -- The log/job/ sub-directory of the suite.
        job_log_dirs -- A list containing point/name/submit_num for task jobs.

        """
        if "$" in job_log_root:
            job_log_root = os.path.expandvars(job_log_root)
        self.configure_suite_run_dir(job_log_root.rsplit(os.sep, 2)[0])

        ctx_list = []  # Contexts for all relevant jobs
        ctx_list_by_batch_sys = {}  # {batch_sys_name1: [ctx1, ...], ...}

        for job_log_dir in job_log_dirs:
            ctx = self._jobs_poll_status_files(job_log_root, job_log_dir)
            if ctx is None:
                continue
            ctx_list.append(ctx)

            if not ctx.batch_sys_name or not ctx.batch_sys_job_id:
                # Lost batch system information for some reason.
                # Mark the job as if it is no longer in the batch system.
                ctx.batch_sys_exit_polled = 1
                sys.stderr.write(
                    "%s/%s: incomplete batch system info\n" % (
                        ctx.job_log_dir, JOB_LOG_STATUS))

            # We can trust:
            # * Jobs previously polled to have exited the batch system.
            # * Jobs succeeded or failed with ERR/EXIT.
            if (ctx.batch_sys_exit_polled or ctx.run_status == 0 or
                    ctx.run_signal in ["ERR", "EXIT"]):
                continue

            if ctx.batch_sys_name not in ctx_list_by_batch_sys:
                ctx_list_by_batch_sys[ctx.batch_sys_name] = []
            ctx_list_by_batch_sys[ctx.batch_sys_name].append(ctx)

        for batch_sys_name, my_ctx_list in ctx_list_by_batch_sys.items():
            self._jobs_poll_batch_sys(
                job_log_root, batch_sys_name, my_ctx_list)

        cur_time_str = get_current_time_string()
        for ctx in ctx_list:
            for message in ctx.messages:
                sys.stdout.write("%s%s|%s|%s\n" % (
                    self.OUT_PREFIX_MESSAGE,
                    cur_time_str,
                    ctx.job_log_dir,
                    message))
            sys.stdout.write("%s%s|%s\n" % (
                self.OUT_PREFIX_SUMMARY,
                cur_time_str,
                ctx.get_summary_str()))
Code Example #39
File: rundb.py  Project: ScottWales/cylc
 def __init__(self, name, cycle, time_created_string=None,
              time_updated_string=None, submit_num=None,
              is_manual_submit=None, try_num=None, host=None,
              submit_method=None, submit_method_id=None, status=None):
     """Insert a new row into the states table"""
     if time_created_string is None:
         time_created_string = get_current_time_string()
     self.s_fmt = "INSERT INTO task_states VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
     self.args = [name, cycle, time_created_string, time_updated_string,
                  submit_num, is_manual_submit, try_num, host,
                  submit_method, submit_method_id, status]
Code Example #40
    def __init__(self, priority=NORMAL):
        if priority in self.PRIORITIES:
            self.priority = priority
        else:
            raise Exception('Illegal message priority ' + priority)

        # load the environment
        self.env_map = dict(os.environ)

        # set some instance variables
        self.suite = None
        self.task_id = None
        self.retry_seconds = None
        self.max_tries = None
        self.try_timeout = None
        self.owner = None
        self.host = None
        self.port = None
        for attr, key, default in self.ATTRS:
            value = self.env_map.get(key, default)
            setattr(self, attr, value)

        # conversions from string:
        if self.try_timeout == 'None':
            self.try_timeout = None
        try:
            self.retry_seconds = float(self.retry_seconds)
            self.max_tries = int(self.max_tries)
        except ValueError:
            pass

        cylc.flags.verbose = (
            cylc.flags.verbose or self.env_map.get('CYLC_VERBOSE') == 'True')

        # 'scheduler' or 'submit', (or 'raw' if job script run manually)
        self.mode = self.env_map.get('CYLC_MODE', 'raw')

        suite_run_dir = self.env_map.get('CYLC_SUITE_RUN_DIR', '.')
        self.env_file_path = os.path.join(suite_run_dir, 'cylc-suite-env')

        self.utc = self.env_map.get('CYLC_UTC') == 'True'
        # Record the time the messaging system was called and append it
        # to the message, in case the message is delayed in some way.
        self.true_event_time = get_current_time_string(
            override_use_utc=self.utc)

        self.ssh_messaging = (
            self.env_map.get('CYLC_TASK_COMMS_METHOD') == 'ssh')

        self.polling = (
            self.env_map.get('CYLC_TASK_COMMS_METHOD') == 'poll')

        self.ssh_login_shell = (
            self.env_map.get('CYLC_TASK_SSH_LOGIN_SHELL') != 'False')
Code Example #41
File: suite_logging.py  Project: rbaumert/cylc
 def log(self, level, *args, **kwargs):
     if self.logger.handlers:
         # If this logger has file handlers write out to it.
         self.logger.log(level, *args, **kwargs)
     else:
         # No file handlers, write out to stdout/stderr.
         msg = (get_current_time_string() + ' ' + logging._levelNames[level]
                + ' - ' + str(args[0]), ) + tuple(*args[1:])
         if self.log_ in [SuiteLog.OUT, SuiteLog.LOG]:
             print(*msg)
         else:
             print(*msg, file=sys.stderr)
Code Example #42
File: rundb.py  Project: dmanubens-zz/cylc
 def __init__(self, name, cycle, time_created_string=None,
              time_updated_string=None, submit_num=None,
              is_manual_submit=None, try_num=None, host=None,
              submit_method=None, submit_method_id=None, status=None):
     """Insert a new row into the states table"""
     if time_created_string is None:
         time_created_string = get_current_time_string()
     self.s_fmt = "INSERT INTO task_states VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
     self.args = [name, cycle, time_created_string, time_updated_string,
                  submit_num, is_manual_submit, try_num, host,
                  submit_method, submit_method_id, status]
     self.to_run = True
Code Example #43
File: updater.py  Project: m214089/cylc
 def update(self):
     """Try and connect and do an update."""
     if self._no_update_event.is_set():
         return False
     if not self.connect_schd.ready():
         self.info_bar.set_update_time(
             None,
             get_seconds_as_interval_string(
                 round(self.connect_schd.dt_next)))
         return False
     if cylc.flags.debug:
         print >> sys.stderr, "UPDATE %s" % get_current_time_string()
     if not self.connected:
         # Only reconnect via self.reconnect().
         self.reconnect()
     if not self.connected:
         self.set_stopped()
         if cylc.flags.debug:
             print >> sys.stderr, "(not connected)"
         return False
     if cylc.flags.debug:
         print >> sys.stderr, "(connected)"
     try:
         err_log_changed = self.retrieve_err_log()
         summaries_changed = self.retrieve_summary_update_time()
         if self.summary_update_time is not None and summaries_changed:
             self.retrieve_state_summaries()
     except Exception as exc:
         if self.status == SUITE_STATUS_STOPPING:
             # Expected stop: prevent the reconnection warning dialog.
             self.connect_fail_warned = True
         if cylc.flags.debug:
             print >> sys.stderr, "  CONNECTION LOST", str(exc)
         self.set_stopped()
         if self.info_bar.prog_bar_active():
             gobject.idle_add(self.info_bar.prog_bar_stop)
         self.reconnect()
         return False
     else:
         # Got suite data.
         self.version_mismatch_warned = False
         status_str = None
         if self.status in [SUITE_STATUS_INITIALISING,
                            SUITE_STATUS_STOPPING]:
             status_str = self.status
         elif self.is_reloading:
             status_str = "reloading"
         if status_str is None:
             gobject.idle_add(self.info_bar.prog_bar_stop)
         elif self.info_bar.prog_bar_can_start():
             gobject.idle_add(self.info_bar.prog_bar_start, status_str)
         return summaries_changed or err_log_changed
コード例 #44
0
ファイル: suite_logging.py プロジェクト: oliver-sanders/cylc
 def log(self, level, *args, **kwargs):
     if self.logger.handlers:
         # If this logger has file handlers write out to it.
         self.logger.log(level, *args, **kwargs)
     else:
         # No file handlers, write out to stdout/stderr.
         msg = (get_current_time_string() + ' ' +
                logging._levelNames[level] + ' - ' +
                str(args[0]),) + tuple(*args[1:])
         if self.log_ in [SuiteLog.OUT, SuiteLog.LOG]:
             print(*msg)
         else:
             print(*msg, file=sys.stderr)
コード例 #45
0
ファイル: batch_sys_manager.py プロジェクト: aosprey/cylc
 def _jobs_poll_batch_sys(self, job_log_root, batch_sys_name, my_ctx_list):
     """Helper 2 for self.jobs_poll(job_log_root, job_log_dirs)."""
     batch_sys = self.get_inst(batch_sys_name)
     all_job_ids = [ctx.batch_sys_job_id for ctx in my_ctx_list]
     if hasattr(batch_sys, "get_poll_many_cmd"):
         # Some poll commands may not be as simple
         cmd = batch_sys.get_poll_many_cmd(all_job_ids)
     else:  # if hasattr(batch_sys, "POLL_CMD"):
         # Simple poll command that takes a list of job IDs
         cmd = [batch_sys.POLL_CMD] + all_job_ids
     try:
         proc = Popen(cmd, stderr=PIPE, stdout=PIPE)
     except OSError as exc:
         # subprocess.Popen has a bad habit of not setting the
         # filename of the executable when it raises an OSError.
         if not exc.filename:
             exc.filename = cmd[0]
         sys.stderr.write(str(exc) + "\n")
         return
     proc.wait()
     out, err = proc.communicate()
     sys.stderr.write(err)
     if hasattr(batch_sys, "filter_poll_many_output"):
         # Allow custom filter
         job_ids = batch_sys.filter_poll_many_output(out)
     else:
         # Just about all poll commands return a table, with column 1
         # being the job ID. The logic here should be sufficient to
         # ensure that any table header is ignored.
         job_ids = []
         for line in out.splitlines():
             try:
                 head = line.split(None, 1)[0]
             except IndexError:
                 continue
             if head in all_job_ids:
                 job_ids.append(head)
     for ctx in my_ctx_list:
         ctx.batch_sys_exit_polled = int(
             ctx.batch_sys_job_id not in job_ids)
         # Add information to "job.status"
         if ctx.batch_sys_exit_polled:
             try:
                 handle = open(os.path.join(
                     job_log_root, ctx.job_log_dir, "job.status"), "a")
                 handle.write("%s=%s\n" % (
                     self.CYLC_BATCH_SYS_EXIT_POLLED,
                     get_current_time_string()))
                 handle.close()
             except IOError as exc:
                 sys.stderr.write(str(exc) + "\n")
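The fallback branch above treats the poll command's output as a table whose first column is a job ID, and keeps only the IDs it was asked about. Here is a self-contained illustration of that filtering with made-up scheduler output, not a real batch system:

def filter_poll_output(out, all_job_ids):
    """Return the job IDs from all_job_ids still present in column 1 of out."""
    job_ids = []
    for line in out.splitlines():
        try:
            head = line.split(None, 1)[0]
        except IndexError:
            continue  # blank line, nothing in column 1
        if head in all_job_ids:
            job_ids.append(head)
    return job_ids


sample_output = """JOBID  PARTITION  NAME   ST
1234   debug      model  R

1236   debug      post   PD
"""
print(filter_poll_output(sample_output, ["1234", "1235", "1236"]))
# -> ['1234', '1236']; job 1235 no longer appears in the queue, so the
#    caller would mark it as having exited.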
コード例 #46
0
ファイル: task_message.py プロジェクト: m214089/cylc
    def __init__(self, priority=NORMAL):
        if priority in self.PRIORITIES:
            self.priority = priority
        else:
            raise ValueError('Illegal message priority ' + priority)

        # load the environment
        self.env_map = dict(os.environ)

        # set some instance variables
        self.suite = self.env_map.get(SuiteSrvFilesManager.KEY_NAME)
        self.task_id = self.env_map.get('CYLC_TASK_ID')

        # Record the time the messaging system was called and append it
        # to the message, in case the message is delayed in some way.
        self.true_event_time = get_current_time_string(
            override_use_utc=self.env_map.get('CYLC_UTC') == 'True')
コード例 #47
0
    def __init__(self, severity=NORMAL):
        if severity in self.SEVERITIES:
            self.severity = severity
        else:
            raise ValueError('Illegal message severity ' + severity)

        # load the environment
        self.env_map = dict(os.environ)

        # set some instance variables
        self.suite = self.env_map.get(SuiteSrvFilesManager.KEY_NAME)
        self.task_id = self.env_map.get('CYLC_TASK_ID')

        # Record the time the messaging system was called and append it
        # to the message, in case the message is delayed in some way.
        self.true_event_time = get_current_time_string(
            override_use_utc=self.env_map.get('CYLC_UTC') == 'True')
コード例 #48
0
ファイル: rundb.py プロジェクト: ScottWales/cylc
 def __init__(self, table, name, cycle, **kwargs):
     """Update a row in a table."""
     kwargs["time_updated"] = get_current_time_string()
     s_fmt = "UPDATE %(table)s SET %(cols)s WHERE name==? AND cycle==?"
     cols = ""
     args = []
     not_first = False
     for k, v in kwargs.items():
         if not_first:
             cols += ", "
         not_first = True
         cols += k + "=?"
         args.append(v)
     args.append(name)
     args.append(cycle)
     self.s_fmt = s_fmt % {"table": table, "cols": cols}
     self.args = args
コード例 #49
0
ファイル: rundb.py プロジェクト: dmanubens-zz/cylc
 def __init__(self, table, name, cycle, **kwargs):
     """Update a row in a table."""
     kwargs["time_updated"] = get_current_time_string()
     s_fmt = "UPDATE %(table)s SET %(cols)s WHERE name==? AND cycle==?"
     cols = ""
     args = []
     not_first = False
     for k, v in kwargs.items():
         if not_first:
             cols += ", "
         not_first = True
         cols += k + "=?"
         args.append(v)
     args.append(name)
     args.append(cycle)
     self.s_fmt = s_fmt % {"table": table, "cols": cols}
     self.args = args
     self.to_run = True
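The statement and argument list assembled above plug straight into sqlite3 parameter substitution. The following minimal sketch shows the same pattern end to end; the table layout and values are invented for illustration, not the cylc runtime database schema.

import sqlite3
from datetime import datetime, timezone


def build_update(table, name, cycle, **kwargs):
    """Build an UPDATE statement with one "col=?" fragment per keyword."""
    kwargs["time_updated"] = datetime.now(timezone.utc).isoformat()
    cols = ", ".join("%s=?" % key for key in kwargs)
    stmt = "UPDATE %(table)s SET %(cols)s WHERE name==? AND cycle==?" % {
        "table": table, "cols": cols}
    # Column values first, then the WHERE-clause values, matching the "?"s.
    return stmt, list(kwargs.values()) + [name, cycle]


conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE task_states (name, cycle, time_updated, status)")
conn.execute("INSERT INTO task_states VALUES (?, ?, ?, ?)",
             ["model", "20200101T0000Z", None, "waiting"])
stmt, args = build_update("task_states", "model", "20200101T0000Z",
                          status="running")
conn.execute(stmt, args)
print(conn.execute("SELECT * FROM task_states").fetchall())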
コード例 #50
0
ファイル: task_job_mgr.py プロジェクト: dpmatthews/cylc
 def _prep_submit_task_job_error(self, suite, itask, dry_run, action, exc):
     """Helper for self._prep_submit_task_job. On error."""
     LOG.debug("submit_num %s" % itask.submit_num)
     LOG.debug(traceback.format_exc())
     LOG.error(exc)
     log_task_job_activity(
         SubProcContext(self.JOBS_SUBMIT, action, err=exc, ret_code=1),
         suite, itask.point, itask.tdef.name, submit_num=itask.submit_num)
     if not dry_run:
         # Persist
         self.suite_db_mgr.put_insert_task_jobs(itask, {
             'is_manual_submit': itask.is_manual_submit,
             'try_num': itask.get_try_num(),
             'time_submit': get_current_time_string(),
             'batch_sys_name': itask.summary.get('batch_sys_name'),
         })
         itask.is_manual_submit = False
         self.task_events_mgr.process_message(
             itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED)
コード例 #51
0
ファイル: updater.py プロジェクト: ivorblockley/cylc
    def set_stopped(self):
        """Reset data and clients when suite is stopped."""
        if cylc.flags.debug:
            sys.stderr.write("%s NOT CONNECTED\n" % get_current_time_string())
        self.full_mode = True
        self.connected = False
        self.set_status(SUITE_STATUS_STOPPED)
        self.update_interval += 1.0
        if self.update_interval > self.max_update_interval:
            self.update_interval = self.max_update_interval
        self.state_summary = {}
        self.full_state_summary = {}
        self.fam_state_summary = {}
        self.full_fam_state_summary = {}
        self.all_families = {}
        self.global_summary = {}
        self.cfg.port = None
        self.client = None

        gobject.idle_add(self.app_window.set_title, str(self.cfg.suite))

        # Use info bar to display stop summary if available.
        # Otherwise, just display the reconnect count down.
        if self.cfg.suite and self.stop_summary is None:
            stop_summary = get_stop_state_summary(
                cat_state(self.cfg.suite, self.cfg.host, self.cfg.owner))
            if stop_summary != self.stop_summary:
                self.stop_summary = stop_summary
                self.status = SUITE_STATUS_STOPPED
                gobject.idle_add(
                    self.info_bar.set_stop_summary, stop_summary)
                self.last_update_time = time()
        try:
            update_time_str = time2str(self.stop_summary[0]["last_updated"])
        except (AttributeError, IndexError, KeyError, TypeError):
            update_time_str = None
        gobject.idle_add(
            self.info_bar.set_update_time,
            update_time_str, self.info_bar.DISCONNECTED_TEXT)
        gobject.idle_add(self.info_bar.prog_bar_stop)
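The three lines above that grow update_interval up to max_update_interval implement a simple additive back-off on the reconnect polling. A tiny sketch of that idea, with illustrative names only:

def next_update_interval(current, maximum, step=1.0):
    """Grow the polling interval by `step` seconds, capped at `maximum`."""
    return min(current + step, maximum)


interval = 1.0
for _ in range(5):
    print(interval)
    interval = next_update_interval(interval, maximum=3.0)
# 1.0, 2.0, 3.0, 3.0, 3.0 -- retries slow down once the suite looks stopped.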
コード例 #52
0
def record_messages(suite, task_job, messages):
    """Record task job messages.

    Print the messages according to their severity.
    Write the messages in the job status file.
    Send the messages to the suite, if possible.

    Arguments:
        suite (str): Suite name.
        task_job (str): Task job identifier "CYCLE/TASK_NAME/SUBMIT_NUM".
        messages (list): List of messages "[[severity, message], ...]".
    """
    # Record the event time, in case the message is delayed in some way.
    event_time = get_current_time_string(
        override_use_utc=(os.getenv('CYLC_UTC') == 'True'))
    # Print to stdout/stderr
    for severity, message in messages:
        if severity in STDERR_LEVELS:
            handle = sys.stderr
        else:
            handle = sys.stdout
        handle.write('%s %s - %s\n' % (event_time, severity, message))
    handle.flush()
    # Write to job.status
    _append_job_status_file(suite, task_job, event_time, messages)
    # Send messages
    try:
        pclient = SuiteRuntimeClient(suite)
    except Exception:
        # Backward communication not possible; print the traceback in debug
        # mode, then bail out before the client call below.
        if cylc.flags.debug:
            import traceback
            traceback.print_exc()
        return
    pclient('put_messages', {
        'task_job': task_job,
        'event_time': event_time,
        'messages': messages
    })
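As a rough standalone illustration of the stdout/stderr routing in the loop above, here is the same idea stripped of the cylc-specific parts; the STDERR_LEVELS contents below are an assumption, not lifted from cylc:

import sys
from datetime import datetime, timezone

# Assumed set of severities that should go to stderr.
STDERR_LEVELS = ('WARNING', 'ERROR', 'CRITICAL')


def print_messages(messages):
    """Write each (severity, message) pair to stdout or stderr, timestamped."""
    event_time = datetime.now(timezone.utc).isoformat()
    for severity, message in messages:
        handle = sys.stderr if severity in STDERR_LEVELS else sys.stdout
        handle.write('%s %s - %s\n' % (event_time, severity, message))
        handle.flush()


print_messages([('INFO', 'started'), ('WARNING', 'disk nearly full')])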
コード例 #53
0
 def do_rollover(self):
     """Create and rollover log file if necessary."""
     # Generate new file name
     self.stamp = get_current_time_string(use_basic_format=True)
     filename = self.baseFilename + '.' + self.stamp
     mkdir_p(os.path.dirname(filename))
     # Touch file
     with open(filename, 'w+'):
         os.utime(filename, None)
     # Update symlink
     if (os.path.exists(self.baseFilename)
             or os.path.lexists(self.baseFilename)):
         os.unlink(self.baseFilename)
     os.symlink(os.path.basename(filename), self.baseFilename)
     # Housekeep log files
     arch_len = glbl_cfg().get([self.GLBL_KEY, 'rolling archive length'])
     if arch_len:
         log_files = glob(self.baseFilename + '.*')
         log_files.sort()
         while len(log_files) > arch_len:
             os.unlink(log_files.pop(0))
     # Reopen stream, redirect STDOUT and STDERR to log
     if self.stream:
         self.stream.close()
         self.stream = None
     self.stream = self._open()
     # Dup STDOUT and STDERR in detach mode
     if not self.no_detach:
         os.dup2(self.stream.fileno(), sys.stdout.fileno())
         os.dup2(self.stream.fileno(), sys.stderr.fileno())
     # Emit header records (should only do this for subsequent log files)
     for header_record in self.header_records:
         if self.FILE_NUM in header_record.__dict__:
             # Increment log file number
             header_record.__dict__[self.FILE_NUM] += 1
             header_record.args = header_record.args[0:-1] + (
                 header_record.__dict__[self.FILE_NUM], )
         logging.FileHandler.emit(self, header_record)
コード例 #54
0
ファイル: loggingutil.py プロジェクト: dpmatthews/cylc
 def do_rollover(self):
     """Create and rollover log file if necessary."""
     # Generate new file name
     self.stamp = get_current_time_string(use_basic_format=True)
     filename = self.baseFilename + '.' + self.stamp
     os.makedirs(os.path.dirname(filename), exist_ok=True)
     # Touch file
     with open(filename, 'w+'):
         os.utime(filename, None)
     # Update symlink
     if (os.path.exists(self.baseFilename) or
             os.path.lexists(self.baseFilename)):
         os.unlink(self.baseFilename)
     os.symlink(os.path.basename(filename), self.baseFilename)
     # Housekeep log files
     arch_len = glbl_cfg().get([self.GLBL_KEY, 'rolling archive length'])
     if arch_len:
         log_files = glob(self.baseFilename + '.*')
         log_files.sort()
         while len(log_files) > arch_len:
             os.unlink(log_files.pop(0))
     # Reopen stream, redirect STDOUT and STDERR to log
     if self.stream:
         self.stream.close()
         self.stream = None
     self.stream = self._open()
     # Dup STDOUT and STDERR in detach mode
     if not self.no_detach:
         os.dup2(self.stream.fileno(), sys.stdout.fileno())
         os.dup2(self.stream.fileno(), sys.stderr.fileno())
     # Emit header records (should only do this for subsequent log files)
     for header_record in self.header_records:
         if self.FILE_NUM in header_record.__dict__:
             # Increment log file number
             header_record.__dict__[self.FILE_NUM] += 1
             header_record.args = header_record.args[0:-1] + (
                 header_record.__dict__[self.FILE_NUM],)
         logging.FileHandler.emit(self, header_record)
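The housekeeping part of the rollover above (timestamped file, symlink repointed, oldest archives pruned) reduces to a few lines. The sketch below deliberately skips the stream re-opening and file-descriptor duplication, and the paths are illustrative:

import os
import tempfile
from datetime import datetime, timezone
from glob import glob


def rollover(base_path, arch_len=3):
    """Create a new timestamped log, repoint the symlink, prune old files."""
    stamp = datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%S.%fZ')
    filename = base_path + '.' + stamp
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    open(filename, 'w').close()                    # touch the new log file
    if os.path.lexists(base_path):
        os.unlink(base_path)                       # drop the old symlink
    os.symlink(os.path.basename(filename), base_path)
    # Keep only the newest arch_len timestamped files.
    log_files = sorted(glob(base_path + '.*'))     # stamps sort lexically
    while len(log_files) > arch_len:
        os.unlink(log_files.pop(0))


base = os.path.join(tempfile.mkdtemp(), 'log')
for _ in range(5):
    rollover(base, arch_len=3)
print(sorted(os.listdir(os.path.dirname(base))))   # 3 archives plus 'log'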
コード例 #55
0
ファイル: suite_logging.py プロジェクト: m214089/cylc
 def log(self, level, *args, **kwargs):
     try:
         itask = kwargs.pop("itask")
     except KeyError:
         pass
     else:
         try:
             args = ("[%s] -%s" % (itask.identity, args[0]),) + args[1:]
         except AttributeError:
             args = ("[%s] -%s" % (itask, args[0]),) + args[1:]
         args = tuple(args)
     if self.logger.handlers:
         # If this logger has file handlers write out to it.
         self.logger.log(level, *args, **kwargs)
     else:
         # No file handlers, write out to stdout/stderr.
         msg = (get_current_time_string() + ' ' +
                logging._levelNames[level] + ' - ' +
                str(args[0]),) + tuple(*args[1:])
         if self.log_ in [SuiteLog.OUT, SuiteLog.LOG]:
             print(*msg)
         else:
             print(*msg, file=sys.stderr)
コード例 #56
0
ファイル: suite_logging.py プロジェクト: wenlien/cylc
 def log(self, level, *args, **kwargs):
     try:
         itask = kwargs.pop("itask")
     except KeyError:
         pass
     else:
         try:
             args = ("[%s] -%s" % (itask.identity, args[0]), ) + args[1:]
         except AttributeError:
             args = ("[%s] -%s" % (itask, args[0]), ) + args[1:]
         args = tuple(args)
     self.update_time = time()
     if self.logger.handlers:
         # If this logger has file handlers write out to it.
         self.logger.log(level, *args, **kwargs)
     else:
         # No file handlers, write out to stdout/stderr.
         msg = (get_current_time_string() + ' ' + logging._levelNames[level]
                + ' - ' + str(args[0]), ) + tuple(*args[1:])
         if self.log_ in [SuiteLog.OUT, SuiteLog.LOG]:
             print(*msg)
         else:
             print(*msg, file=sys.stderr)
コード例 #57
0
ファイル: httpclient.py プロジェクト: ahuang11/cylc
    def put_messages(self, payload):
        """Send task messages to suite server program.

        Arguments:
            payload (dict):
                task_job (str): Task job as "CYCLE/TASK_NAME/SUBMIT_NUM".
                event_time (str): Event time as string.
                messages (list): List in the form [[severity, message], ...].
        """
        retry_intvl = float(self.comms1.get(
            self.srv_files_mgr.KEY_TASK_MSG_RETRY_INTVL,
            self.MSG_RETRY_INTVL))
        max_tries = int(self.comms1.get(
            self.srv_files_mgr.KEY_TASK_MSG_MAX_TRIES,
            self.MSG_MAX_TRIES))
        for i in range(1, max_tries + 1):  # 1..max_tries inclusive
            orig_timeout = self.timeout
            if self.timeout is None:
                self.timeout = self.MSG_TIMEOUT
            try:
                results = []  # collected results (the compat branches append)
                func_name = self._compat('put_messages')
                if func_name == 'put_messages':
                    results = self._call_server(func_name, payload=payload)
                elif func_name == 'put_message':  # API 1, 7.5.0 compat
                    cycle, name = payload['task_job'].split('/')[0:2]
                    for severity, message in payload['messages']:
                        results.append(self._call_server(
                            func_name, task_id='%s.%s' % (name, cycle),
                            severity=severity, message=message))
                else:  # API 0, pre-7.5.0 compat, priority instead of severity
                    cycle, name = payload['task_job'].split('/')[0:2]
                    for severity, message in payload['messages']:
                        results.append(self._call_server(
                            func_name, task_id='%s.%s' % (name, cycle),
                            priority=severity, message=message))
            except ClientInfoError:
                # Contact info file not found, suite probably not running.
                # Don't bother with retry; a suite restart will poll anyway.
                raise
            except ClientError as exc:
                now = get_current_time_string()
                sys.stderr.write(
                    "%s WARNING - Message send failed, try %s of %s: %s\n" % (
                        now, i, max_tries, exc))
                if i < max_tries:
                    sys.stderr.write(
                        "   retry in %s seconds, timeout is %s\n" % (
                            retry_intvl, self.timeout))
                    sleep(retry_intvl)
                    # Reset in case contact info or passphrase change
                    self.comms1 = {}
                    self.host = None
                    self.port = None
                    self.auth = None
            else:
                if i > 1:
                    # Continue to write to STDERR, so users can easily see that
                    # it has recovered from previous failures.
                    sys.stderr.write(
                        "%s INFO - Send message: try %s of %s succeeded\n" % (
                            get_current_time_string(), i, max_tries))
                return results
            finally:
                self.timeout = orig_timeout
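The loop above is a bounded-retry pattern: try up to max_tries times, sleep between failures, and report when a later attempt recovers. Below is a generic standalone sketch of that pattern; the send callable is a stand-in for illustration, not a cylc client.

import time


def send_with_retries(send, payload, max_tries=5, retry_intvl=0.1):
    """Call send(payload), retrying on failure with a fixed interval."""
    for i in range(1, max_tries + 1):
        try:
            result = send(payload)
        except Exception as exc:
            print("send failed, try %s of %s: %s" % (i, max_tries, exc))
            if i < max_tries:
                time.sleep(retry_intvl)
        else:
            if i > 1:
                # Report recovery so earlier failure messages are not alarming.
                print("send succeeded on try %s of %s" % (i, max_tries))
            return result
    raise RuntimeError("gave up after %s tries" % max_tries)


attempts = []


def flaky_send(payload):
    """Fail twice, then succeed -- just to exercise the retry loop."""
    attempts.append(payload)
    if len(attempts) < 3:
        raise IOError("connection refused")
    return "ok"


print(send_with_retries(flaky_send, {"messages": [["INFO", "started"]]}))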
コード例 #58
0
ファイル: task_events_mgr.py プロジェクト: ahuang11/cylc
    def process_message(self,
                        itask,
                        severity,
                        message,
                        event_time=None,
                        flag='',
                        submit_num=None):
        """Parse an incoming task message and update task state.

        Incoming, e.g. "succeeded at <TIME>", may be from task job or polling.

        It is possible for my current state to be inconsistent with an incoming
        message (whether normal or polled) e.g. due to a late poll result, or a
        network outage, or manual state reset. To handle this, if a message
        would take the task state backward, issue a poll to confirm instead of
        changing state - then always believe the next message. Note that the
        next message might not be the result of this confirmation poll, in the
        unlikely event that a job emits a succession of messages very quickly,
        but this is the best we can do without somehow uniquely associating
        each poll with its result message.

        Arguments:
            itask (cylc.task_proxy.TaskProxy):
                The task proxy object relevant for the message.
            severity (str or int):
                Message severity, should be a recognised logging level.
            message (str):
                Message content.
            event_time (str):
                Event time stamp. Expect ISO8601 date time string.
                If not specified, use current time.
            flag (str):
                If specified, can be INCOMING_FLAG to indicate an incoming
                message, POLLED_FLAG to indicate a message resulted from a
                poll. Otherwise, the message is assumed to be generated by the
                logic in the suite server program.
            submit_num (int):
                The submit number of the task relevant for the message.
                If not specified, use latest submit number.

        Return:
            None: in normal circumstances.
            True: if polling is required to confirm a reversal of status.

        """
        # Log incoming messages
        if event_time is None:
            event_time = get_current_time_string()
        if submit_num is None:
            submit_num = itask.submit_num
        if flag == self.INCOMING_FLAG and submit_num != itask.submit_num:
            flag = self.IGNORED_INCOMING_FLAG
        log_message = '(current:%s)%s %s at %s' % (itask.state.status, flag,
                                                   message, event_time)
        LOG.log(self.LEVELS.get(severity, INFO), log_message, itask=itask)
        if flag == self.IGNORED_INCOMING_FLAG:
            LOG.warning(
                'submit-num=%d: ignore message from job submit-num=%d' %
                (itask.submit_num, submit_num),
                itask=itask)
            return

        # always update the suite state summary for latest message
        itask.summary['latest_message'] = message
        if flag == self.POLLED_FLAG:
            itask.summary['latest_message'] += ' %s' % self.POLLED_FLAG
        cylc.flags.iflag = True

        # Satisfy my output, if possible, and record the result.
        an_output_was_satisfied = itask.state.outputs.set_msg_trg_completion(
            message=message, is_completed=True)

        if message == TASK_OUTPUT_STARTED:
            if (flag == self.INCOMING_FLAG
                    and itask.state.is_gt(TASK_STATUS_RUNNING)):
                return True
            self._process_message_started(itask, event_time)
        elif message == TASK_OUTPUT_SUCCEEDED:
            self._process_message_succeeded(itask, event_time)
        elif message == TASK_OUTPUT_FAILED:
            if (flag == self.INCOMING_FLAG
                    and itask.state.is_gt(TASK_STATUS_FAILED)):
                return True
            self._process_message_failed(itask, event_time, self.JOB_FAILED)
        elif message == self.EVENT_SUBMIT_FAILED:
            if (flag == self.INCOMING_FLAG
                    and itask.state.is_gt(TASK_STATUS_SUBMIT_FAILED)):
                return True
            self._process_message_submit_failed(itask, event_time)
        elif message == TASK_OUTPUT_SUBMITTED:
            if (flag == self.INCOMING_FLAG
                    and itask.state.is_gt(TASK_STATUS_SUBMITTED)):
                return True
            self._process_message_submitted(itask, event_time)
        elif message.startswith(FAIL_MESSAGE_PREFIX):
            # Task received signal.
            if (flag == self.INCOMING_FLAG
                    and itask.state.is_gt(TASK_STATUS_FAILED)):
                return True
            signal = message[len(FAIL_MESSAGE_PREFIX):]
            self._db_events_insert(itask, "signaled", signal)
            self.suite_db_mgr.put_update_task_jobs(itask,
                                                   {"run_signal": signal})
            self._process_message_failed(itask, event_time, self.JOB_FAILED)
        elif message.startswith(ABORT_MESSAGE_PREFIX):
            # Task aborted with message
            if (flag == self.INCOMING_FLAG
                    and itask.state.is_gt(TASK_STATUS_FAILED)):
                return True
            aborted_with = message[len(ABORT_MESSAGE_PREFIX):]
            self._db_events_insert(itask, "aborted", message)
            self.suite_db_mgr.put_update_task_jobs(
                itask, {"run_signal": aborted_with})
            self._process_message_failed(itask, event_time, aborted_with)
        elif message.startswith(VACATION_MESSAGE_PREFIX):
            # Task job pre-empted into a vacation state
            self._db_events_insert(itask, "vacated", message)
            itask.set_summary_time('started')  # unset
            if TASK_STATUS_SUBMIT_RETRYING in itask.try_timers:
                itask.try_timers[TASK_STATUS_SUBMIT_RETRYING].num = 0
            itask.job_vacated = True
            # Believe this and change state without polling (could poll?).
            self.pflag = True
            itask.state.reset_state(TASK_STATUS_SUBMITTED)
            self._reset_job_timers(itask)
        elif an_output_was_satisfied:
            # Message of an as-yet unreported custom task output.
            # No state change.
            self.pflag = True
            self.suite_db_mgr.put_update_task_outputs(itask)
        else:
            # Unhandled messages. These include:
            #  * general non-output/progress messages
            #  * poll messages that repeat previous results
            # Note that all messages are logged already at the top.
            # No state change.
            LOG.debug('(current:%s) unhandled: %s' %
                      (itask.state.status, message),
                      itask=itask)
            if severity in [CRITICAL, ERROR, WARNING, INFO, DEBUG]:
                severity = getLevelName(severity)
            self._db_events_insert(itask,
                                   ("message %s" % str(severity).lower()),
                                   message)
        if severity in ['WARNING', 'CRITICAL', 'CUSTOM']:
            self.setup_event_handlers(itask, severity.lower(), message)
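The is_gt checks above implement the rule described in the docstring: if an incoming message would move the task state backward, poll to confirm instead of changing state. Here is a toy illustration of that ordering check, using a simplified status list that is not cylc's actual ordered status set:

# Simplified status ordering, most "advanced" last -- an assumption for
# illustration only.
STATUS_ORDER = ['waiting', 'submitted', 'running', 'succeeded']


def needs_confirming_poll(current_status, implied_status):
    """True if accepting the message would move the task state backward."""
    return (STATUS_ORDER.index(current_status) >
            STATUS_ORDER.index(implied_status))


# A late "started" message arriving after the task already succeeded:
print(needs_confirming_poll('succeeded', 'running'))   # True  -> poll first
# A "started" message for a task we think is merely submitted:
print(needs_confirming_poll('submitted', 'running'))   # False -> accept it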
コード例 #59
0
ファイル: multisubprocess.py プロジェクト: kaday/cylc
        return (out, err)


if __name__ == "__main__":
    commands = []
    for i in range(1, 5):
        if i == 4:
            command = ("echoX hello from " + str(i) +
                       "; sleep 10; echo bye from " + str(i))
        else:
            command = ("echo hello from " + str(i) +
                       "; sleep 10; echo bye from " + str(i))

        commands.append(command)

    print 'SRT:', get_current_time_string(display_sub_seconds=True)

    mp = multisubprocess(commands)
    out, err = mp.execute()

    count = 1
    for item in out:
        print count
        print item
        count += 1

    count = 1
    for item in err:
        print count
        print item
        count += 1