Example #1
 def recover_pub_from_pri(self):
     """Recover public database from private database."""
     if self.pub_dao.n_tries >= self.pub_dao.MAX_TRIES:
         self.copy_pri_to_pub()
         LOG.warning(f"{self.pub_dao.db_file_name}: recovered from "
                     f"{self.pri_dao.db_file_name}")
         self.pub_dao.n_tries = 0
Example #2
    def _execute_stmt(self, stmt, stmt_args_list):
        """Helper for "self.execute_queued_items".

        Execute a statement. On failure, log a warning if this is the
        public database, then re-raise the error either way; the caller
        handles public database failures.
        """
        # Filter out CYLC_TEMPLATE_VARS which breaks executemany because it's:
        # - a dict
        # - recursive (contains itself!)
        if stmt_args_list and stmt_args_list[0]:
            stmt_args_list = [
                i for i in stmt_args_list if i[0] != 'CYLC_TEMPLATE_VARS'
            ]

        try:
            self.connect()
            self.conn.executemany(stmt, stmt_args_list)
        except sqlite3.Error:
            if not self.is_public:
                raise
            if cylc.flow.flags.verbosity > 1:
                traceback.print_exc()
            err_log = (
                "cannot execute database statement:\n"
                "file=%(file)s:\nstmt=%(stmt)s"
            ) % {"file": self.db_file_name, "stmt": stmt}
            for i, stmt_args in enumerate(stmt_args_list):
                err_log += ("\nstmt_args[%(i)d]=%(stmt_args)s" % {
                    "i": i, "stmt_args": stmt_args})
            LOG.warning(err_log)
            raise
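
An aside on the filter above: a minimal sketch of the kind of argument row it removes. The names below are invented for illustration; the point is that a self-referencing dict cannot safely be handed to executemany as a parameter row.

    # A toy stand-in for the CYLC_TEMPLATE_VARS row described above.
    tvars = {}
    tvars['self'] = tvars  # recursive: the dict contains itself
    stmt_args_list = [('CYLC_TEMPLATE_VARS', tvars), ('FOO', '42')]
    stmt_args_list = [
        i for i in stmt_args_list if i[0] != 'CYLC_TEMPLATE_VARS'
    ]
    # -> [('FOO', '42')], now safe to pass to executemany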
Example #3
 def load(self):
     """Load or reload configuration from files."""
     self.sparse.clear()
     self.dense.clear()
     LOG.debug("Loading site/user config files")
     conf_path_str = os.getenv("CYLC_CONF_PATH")
     if conf_path_str:
         # Explicit config file override.
         fname = os.path.join(conf_path_str, self.CONF_BASENAME)
         if os.access(fname, os.F_OK | os.R_OK):
             self.loadcfg(fname, upgrader.USER_CONFIG)
     elif conf_path_str is None:
         # Use default locations.
         for conf_dir, conf_type in [
             (self.SITE_CONF_DIR, upgrader.SITE_CONFIG),
             (self.USER_CONF_DIR, upgrader.USER_CONFIG)
         ]:
             fname = os.path.join(conf_dir, self.CONF_BASENAME)
             if not os.access(fname, os.F_OK | os.R_OK):
                 continue
             try:
                 self.loadcfg(fname, conf_type)
             except ParsecError as exc:
                 if conf_type == upgrader.SITE_CONFIG:
                     # Warn on bad site file (users can't fix it).
                     LOG.warning('ignoring bad %s %s:\n%s', conf_type,
                                 fname, exc)
                 else:
                     # Abort on bad user file (users can fix it).
                     LOG.error('bad %s %s', conf_type, fname)
                     raise
     # (OK if no flow.rc is found, just use system defaults).
     self._transform()
Example #4
 def check_job_time(self, itask, now):
     """Check/handle job timeout and poll timer"""
     can_poll = self.check_poll_time(itask, now)
     if itask.timeout is None or now <= itask.timeout:
         return can_poll
     # Timeout reached for task, emit event and reset itask.timeout
     time_ref = event = None  # stay None if the task is in any other state
     if itask.state(TASK_STATUS_RUNNING):
         time_ref = itask.summary['started_time']
         event = 'execution timeout'
     elif itask.state(TASK_STATUS_SUBMITTED):
         time_ref = itask.summary['submitted_time']
         event = 'submission timeout'
     msg = event
     try:
         msg += ' after %s' % intvl_as_str(itask.timeout - time_ref)
     except (TypeError, ValueError):
         # Badness in time_ref?
         pass
     itask.timeout = None  # emit event only once
     if msg and event:
         LOG.warning('[%s] -%s', itask, msg)
         self.setup_event_handlers(itask, event, msg)
         return True
     else:
         return can_poll
Example #5
    def load(self):
        """Load or reload configuration from files."""
        self.sparse.clear()
        self.dense.clear()
        LOG.debug("Loading site/user config files")
        conf_path_str = os.getenv("CYLC_CONF_PATH")
        if conf_path_str:
            # Explicit config file override.
            fname = os.path.join(conf_path_str, self.CONF_BASENAME)
            if os.access(fname, os.F_OK | os.R_OK):
                self.loadcfg(fname, upgrader.USER_CONFIG)
        elif conf_path_str is None:
            # Use default locations.
            for conf_type, conf_dir in self.CONF_DIR_HIERARCHY:
                fname = os.path.join(conf_dir, self.CONF_BASENAME)
                if not os.access(fname, os.F_OK | os.R_OK):
                    continue
                try:
                    self.loadcfg(fname, conf_type)
                except ParsecError as exc:
                    if conf_type == upgrader.SITE_CONFIG:
                        # Warn on bad site file (users can't fix it).
                        LOG.warning(
                            f'ignoring bad {conf_type} {fname}:\n{exc}')
                    else:
                        # Abort on bad user file (users can fix it).
                        LOG.error(f'bad {conf_type} {fname}')
                        raise

        self._set_default_editors()
Example #6
    def _remove_bad_hosts(self, mock_host_stats=None):
        """Return dictionary of 'good' hosts with their metric stats.

        Run 'get-host-metrics' on each run host in parallel and store the
        extracted stats for each host, else an empty JSON structure. Filter
        out 'bad' hosts: those for which metric data cannot be obtained
        from the command, or for which at least one metric value fails a
        specified threshold.
        """
        if mock_host_stats:  # Create fake data for unittest purposes (only).
            host_stats = dict(mock_host_stats)  # Prevent mutable object issues
        else:
            if not self.hosts:
                return {}
            host_stats = self._get_host_metrics()
        # Analyse get-host-metrics results
        for host, data in list(dict(host_stats).items()):
            if not data:
                # No results for host (command failed) -> skip.
                host_stats.pop(host)
                continue
            for measure, cutoff in self.parsed_thresholds.items():
                datum = data[measure]
                # Cutoff is a minimum or maximum depending on measure context.
                if ((datum > cutoff and measure.startswith("load"))
                        or (datum < cutoff and
                            (measure == "memory"
                             or measure.startswith("disk-space")))):
                    # Alert user that threshold has not been met.
                    LOG.warning(
                        "host '%s' did not pass %s threshold " +
                        "(%s %s threshold %s)\n", host, measure, datum,
                        ">" if measure.startswith("load") else "<", cutoff)
                    host_stats.pop(host)
                    break
        return host_stats
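
To make the cutoff directions in the comparison above concrete, here is a toy, self-contained run of the same filter logic; the host names, measures and numbers are invented for illustration:

    parsed_thresholds = {'load:5': 1.0, 'memory': 2048}
    host_stats = {
        'hostA': {'load:5': 0.3, 'memory': 4096},  # passes both checks
        'hostB': {'load:5': 2.5, 'memory': 4096},  # load above its maximum
        'hostC': {'load:5': 0.1, 'memory': 1024},  # memory below its minimum
    }
    for host, data in list(host_stats.items()):
        for measure, cutoff in parsed_thresholds.items():
            datum = data[measure]
            if ((datum > cutoff and measure.startswith('load'))
                    or (datum < cutoff and measure == 'memory')):
                host_stats.pop(host)  # threshold not met -> 'bad' host
                break
    print(host_stats)  # only 'hostA' survives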
Example #7
def merge_template_vars(native_tvars: Dict[str, Any],
                        plugin_result: Dict[str, Any]) -> Dict[str, Any]:
    """Manage the merger of Cylc Native and Plugin template variables.

    Args:
        native_tvars: Template variables set on the Cylc command line
            using ``-s`` or a template variable file.
        plugin_result: Plugin result which should contain _at least_
            "templating_detected" and "template_variable" keys.

    Returns:
        template_variables.

    Strategy:
        template variables set in a Cylc Native way should override
        the results of plugins.

    Examples:
        >>> a = {'FOO': 42, 'BAR': 'Hello World'}
        >>> tvars = {'FOO': 24, 'BAZ': 3.14159}
        >>> b = {'templating_detected': 'any', 'template_variables': tvars}
        >>> merge_template_vars(a, b)
        {'FOO': 42, 'BAZ': 3.14159, 'BAR': 'Hello World'}
    """
    if plugin_result['templating_detected'] is not None:
        plugin_tvars = plugin_result['template_variables']
        will_be_overwritten = (native_tvars.keys() & plugin_tvars.keys())
        for key in will_be_overwritten:
            if plugin_tvars[key] != native_tvars[key]:
                LOG.warning(f'Overriding {key}: {plugin_tvars[key]} ->'
                            f' {native_tvars[key]}')
        plugin_tvars.update(native_tvars)
        return plugin_tvars
    else:
        return native_tvars
Example #8
    def _process_message_check(
        self,
        itask,
        severity,
        message,
        event_time,
        flag,
        submit_num,
    ):
        """Helper for `.process_message`.

        See `.process_message` for the argument list.
        Check whether to process or skip the message.
        Return True if `.process_message` should continue, False otherwise.
        """
        logfmt = r'[%s] status=%s: %s%s at %s for job(%02d)'
        if flag == self.FLAG_RECEIVED and submit_num != itask.submit_num:
            # Ignore received messages from old jobs
            LOG.warning(logfmt + r' != current job(%02d)', itask, itask.state,
                        self.FLAG_RECEIVED_IGNORED, message, event_time,
                        submit_num, itask.submit_num)
            return False
        if itask.state.status in (TASK_STATUS_SUBMIT_RETRYING,
                                  TASK_STATUS_RETRYING):
            # Ignore polled messages if task is already in retrying statuses
            LOG.warning(logfmt, itask, itask.state, self.FLAG_POLLED_IGNORED,
                        message, event_time, submit_num)
            return False
        LOG.log(self.LEVELS.get(severity, INFO), logfmt, itask, itask.state,
                flag, message, event_time, submit_num)
        return True
Example #9
def deprecation_warnings(config_tree):
    """Check for deprecated items in config.
    Logs a warning for deprecated items:
        - "root-dir"
        - "jinja2:suite.rc"
        - "empy:suite.rc"

    """

    deprecations = {
        'empy:suite.rc': (
            "'empy:suite.rc' is deprecated."
            " Use [template variables] instead."),
        'jinja2:suite.rc': (
            "'jinja2:suite.rc' is deprecated."
            " Use [template variables] instead."),
        'root-dir': (
            'You have set "root-dir", which is not supported at '
            'Cylc 8. Use `[install] symlink dirs` in global.cylc '
            'instead.')
    }
    for string in list(config_tree.node):
        for deprecation in deprecations.keys():
            if deprecation in string:
                LOG.warning(deprecations[deprecation])
Example #10
def parse_suite_arg(options, arg):
    """From CLI arg "SUITE", return suite name and flow.cylc path.

    If arg is a registered suite, suite name is the registered name.
    If arg is a directory, suite name is the base name of the
    directory.
    If arg is a file, suite name is the base name of its container
    directory.
    """
    if arg == '.':
        arg = os.getcwd()
    try:
        path = get_flow_file(arg, options.suite_owner)
        name = arg
    except SuiteServiceFileError:
        arg = os.path.abspath(arg)
        if os.path.isdir(arg):
            path = os.path.join(arg, SuiteFiles.FLOW_FILE)
            name = os.path.basename(arg)
            if not os.path.exists(path):
                # Probably using deprecated suite.rc
                path = os.path.join(arg, SuiteFiles.SUITE_RC)
                if not os.path.exists(path):
                    raise SuiteServiceFileError(
                        f'no flow.cylc or suite.rc in {arg}')
                else:
                    LOG.warning(
                        f'The filename "{SuiteFiles.SUITE_RC}" is deprecated '
                        f'in favor of "{SuiteFiles.FLOW_FILE}".')
        else:
            path = arg
            name = os.path.basename(os.path.dirname(arg))
    return name, path
Example #11
    def _execute_stmt(self, stmt, stmt_args_list):
        """Helper for "self.execute_queued_items".

        Execute a statement. On failure, log a warning if this is the
        public database, then re-raise the error either way; the caller
        handles public database failures.
        """
        try:
            self.connect()
            self.conn.executemany(stmt, stmt_args_list)
        except sqlite3.Error:
            if not self.is_public:
                raise
            if cylc.flow.flags.debug:
                traceback.print_exc()
            err_log = ("cannot execute database statement:\n"
                       "file=%(file)s:\nstmt=%(stmt)s") % {
                           "file": self.db_file_name,
                           "stmt": stmt
                       }
            for i, stmt_args in enumerate(stmt_args_list):
                err_log += ("\nstmt_args[%(i)d]=%(stmt_args)s" % {
                    "i": i,
                    "stmt_args": stmt_args
                })
            LOG.warning(err_log)
            raise
Example #12
 def _job_cmd_out_callback(suite, itask, cmd_ctx, line):
     """Callback on job command STDOUT/STDERR."""
     if cmd_ctx.cmd_kwargs.get("host") and cmd_ctx.cmd_kwargs.get("user"):
         owner_at_host = "(%(user)s@%(host)s) " % cmd_ctx.cmd_kwargs
     elif cmd_ctx.cmd_kwargs.get("host"):
         owner_at_host = "(%(host)s) " % cmd_ctx.cmd_kwargs
     elif cmd_ctx.cmd_kwargs.get("user"):
         owner_at_host = "(%(user)s@localhost) " % cmd_ctx.cmd_kwargs
     else:
         owner_at_host = ""
     try:
         timestamp, _, content = line.split("|")
     except ValueError:
         pass
     else:
         line = "%s %s" % (timestamp, content)
     job_activity_log = get_task_job_activity_log(
         suite, itask.point, itask.tdef.name)
     try:
         with open(job_activity_log, "ab") as handle:
             if not line.endswith("\n"):
                 line += "\n"
             handle.write((owner_at_host + line).encode())
     except IOError as exc:
         LOG.warning("%s: write failed\n%s" % (job_activity_log, exc))
         LOG.warning("[%s] -%s%s", itask, owner_at_host, line)
Example #13
 def _job_cmd_out_callback(suite, itask, cmd_ctx, line):
     """Callback on job command STDOUT/STDERR."""
     if cmd_ctx.cmd_kwargs.get("host") and cmd_ctx.cmd_kwargs.get("user"):
         owner_at_host = "(%(user)s@%(host)s) " % cmd_ctx.cmd_kwargs
     elif cmd_ctx.cmd_kwargs.get("host"):
         owner_at_host = "(%(host)s) " % cmd_ctx.cmd_kwargs
     elif cmd_ctx.cmd_kwargs.get("user"):
         owner_at_host = "(%(user)s@localhost) " % cmd_ctx.cmd_kwargs
     else:
         owner_at_host = ""
     try:
         timestamp, _, content = line.split("|")
     except ValueError:
         pass
     else:
         line = "%s %s" % (timestamp, content)
     job_activity_log = get_task_job_activity_log(suite, itask.point,
                                                  itask.tdef.name)
     try:
         with open(job_activity_log, "ab") as handle:
             if not line.endswith("\n"):
                 line += "\n"
             handle.write((owner_at_host + line).encode())
     except IOError as exc:
         LOG.warning("%s: write failed\n%s" % (job_activity_log, exc))
         LOG.warning("[%s] -%s%s", itask, owner_at_host, line)
Example #14
    def remote_tidy(self):
        """Remove suite contact files from initialised remotes.

        Call "cylc remote-tidy".
        This method is called on suite shutdown, so we want nothing to hang.
        Timeout any incomplete commands after 10 seconds.

        Also remove UUID file on suite host ".service/uuid".
        """
        # Remove UUID file
        uuid_fname = os.path.join(
            self.suite_srv_files_mgr.get_suite_srv_dir(self.suite),
            FILE_BASE_UUID)
        try:
            os.unlink(uuid_fname)
        except OSError:
            pass
        # Issue all SSH commands in parallel
        procs = {}
        for (host, owner), init_with_contact in self.remote_init_map.items():
            if init_with_contact != REMOTE_INIT_DONE:
                continue
            cmd = ['timeout', '10', 'cylc', 'remote-tidy']
            if is_remote_host(host):
                cmd.append('--host=%s' % host)
            if is_remote_user(owner):
                cmd.append('--user=%s' % owner)
            if cylc.flow.flags.debug:
                cmd.append('--debug')
            cmd.append(os.path.join(glbl_cfg().get_derived_host_item(
                self.suite, 'suite run directory', host, owner)))
            procs[(host, owner)] = (
                cmd,
                Popen(cmd, stdout=PIPE, stderr=PIPE, stdin=open(os.devnull)))
        # Wait for commands to complete for a max of 10 seconds
        timeout = time() + 10.0
        while procs and time() < timeout:
            for (host, owner), (cmd, proc) in procs.copy().items():
                if proc.poll() is None:
                    continue
                del procs[(host, owner)]
                out, err = (f.decode() for f in proc.communicate())
                if proc.wait():
                    LOG.warning(TaskRemoteMgmtError(
                        TaskRemoteMgmtError.MSG_TIDY,
                        (host, owner), ' '.join(quote(item) for item in cmd),
                        proc.returncode, out, err))
        # Terminate any remaining commands
        for (host, owner), (cmd, proc) in procs.items():
            try:
                proc.terminate()
            except OSError:
                pass
            out, err = (f.decode() for f in proc.communicate())
            if proc.wait():
                LOG.warning(TaskRemoteMgmtError(
                    TaskRemoteMgmtError.MSG_TIDY,
                    (host, owner), ' '.join(quote(item) for item in cmd),
                    proc.returncode, out, err))
Example #15
def _get_metrics(hosts, metrics, data=None):
    """Retrieve host metrics using SSH if necessary.

    Note hosts will not appear in the returned results if:
    * They are not contactable.
    * There is an error in the command which returns the results.

    Args:
        hosts (list):
            List of host fqdns.
        metrics (list):
            List in the form [(function, arg1, arg2, ...), ...]
        data (dict):
            Used for logging success/fail outcomes of the form {host: {}}

    Examples:
        Command failure:
        >>> _get_metrics(['localhost'], [['elephant']])
        ({}, {'localhost': {'get_metrics': 'Command failed (exit: 1)'}})

    Returns:
        tuple - (host_stats, data), where host_stats is
            {host: {(function, arg1, arg2, ...): result}}

    """
    host_stats = {}
    proc_map = {}
    if not data:
        data = {host: dict() for host in hosts}

    # Start up commands on hosts
    cmd = ['psutil']
    kwargs = {'stdin_str': json.dumps(metrics), 'capture_process': True}
    for host in hosts:
        if is_remote_host(host):
            proc_map[host] = remote_cylc_cmd(cmd, host=host, **kwargs)
        else:
            proc_map[host] = run_cmd(['cylc'] + cmd, **kwargs)

    # Collect results from commands
    while proc_map:
        for host, proc in list(proc_map.copy().items()):
            if proc.poll() is None:
                continue
            del proc_map[host]
            out, err = (f.decode() for f in proc.communicate())
            if proc.wait():
                # Command failed in verbose/debug mode
                LOG.warning('Could not evaluate "%s" (return code %d)\n%s',
                            host, proc.returncode, err)
                data[host]['get_metrics'] = (
                    f'Command failed (exit: {proc.returncode})')
            else:
                host_stats[host] = dict(
                    zip(
                        metrics,
                        # convert JSON dicts -> namedtuples
                        _deserialise(metrics, parse_dirty_json(out))))
        sleep(0.01)
    return host_stats, data
Example #16
 def timed_out(self) -> bool:
     """Return whether timed out yet."""
     if self.timeout is not None and now() > self.timeout:
         LOG.warning(f"{self.name} timed out after {self.interval}")
         self.timeout = None
         return True
     else:
         return False
Example #17
    def _process_message_check(
        self,
        itask,
        severity,
        message,
        event_time,
        flag,
        submit_num,
    ):
        """Helper for `.process_message`.

        See `.process_message` for the argument list.
        Check whether to process or skip the message.
        Return True if `.process_message` should continue, False otherwise.
        """
        if self.timestamp:
            timestamp = " at %s " % event_time
        else:
            timestamp = ""
        logfmt = r'[%s] status=%s: %s%s%s for job(%02d) flow(%s)'
        if flag == self.FLAG_RECEIVED and submit_num != itask.submit_num:
            # Ignore received messages from old jobs
            LOG.warning(
                logfmt + r' != current job(%02d)',
                itask, itask.state, self.FLAG_RECEIVED_IGNORED, message,
                timestamp, submit_num, itask.flow_label, itask.submit_num)
            return False

        if (
                itask.state(TASK_STATUS_WAITING)
                and
                (
                    (
                        # task has a submit-retry lined up
                        TimerFlags.SUBMISSION_RETRY in itask.try_timers
                        and itask.try_timers[
                            TimerFlags.SUBMISSION_RETRY].num > 0
                    )
                    or
                    (
                        # task has an execution-retry lined up
                        TimerFlags.EXECUTION_RETRY in itask.try_timers
                        and itask.try_timers[
                            TimerFlags.EXECUTION_RETRY].num > 0
                    )
                )

        ):
            # Ignore polled messages if task has a retry lined up
            LOG.warning(
                logfmt,
                itask, itask.state, self.FLAG_POLLED_IGNORED, message,
                timestamp, submit_num, itask.flow_label)
            return False
        LOG.log(
            LOG_LEVELS.get(severity, INFO), logfmt, itask, itask.state, flag,
            message, timestamp, submit_num, itask.flow_label)
        return True
Example #18
 def recover_pub_from_pri(self):
     """Recover public database from private database."""
     if self.pub_dao.n_tries >= self.pub_dao.MAX_TRIES:
         self.copy_pri_to_pub()
         LOG.warning(
             "%(pub_db_name)s: recovered from %(pri_db_name)s" % {
                 "pub_db_name": self.pub_dao.db_file_name,
                 "pri_db_name": self.pri_dao.db_file_name})
         self.pub_dao.n_tries = 0
Example #19
 def _manip_task_jobs_callback(self,
                               ctx,
                               suite,
                               itasks,
                               summary_callback,
                               more_callbacks=None):
     """Callback when submit/poll/kill tasks command exits."""
     if ctx.ret_code:
         LOG.error(ctx)
     else:
         LOG.debug(ctx)
     # A dict for easy reference of (CYCLE, NAME, SUBMIT_NUM) -> TaskProxy
     #
     # Note for "reload": A TaskProxy instance may be replaced on reload, so
     # the "itasks" list may not reference the TaskProxy objects that
     # replace the old ones. The .reload_successor attribute provides the
     # link(s) for us to get to the latest replacement.
     #
     # Note for "kill": It is possible for a job to trigger its trap and
      # report back to the suite before this logic is called. If so, the task
     # will no longer be TASK_STATUS_SUBMITTED or TASK_STATUS_RUNNING, and
     # its output line will be ignored here.
     tasks = {}
     for itask in itasks:
         while itask.reload_successor is not None:
             itask = itask.reload_successor
         if itask.point is not None and itask.submit_num:
             submit_num = "%02d" % (itask.submit_num)
             tasks[(str(itask.point), itask.tdef.name, submit_num)] = itask
     handlers = [(self.batch_sys_mgr.OUT_PREFIX_SUMMARY, summary_callback)]
     if more_callbacks:
         for prefix, callback in more_callbacks.items():
             handlers.append((prefix, callback))
     out = ctx.out
     if not out:
         out = ""
     bad_tasks = dict(tasks)
     for line in out.splitlines(True):
         for prefix, callback in handlers:
             if line.startswith(prefix):
                 line = line[len(prefix):].strip()
                 try:
                     path = line.split("|", 2)[1]  # timestamp, path, status
                     point, name, submit_num = path.split(os.sep, 2)
                     if prefix == self.batch_sys_mgr.OUT_PREFIX_SUMMARY:
                         del bad_tasks[(point, name, submit_num)]
                     itask = tasks[(point, name, submit_num)]
                     callback(suite, itask, ctx, line)
                 except (LookupError, ValueError, KeyError) as exc:
                     LOG.warning('Unhandled %s output: %s', ctx.cmd_key,
                                 line)
                     LOG.exception(exc)
     # Task jobs that are in the original command but did not get a status
     # in the output. Handle as failures.
     for key, itask in sorted(bad_tasks.items()):
         line = ("|".join([ctx.timestamp, os.sep.join(key), "1"]) + "\n")
         summary_callback(suite, itask, ctx, line)
Example #20
def _backup(tgt: Path) -> None:
    """Make a timestamped backup of a dir or file."""
    tstamp = get_current_time_string(use_basic_format=True)
    backup = Path(tgt).parent / (tgt.name + f'.{tstamp}')
    LOG.warning('Replacing an existing cylc-tutorials folder which will'
                f' be copied to {backup}')
    # NOTE: shutil interfaces don't fully support Path objects at all
    # python versions
    shutil.move(str(tgt), str(backup))
Example #21
 def is_valid_point(self, point):
     """Return True if point is on-sequence and within bounds."""
     for sequence in self.sequences:
         if sequence.is_valid(point):
             return True
     LOG.warning("%s%s, %s" % (
         self.ERR_PREFIX_TASK_NOT_ON_SEQUENCE, self.name, point))
     return False
Example #23
 def execute_queued_items(self):
     """Execute queued items for each table."""
     try:
         for table in self.tables.values():
             # DELETE statements may have varying number of WHERE args so we
             # can only executemany for each identical template statement.
             for stmt, stmt_args_list in table.delete_queues.items():
                 self._execute_stmt(stmt, stmt_args_list)
             # INSERT statements are uniform for each table, so all INSERT
             # statements can be executed using a single "executemany" call.
             if table.insert_queue:
                 self._execute_stmt(table.get_insert_stmt(),
                                    table.insert_queue)
             # UPDATE statements can have varying number of SET and WHERE
             # args so we can only executemany for each identical template
             # statement.
             for stmt, stmt_args_list in table.update_queues.items():
                 self._execute_stmt(stmt, stmt_args_list)
         # Connection should only be opened if we have executed something.
         if self.conn is None:
             return
         self.conn.commit()
     except sqlite3.Error:
         if not self.is_public:
             raise
         self.n_tries += 1
         LOG.warning(
             "%(file)s: write attempt (%(attempt)d) did not complete\n" % {
                 "file": self.db_file_name,
                 "attempt": self.n_tries
             })
         if self.conn is not None:
             try:
                 self.conn.rollback()
             except sqlite3.Error:
                 pass
         return
     else:
         # Clear the queues
         for table in self.tables.values():
             table.delete_queues.clear()
             table.insert_queue.clear()
             table.update_queues.clear()
         # Report public database retry recovery if necessary
         if self.n_tries:
             LOG.warning(
                 "%(file)s: recovered after (%(attempt)d) attempt(s)\n" % {
                     "file": self.db_file_name,
                     "attempt": self.n_tries
                 })
         self.n_tries = 0
     finally:
         # Note: This is not strictly necessary. However, if the suite run
         # directory is removed, a forced reconnection to the private
         # database will ensure that the suite dies.
         self.close()
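
For readers unfamiliar with the batching strategy the comments above describe, here is a minimal, self-contained sketch of grouping identical statement templates so each gets a single executemany call; the table and column names are invented:

    import sqlite3

    conn = sqlite3.connect(':memory:')
    conn.execute('CREATE TABLE task_states (name TEXT, status TEXT)')

    # INSERTs share one template, so one executemany covers the whole queue.
    insert_stmt = 'INSERT INTO task_states VALUES (?, ?)'
    conn.executemany(insert_stmt, [('foo', 'waiting'), ('bar', 'running')])

    # UPDATEs may differ in their SET/WHERE args, so queue them per template
    # and issue one executemany per identical statement.
    update_queues = {
        'UPDATE task_states SET status=? WHERE name=?':
            [('succeeded', 'foo'), ('failed', 'bar')],
    }
    for stmt, stmt_args_list in update_queues.items():
        conn.executemany(stmt, stmt_args_list)
    conn.commit()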
Example #24
    def remote_tidy(self):
        """Remove workflow contact files and keys from initialised remotes.

        Call "cylc remote-tidy".
        This method is called on workflow shutdown, so we want nothing to hang.
        Timeout any incomplete commands after 10 seconds.
        """
        # Issue all SSH commands in parallel
        procs = {}
        for install_target, message in self.remote_init_map.items():
            if message != REMOTE_FILE_INSTALL_DONE:
                continue
            if install_target == get_localhost_install_target():
                continue
            platform = get_random_platform_for_install_target(install_target)
            platform_n = platform['name']
            cmd = ['remote-tidy']
            if cylc.flow.flags.verbosity > 1:
                cmd.append('--debug')
            cmd.append(install_target)
            cmd.append(get_remote_workflow_run_dir(self.workflow))
            cmd = construct_ssh_cmd(cmd, platform, timeout='10s')
            LOG.debug("Removing authentication keys and contact file "
                      f"from remote: \"{install_target}\"")
            procs[platform_n] = (cmd,
                                 Popen(cmd,
                                       stdout=PIPE,
                                       stderr=PIPE,
                                       stdin=DEVNULL))
        # Wait for commands to complete for a max of 10 seconds
        timeout = time() + 10.0
        while procs and time() < timeout:
            for platform_n, (cmd, proc) in procs.copy().items():
                if proc.poll() is None:
                    continue
                del procs[platform_n]
                out, err = (f.decode() for f in proc.communicate())
                if proc.wait():
                    LOG.warning(
                        TaskRemoteMgmtError(
                            TaskRemoteMgmtError.MSG_TIDY, platform_n,
                            ' '.join(quote(item) for item in cmd),
                            proc.returncode, out, err))
        # Terminate any remaining commands
        for platform_n, (cmd, proc) in procs.items():
            try:
                proc.terminate()
            except OSError:
                pass
            out, err = (f.decode() for f in proc.communicate())
            if proc.wait():
                LOG.warning(
                    TaskRemoteMgmtError(TaskRemoteMgmtError.MSG_TIDY,
                                        platform_n,
                                        ' '.join(quote(item) for item in cmd),
                                        proc.returncode, out, err))
Example #25
def get_rose_vars(srcdir=None, opts=None):
    """Load template variables from Rose suite configuration.

    Loads the Rose suite configuration tree from the filesystem
    using the shell environment.

    Args:
        srcdir(pathlib.Path):
            Path to the Rose suite configuration
            (the directory containing the ``rose-suite.conf`` file).
        opts:
            Options object containing specification of optional
            configurations set by the CLI.

    Returns:
        dict - A dictionary of sections of rose-suite.conf.
        For each section either a dictionary or None is returned.
        E.g.
            {
                'env': {'MYVAR': 42},
                'empy:suite.rc': None,
                'jinja2:suite.rc': {
                    'myJinja2Var': {'yes': 'it is a dictionary!'}
                }
            }
    """
    # Set up blank page for returns.
    config = {'env': {}, 'template_variables': {}, 'templating_detected': None}

    # Return a blank config dict if srcdir does not exist
    if not rose_config_exists(srcdir, opts):
        if (getattr(opts, "opt_conf_keys", None)
                or getattr(opts, "defines", None)
                or getattr(opts, "rose_template_vars", None)):
            raise NotARoseSuiteException()
        return config

    # Load the raw config tree
    config_tree = rose_config_tree_loader(srcdir, opts)
    # Warn if root-dir set in config
    for string in list(config_tree.node):
        if 'root-dir' in string:
            LOG.warning('You have set "root-dir", which is not supported at '
                        'Cylc 8. Use `[install] symlink dirs` in global.cylc '
                        'instead.')
            break

    # Extract templatevars from the configuration
    get_rose_vars_from_config_node(config, config_tree.node, os.environ)

    # Export environment vars
    for key, val in config['env'].items():
        os.environ[key] = val

    return config
Example #26
 def _manip_task_jobs_callback(
         self, ctx, suite, itasks, summary_callback, more_callbacks=None):
     """Callback when submit/poll/kill tasks command exits."""
     if ctx.ret_code:
         LOG.error(ctx)
     else:
         LOG.debug(ctx)
     # A dict for easy reference of (CYCLE, NAME, SUBMIT_NUM) -> TaskProxy
     #
     # Note for "reload": A TaskProxy instance may be replaced on reload, so
     # the "itasks" list may not reference the TaskProxy objects that
     # replace the old ones. The .reload_successor attribute provides the
     # link(s) for us to get to the latest replacement.
     #
     # Note for "kill": It is possible for a job to trigger its trap and
      # report back to the suite before this logic is called. If so, the task
     # will no longer be TASK_STATUS_SUBMITTED or TASK_STATUS_RUNNING, and
     # its output line will be ignored here.
     tasks = {}
     for itask in itasks:
         while itask.reload_successor is not None:
             itask = itask.reload_successor
         if itask.point is not None and itask.submit_num:
             submit_num = "%02d" % (itask.submit_num)
             tasks[(str(itask.point), itask.tdef.name, submit_num)] = itask
     handlers = [(self.batch_sys_mgr.OUT_PREFIX_SUMMARY, summary_callback)]
     if more_callbacks:
         for prefix, callback in more_callbacks.items():
             handlers.append((prefix, callback))
     out = ctx.out
     if not out:
         out = ""
     bad_tasks = dict(tasks)
     for line in out.splitlines(True):
         for prefix, callback in handlers:
             if line.startswith(prefix):
                 line = line[len(prefix):].strip()
                 try:
                     path = line.split("|", 2)[1]  # timestamp, path, status
                     point, name, submit_num = path.split(os.sep, 2)
                     if prefix == self.batch_sys_mgr.OUT_PREFIX_SUMMARY:
                         del bad_tasks[(point, name, submit_num)]
                     itask = tasks[(point, name, submit_num)]
                     callback(suite, itask, ctx, line)
                 except (LookupError, ValueError, KeyError) as exc:
                     LOG.warning(
                         'Unhandled %s output: %s', ctx.cmd_key, line)
                     LOG.exception(exc)
     # Task jobs that are in the original command but did not get a status
     # in the output. Handle as failures.
     for key, itask in sorted(bad_tasks.items()):
         line = (
             "|".join([ctx.timestamp, os.sep.join(key), "1"]) + "\n")
         summary_callback(suite, itask, ctx, line)
Example #27
def upg(cfg, descr):
    """Upgrade old suite configuration."""
    u = upgrader(cfg, descr)
    u.obsolete('6.1.3', ['visualization', 'enable live graph movie'])
    u.obsolete('7.2.2', ['cylc', 'dummy mode'])
    u.obsolete('7.2.2', ['cylc', 'simulation mode'])
    u.obsolete('7.2.2', ['runtime', '__MANY__', 'dummy mode'])
    u.obsolete('7.2.2', ['runtime', '__MANY__', 'simulation mode'])
    u.obsolete('7.6.0', ['runtime', '__MANY__', 'enable resurrection'])
    u.obsolete('7.8.0',
               ['runtime', '__MANY__', 'suite state polling', 'template'])
    u.obsolete('7.8.1', ['cylc', 'events', 'reset timer'])
    u.obsolete('7.8.1', ['cylc', 'events', 'reset inactivity timer'])
    u.obsolete('7.8.1', ['runtime', '__MANY__', 'events', 'reset timer'])
    u.obsolete('8.0.0', ['cylc', 'log resolved dependencies'])
    u.obsolete('8.0.0', ['cylc', 'reference test', 'allow task failures'])
    u.obsolete('8.0.0', ['cylc', 'reference test', 'live mode suite timeout'])
    u.obsolete('8.0.0', ['cylc', 'reference test', 'dummy mode suite timeout'])
    u.obsolete('8.0.0',
               ['cylc', 'reference test', 'dummy-local mode suite timeout'])
    u.obsolete('8.0.0',
               ['cylc', 'reference test', 'simulation mode suite timeout'])
    u.obsolete('8.0.0', ['cylc', 'reference test', 'required run mode'])
    u.obsolete('8.0.0',
               ['cylc', 'reference test', 'suite shutdown event handler'])
    u.deprecate('8.0.0', ['cylc', 'abort if any task fails'],
                ['cylc', 'events', 'abort if any task fails'])
    u.obsolete('8.0.0', ['runtime', '__MANY__', 'job', 'shell'])
    u.upgrade()

    # Upgrader cannot do this type of move.
    try:
        keys = set()
        cfg['scheduling'].setdefault('graph', {})
        cfg['scheduling']['graph'].update(
            cfg['scheduling'].pop('dependencies'))
        graphdict = cfg['scheduling']['graph']
        for key, value in graphdict.copy().items():
            if isinstance(value, dict) and 'graph' in value:
                graphdict[key] = value['graph']
                keys.add(key)
        if keys:
            LOG.warning(
                "deprecated graph items were automatically upgraded in '%s':",
                descr)
            LOG.warning(
                ' * (8.0.0) %s -> %s - for X in:\n%s',
                u.show_keys(['scheduling', 'dependencies', 'X', 'graph']),
                u.show_keys(['scheduling', 'graph', 'X']),
                '\n'.join(sorted(keys)),
            )
    except KeyError:
        pass
Example #28
def upgrade_legacy_ids(*ids: str) -> List[str]:
    """Reformat IDs from legacy to contemporary format:

    If no upgrading is required it returns the identifiers unchanged.

    Args:
        *ids (tuple): Identifier list.

    Returns:
        tuple/list - Identifier list.

        # do nothing to contemporary ids:
        >>> upgrade_legacy_ids('workflow')
        ['workflow']

        >>> upgrade_legacy_ids('workflow', '//cycle')
        ['workflow', '//cycle']

        # upgrade legacy task.cycle ids:
        >>> upgrade_legacy_ids('workflow', 'task.123', 'task.234')
        ['workflow', '//123/task', '//234/task']

        # upgrade legacy cycle/task ids:
        >>> upgrade_legacy_ids('workflow', '123/task', '234/task')
        ['workflow', '//123/task', '//234/task']

        # upgrade mixed legacy ids:
        >>> upgrade_legacy_ids('workflow', 'task.123', '234/task')
        ['workflow', '//123/task', '//234/task']

        # upgrade legacy task states:
        >>> upgrade_legacy_ids('workflow', 'task.123:abc', '234/task:def')
        ['workflow', '//123/task:abc', '//234/task:def']

    """
    if len(ids) < 2:
        # only legacy relative references require upgrade => abort
        return list(ids)

    legacy_ids = [ids[0]]
    for id_ in ids[1:]:
        try:
            tokens = legacy_tokenise(id_)
        except ValueError:
            # not a valid legacy token => abort
            return list(ids)
        else:
            # upgrade this token
            legacy_ids.append(detokenise(tokens, selectors=True))

    LOG.warning(f'Cylc7 format is deprecated, using: {" ".join(legacy_ids)}'
                ' (see "cylc help id")')
    return legacy_ids
Example #29
    def remote_tidy(self):
        """Remove suite contact files and keys from initialised remotes.

        Call "cylc remote-tidy".
        This method is called on suite shutdown, so we want nothing to hang.
        Timeout any incomplete commands after 10 seconds.
        """
        # Issue all SSH commands in parallel
        procs = {}
        for platform, init_with_contact in self.remote_init_map.items():
            platform = get_platform(platform)
            host = get_host_from_platform(platform)
            owner = platform['owner']
            self.install_target = get_install_target_from_platform(platform)
            if init_with_contact != REMOTE_INIT_DONE:
                continue
            cmd = ['remote-tidy']
            if cylc.flow.flags.debug:
                cmd.append('--debug')
            cmd.append(self.install_target)
            cmd.append(get_remote_suite_run_dir(platform, self.suite))
            if is_remote_platform(platform):
                cmd = construct_platform_ssh_cmd(cmd, platform, timeout='10s')
            else:
                cmd = ['cylc'] + cmd
            procs[(host, owner)] = (
                cmd,
                Popen(cmd, stdout=PIPE, stderr=PIPE, stdin=DEVNULL))
        # Wait for commands to complete for a max of 10 seconds
        timeout = time() + 10.0
        while procs and time() < timeout:
            for (host, owner), (cmd, proc) in procs.copy().items():
                if proc.poll() is None:
                    continue
                del procs[(host, owner)]
                out, err = (f.decode() for f in proc.communicate())
                if proc.wait():
                    LOG.warning(TaskRemoteMgmtError(
                        TaskRemoteMgmtError.MSG_TIDY,
                        (host, owner), ' '.join(quote(item) for item in cmd),
                        proc.returncode, out, err))
        # Terminate any remaining commands
        for (host, owner), (cmd, proc) in procs.items():
            try:
                proc.terminate()
            except OSError:
                pass
                out, err = (f.decode() for f in proc.communicate())
            if proc.wait():
                LOG.warning(TaskRemoteMgmtError(
                    TaskRemoteMgmtError.MSG_TIDY,
                    (host, owner), ' '.join(quote(item) for item in cmd),
                    proc.returncode, out, err))
Example #30
 def execute_queued_items(self):
     """Execute queued items for each table."""
     try:
         for table in self.tables.values():
             # DELETE statements may have varying number of WHERE args so we
             # can only executemany for each identical template statement.
             for stmt, stmt_args_list in table.delete_queues.items():
                 self._execute_stmt(stmt, stmt_args_list)
             # INSERT statements are uniform for each table, so all INSERT
             # statements can be executed using a single "executemany" call.
             if table.insert_queue:
                 self._execute_stmt(
                     table.get_insert_stmt(), table.insert_queue)
             # UPDATE statements can have varying number of SET and WHERE
             # args so we can only executemany for each identical template
             # statement.
             for stmt, stmt_args_list in table.update_queues.items():
                 self._execute_stmt(stmt, stmt_args_list)
         # Connection should only be opened if we have executed something.
         if self.conn is None:
             return
         self.conn.commit()
     except sqlite3.Error:
         if not self.is_public:
             raise
         self.n_tries += 1
         LOG.warning(
             "%(file)s: write attempt (%(attempt)d) did not complete\n" % {
                 "file": self.db_file_name, "attempt": self.n_tries})
         if self.conn is not None:
             try:
                 self.conn.rollback()
             except sqlite3.Error:
                 pass
         return
     else:
         # Clear the queues
         for table in self.tables.values():
             table.delete_queues.clear()
             del table.insert_queue[:]  # list.clear avail from Python 3.3
             table.update_queues.clear()
         # Report public database retry recovery if necessary
         if self.n_tries:
             LOG.warning(
                 "%(file)s: recovered after (%(attempt)d) attempt(s)\n" % {
                     "file": self.db_file_name, "attempt": self.n_tries})
         self.n_tries = 0
     finally:
         # Note: This is not strictly necessary. However, if the suite run
         # directory is removed, a forced reconnection to the private
         # database will ensure that the suite dies.
         self.close()
Example #31
def get_rose_vars(srcdir=None, opts=None):
    """Load template variables from Rose suite configuration.

    Loads the Rose suite configuration tree from the filesystem
    using the shell environment.

    Args:
        srcdir(pathlib.Path):
            Path to the Rose suite configuration
            (the directory containing the ``rose-suite.conf`` file).
        opts:
            Options object containing specification of optional
            configurations set by the CLI.

    Returns:
        dict - A dictionary of sections of rose-suite.conf.
        For each section either a dictionary or None is returned.
        E.g.
            {
                'env': {'MYVAR': 42},
                'empy:suite.rc': None,
                'jinja2:suite.rc': {
                    'myJinja2Var': {'yes': 'it is a dictionary!'}
                }
            }
    """
    # Set up blank page for returns.
    config = {'env': {}, 'template_variables': {}, 'templating_detected': None}

    # Return a blank config dict if srcdir does not exist
    if not rose_config_exists(srcdir, opts):
        return config

    # Load the raw config tree
    config_tree = rose_config_tree_loader(srcdir, opts)

    # Warn if root-dir set in config:
    if 'root-dir' in config_tree.node:
        LOG.warning('You have set "root-dir", which at Cylc 8 does nothing. '
                    'See Cylc Install documentation.')

    # Extract templatevars from the configuration
    get_rose_vars_from_config_node(config, config_tree.node, os.environ)

    # Export environment vars
    for key, val in config['env'].items():
        os.environ[key] = val

    return config
Example #32
    def _get_host_metrics(self):
        """Run "cylc get-host-metrics" commands on hosts.

        Return (dict): {host: host-metrics-dict, ...}
        """
        host_stats = {}
        # Run "cylc get-host-metrics" commands on hosts
        host_proc_map = {}
        cmd = [self.CMD_BASE] + sorted(self._get_host_metrics_opts())
        # Start up commands on hosts
        for host in self.hosts:
            if is_remote_host(host):
                host_proc_map[host] = remote_cylc_cmd(cmd,
                                                      stdin=None,
                                                      host=host,
                                                      capture_process=True)
            elif 'localhost' in host_proc_map:
                continue  # Don't duplicate localhost
            else:
                # 1st instance of localhost
                host_proc_map['localhost'] = run_cmd(['cylc'] + cmd,
                                                     capture_process=True)
        # Collect results from commands
        while host_proc_map:
            for host, proc in list(host_proc_map.copy().items()):
                if proc.poll() is None:
                    continue
                del host_proc_map[host]
                out, err = (f.decode() for f in proc.communicate())
                if proc.wait():
                    # Command failed in verbose/debug mode
                    LOG.warning(
                        "can't get host metric from '%s'" +
                        "%s  # returncode=%d, err=%s\n", host, ' '.join(
                            (quote(item) for item in cmd)), proc.returncode,
                        err)
                else:
                    # Command OK
                    # Users may have profile scripts that write to STDOUT.
                    # Drop all output lines until the first character of a
                    # line is '{'. Hopefully this is enough to find us the
                    # first line that denotes the beginning of the expected
                    # JSON data structure.
                    out = ''.join(
                        dropwhile(lambda s: not s.startswith('{'),
                                  out.splitlines(True)))
                    host_stats[host] = json.loads(out)
            sleep(0.01)
        return host_stats
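
The dropwhile cleanup in the "Command OK" branch above, seen in isolation (the noise line stands in for profile-script output and is invented):

    from itertools import dropwhile

    out = 'noise from a login script\n{"load:5": 0.1}\n'
    out = ''.join(
        dropwhile(lambda s: not s.startswith('{'), out.splitlines(True)))
    # out is now '{"load:5": 0.1}\n', ready for json.loads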
Example #33
    def kill_task_jobs(self, suite, itasks):
        """Kill jobs of active tasks, and hold the tasks.

        Tasks that are not in an active state are skipped with a warning.

        """
        to_kill_tasks = []
        for itask in itasks:
            if itask.state(*TASK_STATUSES_ACTIVE):
                itask.state.reset(is_held=True)
                to_kill_tasks.append(itask)
            else:
                LOG.warning('skipping %s: task not killable' % itask.identity)
        self._run_job_cmd(self.JOBS_KILL, suite, to_kill_tasks,
                          self._kill_task_jobs_callback)
Example #34
def process_mail_footer(
    mail_footer_tmpl: str,
    template_vars,
) -> str:
    """Process mail footer for workflow or task events.

    Returns an empty string if issues occur in processing.
    """
    try:
        return (mail_footer_tmpl + '\n') % template_vars
    except (KeyError, ValueError):
        LOG.warning(
            f'Ignoring bad mail footer template: {mail_footer_tmpl}'
        )
    return ''
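
A brief usage sketch of the footer templating above; the template key "log_dir" is hypothetical, not a documented variable:

    # Successful %-substitution appends a trailing newline:
    process_mail_footer('See: %(log_dir)s', {'log_dir': '~/cylc-run/f/log'})
    # -> 'See: ~/cylc-run/f/log\n'

    # A key missing from template_vars is caught, logged and yields '':
    process_mail_footer('See: %(missing)s', {})
    # -> ''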
Example #35
    def kill_task_jobs(self, suite, itasks):
        """Kill jobs of active tasks, and hold the tasks.

        Tasks that are not in an active state are skipped with a warning.

        """
        to_kill_tasks = []
        for itask in itasks:
            if itask.state.status in TASK_STATUSES_ACTIVE:
                itask.state.set_held()
                to_kill_tasks.append(itask)
            else:
                LOG.warning('skipping %s: task not killable' % itask.identity)
        self._run_job_cmd(
            self.JOBS_KILL, suite, to_kill_tasks,
            self._kill_task_jobs_callback)
Example #36
    def _get_host_metrics(self):
        """Run "cylc get-host-metrics" commands on hosts.

        Return (dict): {host: host-metrics-dict, ...}
        """
        host_stats = {}
        # Run "cylc get-host-metrics" commands on hosts
        host_proc_map = {}
        cmd = [self.CMD_BASE] + sorted(self._get_host_metrics_opts())
        # Start up commands on hosts
        for host in self.hosts:
            if is_remote_host(host):
                host_proc_map[host] = remote_cylc_cmd(
                    cmd, stdin=None, host=host, capture_process=True)
            elif 'localhost' in host_proc_map:
                continue  # Don't duplicate localhost
            else:
                # 1st instance of localhost
                host_proc_map['localhost'] = run_cmd(
                    ['cylc'] + cmd, capture_process=True)
        # Collect results from commands
        while host_proc_map:
            for host, proc in list(host_proc_map.copy().items()):
                if proc.poll() is None:
                    continue
                del host_proc_map[host]
                out, err = (f.decode() for f in proc.communicate())
                if proc.wait():
                    # Command failed in verbose/debug mode
                    LOG.warning(
                        "can't get host metric from '%s'" +
                        "%s  # returncode=%d, err=%s\n",
                        host, ' '.join((quote(item) for item in cmd)),
                        proc.returncode, err)
                else:
                    # Command OK
                    # Users may have profile scripts that write to STDOUT.
                    # Drop all output lines until the first character of a
                    # line is '{'. Hopefully this is enough to find us the
                    # first line that denotes the beginning of the expected
                    # JSON data structure.
                    out = ''.join(dropwhile(
                        lambda s: not s.startswith('{'), out.splitlines(True)))
                    host_stats[host] = json.loads(out)
            sleep(0.01)
        return host_stats
Example #37
    def _process_message_started(self, itask, event_time):
        """Helper for process_message, handle a started message."""
        if itask.job_vacated:
            itask.job_vacated = False
            LOG.warning("[%s] -Vacated job restarted", itask)
        self.pflag = True
        job_d = get_task_job_id(itask.point, itask.tdef.name, itask.submit_num)
        if itask.state.reset(TASK_STATUS_RUNNING):
            self.setup_event_handlers(itask, 'started', 'job started')
            self.job_pool.set_job_state(job_d, TASK_STATUS_RUNNING)
        itask.set_summary_time('started', event_time)
        self.job_pool.set_job_time(job_d, 'started', event_time)
        self._reset_job_timers(itask)
        self.suite_db_mgr.put_update_task_jobs(
            itask, {"time_run": itask.summary['started_time_string']})

        # submission was successful so reset submission try number
        if TASK_STATUS_SUBMIT_RETRYING in itask.try_timers:
            itask.try_timers[TASK_STATUS_SUBMIT_RETRYING].num = 0
Example #38
 def load(self):
     """Load or reload configuration from files."""
     self.sparse.clear()
     self.dense.clear()
     LOG.debug("Loading site/user global config files")
     conf_path_str = os.getenv("CYLC_CONF_PATH")
     if conf_path_str is None:
         # CYLC_CONF_PATH not defined, use default locations.
         for conf_dir_1, conf_dir_2, conf_type in [
                 (self.SITE_CONF_DIR, self.SITE_CONF_DIR_OLD,
                  upgrader.SITE_CONFIG),
                 (self.USER_CONF_DIR_1, self.USER_CONF_DIR_2,
                  upgrader.USER_CONFIG)]:
             fname1 = os.path.join(conf_dir_1, self.CONF_BASE)
             fname2 = os.path.join(conf_dir_2, self.CONF_BASE)
             if os.access(fname1, os.F_OK | os.R_OK):
                 fname = fname1
             elif os.access(fname2, os.F_OK | os.R_OK):
                 fname = fname2
             else:
                 continue
             try:
                 self.loadcfg(fname, conf_type)
             except ParsecError as exc:
                 if conf_type == upgrader.SITE_CONFIG:
                     # Warn on bad site file (users can't fix it).
                     LOG.warning(
                         'ignoring bad %s %s:\n%s', conf_type, fname, exc)
                 else:
                     # Abort on bad user file (users can fix it).
                     LOG.error('bad %s %s', conf_type, fname)
                     raise
     elif conf_path_str:
         # CYLC_CONF_PATH defined with a value
         for path in conf_path_str.split(os.pathsep):
             fname = os.path.join(path, self.CONF_BASE)
             if os.access(fname, os.F_OK | os.R_OK):
                 self.loadcfg(fname, upgrader.USER_CONFIG)
     # (OK if no global.rc is found, just use system defaults).
     self.transform()
Example #39
    def _execute_stmt(self, stmt, stmt_args_list):
        """Helper for "self.execute_queued_items".

        Execute a statement. On failure, log a warning if this is the
        public database, then re-raise the error either way; the caller
        handles public database failures.
        """
        try:
            self.connect()
            self.conn.executemany(stmt, stmt_args_list)
        except sqlite3.Error:
            if not self.is_public:
                raise
            if cylc.flow.flags.debug:
                traceback.print_exc()
            err_log = (
                "cannot execute database statement:\n"
                "file=%(file)s:\nstmt=%(stmt)s"
            ) % {"file": self.db_file_name, "stmt": stmt}
            for i, stmt_args in enumerate(stmt_args_list):
                err_log += ("\nstmt_args[%(i)d]=%(stmt_args)s" % {
                    "i": i, "stmt_args": stmt_args})
            LOG.warning(err_log)
            raise
Example #40
    def _remove_bad_hosts(self, mock_host_stats=None):
        """Return dictionary of 'good' hosts with their metric stats.

        Run 'get-host-metrics' on each run host in parallel and store the
        extracted stats for each host, else an empty JSON structure. Filter
        out 'bad' hosts: those for which metric data cannot be obtained
        from the command, or for which at least one metric value fails a
        specified threshold.
        """
        if mock_host_stats:  # Create fake data for unittest purposes (only).
            host_stats = dict(mock_host_stats)  # Prevent mutable object issues
        else:
            if not self.hosts:
                return {}
            host_stats = self._get_host_metrics()
        # Analyse get-host-metrics results
        for host, data in list(dict(host_stats).items()):
            if not data:
                # No results for host (command failed) -> skip.
                host_stats.pop(host)
                continue
            for measure, cutoff in self.parsed_thresholds.items():
                datum = data[measure]
                # Cutoff is a minimum or maximum depending on measure context.
                if ((datum > cutoff and measure.startswith("load")) or
                    (datum < cutoff and (
                        measure == "memory" or
                        measure.startswith("disk-space")))):
                    # Alert user that threshold has not been met.
                    LOG.warning(
                        "host '%s' did not pass %s threshold " +
                        "(%s %s threshold %s)\n",
                        host, measure, datum,
                        ">" if measure.startswith("load") else "<", cutoff)
                    host_stats.pop(host)
                    break
        return host_stats
Example #41
    def register(self, reg=None, source=None, redirect=False):
        """Register a suite, or renew its registration.

        Create suite service directory and symlink to suite source location.

        Args:
            reg (str): suite name, default basename($PWD).
            source (str): directory location of suite.rc file, default $PWD.
            redirect (bool): allow reuse of existing name and run directory.

        Return:
            The registered suite name (which may be computed here).

        Raise:
            SuiteServiceFileError:
                No suite.rc file found in source location.
                Illegal name (can look like a relative path, but not absolute).
                Another suite already has this name (unless --redirect).
        """
        if reg is None:
            reg = os.path.basename(os.getcwd())

        if os.path.isabs(reg):
            raise SuiteServiceFileError(
                "suite name cannot be an absolute path: %s" % reg)

        if source is not None:
            if os.path.basename(source) == self.FILE_BASE_SUITE_RC:
                source = os.path.dirname(source)
        else:
            source = os.getcwd()

        # suite.rc must exist so we can detect accidentally reversed args.
        source = os.path.abspath(source)
        if not os.path.isfile(os.path.join(source, self.FILE_BASE_SUITE_RC)):
            raise SuiteServiceFileError("no suite.rc in %s" % source)

        # Create service dir if necessary.
        srv_d = self.get_suite_srv_dir(reg)
        os.makedirs(srv_d, exist_ok=True)

        # See if suite already has a source or not
        try:
            orig_source = os.readlink(
                os.path.join(srv_d, self.FILE_BASE_SOURCE))
        except OSError:
            orig_source = None
        else:
            if not os.path.isabs(orig_source):
                orig_source = os.path.normpath(
                    os.path.join(srv_d, orig_source))
        if orig_source is not None and source != orig_source:
            if not redirect:
                raise SuiteServiceFileError(
                    "the name '%s' already points to %s.\nUse "
                    "--redirect to re-use an existing name and run "
                    "directory." % (reg, orig_source))
            LOG.warning(
                "the name '%(reg)s' points to %(old)s.\nIt will now"
                " be redirected to %(new)s.\nFiles in the existing %(reg)s run"
                " directory will be overwritten.\n",
                {'reg': reg, 'old': orig_source, 'new': source})
            # Remove symlink to the original suite.
            os.unlink(os.path.join(srv_d, self.FILE_BASE_SOURCE))

        # Create symlink to the suite, if it doesn't already exist.
        if orig_source is None or source != orig_source:
            target = os.path.join(srv_d, self.FILE_BASE_SOURCE)
            if (os.path.abspath(source) ==
                    os.path.abspath(os.path.dirname(srv_d))):
                # If source happens to be the run directory,
                # create .service/source -> ..
                source_str = ".."
            else:
                source_str = source
            os.symlink(source_str, target)

        print('REGISTERED %s -> %s' % (reg, source))
        return reg
Example #42
 def _run_event_mail_callback(proc_ctx):
     """Callback the mail command for notification of a suite event."""
     if proc_ctx.ret_code:
         LOG.warning(str(proc_ctx))
     else:
         LOG.info(str(proc_ctx))