Example 1
 def list_suites(self, regfilter=None):
     """Return a filtered list of valid suite registrations."""
     rec_regfilter = None
     if regfilter:
         try:
             rec_regfilter = re.compile(regfilter)
         except re.error as exc:
             raise ValueError("%s: %s" % (regfilter, exc))
     run_d = glbl_cfg().get_host_item('run directory')
     results = []
     for dirpath, dnames, _ in os.walk(run_d, followlinks=True):
         # Always descend for top directory, but
         # don't descend further if it has a .service/ dir
         if dirpath != run_d and self.DIR_BASE_SRV in dnames:
             dnames[:] = []
         # Choose only suites with .service and matching filter
         reg = os.path.relpath(dirpath, run_d)
         path = os.path.join(dirpath, self.DIR_BASE_SRV)
         if (not self._locate_item(self.FILE_BASE_SOURCE, path) or
                 rec_regfilter and not rec_regfilter.search(reg)):
             continue
         try:
             results.append([
                 reg,
                 self.get_suite_source_dir(reg),
                 self.get_suite_title(reg)])
         except (IOError, SuiteServiceFileError) as exc:
             LOG.error('%s: %s', reg, exc)
     return results
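Note the `dnames[:] = []` idiom above: assigning to the slice prunes os.walk in place so it does not descend below a registered suite. A minimal standalone sketch of the same pruning pattern (the marker directory name is illustrative):

import os

MARKER = '.service'  # illustrative marker directory name

def find_marked_dirs(root):
    """Yield paths (relative to root) whose directory contains MARKER."""
    for dirpath, dnames, _ in os.walk(root, followlinks=True):
        if MARKER in dnames:
            dnames[:] = []  # prune: do not descend below a match
            yield os.path.relpath(dirpath, root)

# for reg in find_marked_dirs(os.path.expanduser('~/cylc-run')):
#     print(reg)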
Example 2
    def poll_task_jobs(self, suite, itasks, poll_succ=True, msg=None):
        """Poll jobs of specified tasks.

        Any job that is or was submitted or running can be polled, except for
        retrying tasks, which would (correctly) poll as failed. Succeeded
        tasks are polled only if poll_succ is True.

        This method uses _poll_task_jobs_callback() and
        _manip_task_jobs_callback() as helper/callback methods.

        _poll_task_job_callback() executes one specific job.
        """
        to_poll_tasks = []
        pollable_statuses = set([
            TASK_STATUS_SUBMITTED, TASK_STATUS_RUNNING, TASK_STATUS_FAILED])
        if poll_succ:
            pollable_statuses.add(TASK_STATUS_SUCCEEDED)
        for itask in itasks:
            if itask.state.status in pollable_statuses:
                to_poll_tasks.append(itask)
            else:
                LOG.debug("skipping %s: not pollable, "
                          "or skipping 'succeeded' tasks" % itask.identity)
        if to_poll_tasks:
            if msg is not None:
                LOG.info(msg)
            self._run_job_cmd(
                self.JOBS_POLL, suite, to_poll_tasks,
                self._poll_task_jobs_callback)
Example 3
def addict(cfig, key, val, parents, index):
    """Add a new [parents...]key=value pair to a nested dict."""
    for p in parents:
        # drop down the parent list
        cfig = cfig[p]

    if not isinstance(cfig, dict):
        # an item of this name has already been encountered at this level
        raise FileParseError(
            'line %d: already encountered %s' % (
                index, itemstr(parents, key, val)))

    if key in cfig:
        # this item already exists
        if (key == 'graph' and (
                parents == ['scheduling', 'dependencies'] or
                len(parents) == 3 and
                parents[-3:-1] == ['scheduling', 'dependencies'])):
            # append the new graph string to the existing one
            LOG.debug('Merging graph strings under %s', itemstr(parents))
            if not isinstance(cfig[key], list):
                cfig[key] = [cfig[key]]
            cfig[key].append(val)
        else:
            # otherwise override the existing item
            LOG.debug(
                'overriding %s old value: %s new value: %s',
                itemstr(parents, key), cfig[key], val)
            cfig[key] = val
    else:
        cfig[key] = val
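For reference, the descend-then-set pattern used by addict() can be shown on plain dicts; this simplified sketch merges every repeated key into a list rather than only 'graph' items, and omits the FileParseError handling:

def set_nested(cfig, key, val, parents):
    """Set [parents...]key = val in a nested dict, creating levels as needed."""
    for p in parents:
        cfig = cfig.setdefault(p, {})  # drop down (and create) each level
    if key in cfig and isinstance(cfig[key], list):
        cfig[key].append(val)
    elif key in cfig:
        cfig[key] = [cfig[key], val]  # merge a repeated item into a list
    else:
        cfig[key] = val

conf = {}
set_nested(conf, 'graph', 'a => b', ['scheduling', 'dependencies'])
set_nested(conf, 'graph', 'b => c', ['scheduling', 'dependencies'])
# conf == {'scheduling': {'dependencies': {'graph': ['a => b', 'b => c']}}}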
Example 4
 def _job_cmd_out_callback(suite, itask, cmd_ctx, line):
     """Callback on job command STDOUT/STDERR."""
     if cmd_ctx.cmd_kwargs.get("host") and cmd_ctx.cmd_kwargs.get("user"):
         owner_at_host = "(%(user)s@%(host)s) " % cmd_ctx.cmd_kwargs
     elif cmd_ctx.cmd_kwargs.get("host"):
         owner_at_host = "(%(host)s) " % cmd_ctx.cmd_kwargs
     elif cmd_ctx.cmd_kwargs.get("user"):
         owner_at_host = "(%(user)s@localhost) " % cmd_ctx.cmd_kwargs
     else:
         owner_at_host = ""
     try:
         timestamp, _, content = line.split("|")
     except ValueError:
         pass
     else:
         line = "%s %s" % (timestamp, content)
     job_activity_log = get_task_job_activity_log(
         suite, itask.point, itask.tdef.name)
     try:
         with open(job_activity_log, "ab") as handle:
             if not line.endswith("\n"):
                 line += "\n"
             handle.write((owner_at_host + line).encode())
     except IOError as exc:
         LOG.warning("%s: write failed\n%s" % (job_activity_log, exc))
         LOG.warning("[%s] -%s%s", itask, owner_at_host, line)
Example 5
 def _kill_task_job_callback(self, suite, itask, cmd_ctx, line):
     """Helper for _kill_task_jobs_callback, on one task job."""
     ctx = SubProcContext(self.JOBS_KILL, None)
     ctx.out = line
     try:
         ctx.timestamp, _, ctx.ret_code = line.split("|", 2)
     except ValueError:
         ctx.ret_code = 1
         ctx.cmd = cmd_ctx.cmd  # print original command on failure
     else:
         ctx.ret_code = int(ctx.ret_code)
         if ctx.ret_code:
             ctx.cmd = cmd_ctx.cmd  # print original command on failure
     log_task_job_activity(ctx, suite, itask.point, itask.tdef.name)
     log_lvl = INFO
     log_msg = 'killed'
     if ctx.ret_code:  # non-zero exit status
         log_lvl = WARNING
         log_msg = 'kill failed'
         itask.state.kill_failed = True
     elif itask.state.status == TASK_STATUS_SUBMITTED:
         self.task_events_mgr.process_message(
             itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED,
             ctx.timestamp)
     elif itask.state.status == TASK_STATUS_RUNNING:
         self.task_events_mgr.process_message(
             itask, CRITICAL, TASK_OUTPUT_FAILED)
     else:
         log_lvl = DEBUG
         log_msg = (
             'ignoring job kill result, unexpected task state: %s' %
             itask.state.status)
     itask.set_summary_message(log_msg)
     LOG.log(log_lvl, "[%s] -job(%02d) %s" % (
         itask.identity, itask.submit_num, log_msg))
Example 6
File: server.py Project: cylc/cylc
    def _receiver(self, message):
        """Wrap incoming messages and dispatch them to exposed methods.

        Args:
            message (dict): message contents
        """
        # determine the server method to call
        try:
            method = getattr(self, message['command'])
            args = message['args']
            args.update({'user': message['user']})
            if 'meta' in message:
                args['meta'] = message['meta']
        except KeyError:
            # malformed message
            return {'error': {
                'message': 'Request missing required field(s).'}}
        except AttributeError:
            # no exposed method by that name
            return {'error': {
                'message': 'No method by the name "%s"' % message['command']}}

        # generate response
        try:
            response = method(**args)
        except Exception as exc:
            # includes incorrect arguments (TypeError)
            LOG.exception(exc)  # note the error server side
            import traceback
            return {'error': {
                'message': str(exc), 'traceback': traceback.format_exc()}}

        return {'data': response}
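The dispatch pattern in _receiver() (look up the method with getattr, merge request metadata into the keyword arguments, and convert failures into error payloads) can be exercised outside the server; the Receiver class and echo method below are made-up stand-ins:

class Receiver:
    """Dispatch {'command': ..., 'args': {...}} messages to exposed methods."""

    def echo(self, text, user=None):
        return {'text': text, 'user': user}

    def receive(self, message):
        try:
            method = getattr(self, message['command'])
            args = dict(message['args'])
            args['user'] = message.get('user')
        except KeyError:
            return {'error': {'message': 'Request missing required field(s).'}}
        except AttributeError:
            return {'error': {'message': 'No method "%s"' % message['command']}}
        try:
            return {'data': method(**args)}
        except Exception as exc:  # e.g. TypeError from bad arguments
            return {'error': {'message': str(exc)}}

print(Receiver().receive({'command': 'echo', 'args': {'text': 'hi'}, 'user': 'me'}))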
Example 7
File: server.py Project: cylc/cylc
 def stop(self):
     """Finish serving the current request then stop the server."""
     LOG.debug('stopping zmq server...')
     self.queue.put('STOP')
     self.thread.join()  # wait for the listener to return
     self.socket.close()
     LOG.debug('...stopped')
Example 8
    def remote_tidy(self):
        """Remove suite contact files from initialised remotes.

        Call "cylc remote-tidy".
        This method is called on suite shutdown, so we want nothing to hang.
        Timeout any incomplete commands after 10 seconds.

        Also remove UUID file on suite host ".service/uuid".
        """
        # Remove UUID file
        uuid_fname = os.path.join(
            self.suite_srv_files_mgr.get_suite_srv_dir(self.suite),
            FILE_BASE_UUID)
        try:
            os.unlink(uuid_fname)
        except OSError:
            pass
        # Issue all SSH commands in parallel
        procs = {}
        for (host, owner), init_with_contact in self.remote_init_map.items():
            if init_with_contact != REMOTE_INIT_DONE:
                continue
            cmd = ['timeout', '10', 'cylc', 'remote-tidy']
            if is_remote_host(host):
                cmd.append('--host=%s' % host)
            if is_remote_user(owner):
                cmd.append('--user=%s' % owner)
            if cylc.flow.flags.debug:
                cmd.append('--debug')
            cmd.append(os.path.join(glbl_cfg().get_derived_host_item(
                self.suite, 'suite run directory', host, owner)))
            procs[(host, owner)] = (
                cmd,
                Popen(cmd, stdout=PIPE, stderr=PIPE, stdin=open(os.devnull)))
        # Wait for commands to complete for a max of 10 seconds
        timeout = time() + 10.0
        while procs and time() < timeout:
            for (host, owner), (cmd, proc) in procs.copy().items():
                if proc.poll() is None:
                    continue
                del procs[(host, owner)]
                out, err = (f.decode() for f in proc.communicate())
                if proc.wait():
                    LOG.warning(TaskRemoteMgmtError(
                        TaskRemoteMgmtError.MSG_TIDY,
                        (host, owner), ' '.join(quote(item) for item in cmd),
                        proc.returncode, out, err))
        # Terminate any remaining commands
        for (host, owner), (cmd, proc) in procs.items():
            try:
                proc.terminate()
            except OSError:
                pass
            out, err = proc.communicate()
            if proc.wait():
                LOG.warning(TaskRemoteMgmtError(
                    TaskRemoteMgmtError.MSG_TIDY,
                    (host, owner), ' '.join(quote(item) for item in cmd),
                    proc.returncode, out, err))
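The shutdown pattern above (launch all commands, then poll them against a single deadline, then terminate stragglers) works with any Popen commands; a small POSIX-only sketch with throwaway commands:

from subprocess import DEVNULL, PIPE, Popen
from time import sleep, time

procs = {
    name: Popen(cmd, stdout=PIPE, stderr=PIPE, stdin=DEVNULL)
    for name, cmd in [('quick', ['true']), ('slow', ['sleep', '30'])]
}
deadline = time() + 2.0
while procs and time() < deadline:
    for name, proc in list(procs.items()):
        if proc.poll() is None:
            continue  # still running
        del procs[name]
        out, err = (f.decode() for f in proc.communicate())
        if proc.returncode:
            print(f'{name} failed: {err.strip()}')
    sleep(0.1)
for name, proc in procs.items():  # terminate anything that missed the deadline
    proc.terminate()
    proc.communicate()
    print(f'{name} terminated after timeout')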
Example 9
 def create_directory(dir_, name):
     """Create directory. Raise GlobalConfigError on error."""
     try:
         os.makedirs(dir_, exist_ok=True)
     except OSError as exc:
         LOG.exception(exc)
         raise GlobalConfigError(
             'Failed to create directory "' + name + '"')
Example 10
 def recover_pub_from_pri(self):
     """Recover public database from private database."""
     if self.pub_dao.n_tries >= self.pub_dao.MAX_TRIES:
         self.copy_pri_to_pub()
         LOG.warning(
             "%(pub_db_name)s: recovered from %(pri_db_name)s" % {
                 "pub_db_name": self.pub_dao.db_file_name,
                 "pri_db_name": self.pri_dao.db_file_name})
         self.pub_dao.n_tries = 0
Example 11
    def clear_broadcast(
            self, point_strings=None, namespaces=None, cancel_settings=None):
        """Clear broadcasts globally, or for listed namespaces and/or points.

        Return a tuple (modified_settings, bad_options), where:
        * modified_settings is similar to the return value of the "put" method,
          but for removed broadcasts.
        * bad_options is a dict in the form:
              {"point_strings": ["20020202", ..."], ...}
          The dict is only populated if there are options not associated with
          previous broadcasts. The keys can be:
          * point_strings: a list of bad point strings.
          * namespaces: a list of bad namespaces.
          * cancel: a list of tuples. Each tuple contains the keys of a bad
            setting.
        """
        # If cancel_settings defined, only clear specific broadcasts
        cancel_keys_list = self._settings_to_keys_list(cancel_settings)

        # Clear broadcasts
        modified_settings = []
        with self.lock:
            for point_string, point_string_settings in self.broadcasts.items():
                if point_strings and point_string not in point_strings:
                    continue
                for namespace, namespace_settings in (
                        point_string_settings.items()):
                    if namespaces and namespace not in namespaces:
                        continue
                    stuff_stack = [([], namespace_settings)]
                    while stuff_stack:
                        keys, stuff = stuff_stack.pop()
                        for key, value in stuff.items():
                            if isinstance(value, dict):
                                stuff_stack.append((keys + [key], value))
                            elif (not cancel_keys_list or
                                    keys + [key] in cancel_keys_list):
                                stuff[key] = None
                                setting = {key: value}
                                for rkey in reversed(keys):
                                    setting = {rkey: setting}
                                modified_settings.append(
                                    (point_string, namespace, setting))

        # Prune any empty branches
        bad_options = self._get_bad_options(
            self._prune(), point_strings, namespaces, cancel_keys_list)

        # Log the broadcast
        self.suite_db_mgr.put_broadcast(modified_settings, is_cancel=True)
        LOG.info(
            get_broadcast_change_report(modified_settings, is_cancel=True))
        if bad_options:
            LOG.error(get_broadcast_bad_options_report(bad_options))

        return modified_settings, bad_options
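The stuff_stack loop above is an iterative depth-first walk over the nested broadcast settings; the same traversal on a plain dict, stripped of the point/namespace bookkeeping, looks like this:

def clear_matching_leaves(tree, cancel_keys_list=None):
    """Null matching leaves; return the (keys, old value) pairs cleared."""
    cleared = []
    stack = [([], tree)]
    while stack:
        keys, node = stack.pop()
        for key, value in node.items():
            if isinstance(value, dict):
                stack.append((keys + [key], value))
            elif not cancel_keys_list or keys + [key] in cancel_keys_list:
                node[key] = None
                cleared.append((keys + [key], value))
    return cleared

settings = {'environment': {'ANSWER': '42'}, 'script': 'true'}
print(clear_matching_leaves(settings, [['environment', 'ANSWER']]))
# [(['environment', 'ANSWER'], '42')] -- and the leaf is now None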
Example 12
def addsect(cfig, sname, parents):
    """Add a new section to a nested dict."""
    for p in parents:
        # drop down the parent list
        cfig = cfig[p]
    if sname in cfig:
        # this doesn't warrant a warning unless contained items are repeated
        LOG.debug(
            'Section already encountered: %s', itemstr(parents + [sname]))
    else:
        cfig[sname] = OrderedDictWithDefaults()
Example 13
 def _run_event_custom_handlers(self, config, ctx):
     """Helper for "run_event_handlers", custom event handlers."""
     # Look for event handlers
     # 1. Handlers for specific event
     # 2. General handlers
     handlers = self.get_events_conf(config, '%s handler' % ctx.event)
     if not handlers and (
             ctx.event in
             self.get_events_conf(config, 'handler events', [])):
         handlers = self.get_events_conf(config, 'handlers')
     if not handlers:
         return
     for i, handler in enumerate(handlers):
         cmd_key = ('%s-%02d' % (self.SUITE_EVENT_HANDLER, i), ctx.event)
         # Handler command may be a string for substitution
         abort_on_error = self.get_events_conf(
             config, 'abort if %s handler fails' % ctx.event)
         try:
             handler_data = {
                 'event': quote(ctx.event),
                 'message': quote(ctx.reason),
                 'suite': quote(ctx.suite),
                 'suite_uuid': quote(str(ctx.uuid_str)),
             }
             if config.cfg['meta']:
                 for key, value in config.cfg['meta'].items():
                     if key == "URL":
                         handler_data["suite_url"] = quote(value)
                     handler_data[key] = quote(value)
             cmd = handler % (handler_data)
         except KeyError as exc:
             message = "%s bad template: %s" % (cmd_key, exc)
             LOG.error(message)
             if abort_on_error:
                 raise SuiteEventError(message)
             continue
         if cmd == handler:
             # Nothing substituted, assume classic interface
             cmd = "%s '%s' '%s' '%s'" % (
                 handler, ctx.event, ctx.suite, ctx.reason)
         proc_ctx = SubProcContext(
             cmd_key, cmd, env=dict(os.environ), shell=True)
         if abort_on_error or self.proc_pool.closed:
             # Run command in foreground if abort on failure is set or if
             # process pool is closed
             self.proc_pool.run_command(proc_ctx)
             self._run_event_handlers_callback(
                 proc_ctx, abort_on_error=abort_on_error)
         else:
             # Run command using process pool otherwise
             self.proc_pool.put_command(
                 proc_ctx, self._run_event_handlers_callback)
Example 14
File: rundb.py Project: cylc/cylc
 def execute_queued_items(self):
     """Execute queued items for each table."""
     try:
         for table in self.tables.values():
             # DELETE statements may have varying number of WHERE args so we
             # can only executemany for each identical template statement.
             for stmt, stmt_args_list in table.delete_queues.items():
                 self._execute_stmt(stmt, stmt_args_list)
             # INSERT statements are uniform for each table, so all INSERT
             # statements can be executed using a single "executemany" call.
             if table.insert_queue:
                 self._execute_stmt(
                     table.get_insert_stmt(), table.insert_queue)
             # UPDATE statements can have varying number of SET and WHERE
             # args so we can only executemany for each identical template
             # statement.
             for stmt, stmt_args_list in table.update_queues.items():
                 self._execute_stmt(stmt, stmt_args_list)
         # Connection should only be opened if we have executed something.
         if self.conn is None:
             return
         self.conn.commit()
     except sqlite3.Error:
         if not self.is_public:
             raise
         self.n_tries += 1
         LOG.warning(
             "%(file)s: write attempt (%(attempt)d) did not complete\n" % {
                 "file": self.db_file_name, "attempt": self.n_tries})
         if self.conn is not None:
             try:
                 self.conn.rollback()
             except sqlite3.Error:
                 pass
         return
     else:
         # Clear the queues
         for table in self.tables.values():
             table.delete_queues.clear()
             del table.insert_queue[:]  # list.clear avail from Python 3.3
             table.update_queues.clear()
         # Report public database retry recovery if necessary
         if self.n_tries:
             LOG.warning(
                 "%(file)s: recovered after (%(attempt)d) attempt(s)\n" % {
                     "file": self.db_file_name, "attempt": self.n_tries})
         self.n_tries = 0
     finally:
         # Note: This is not strictly necessary. However, if the suite run
         # directory is removed, a forced reconnection to the private
         # database will ensure that the suite dies.
         self.close()
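The queue-then-executemany approach (batch uniform statements, commit once, roll back and retry later on failure) is plain sqlite3; a self-contained sketch with an in-memory database:

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE task_states (name TEXT, status TEXT)')
insert_queue = [('foo', 'waiting'), ('bar', 'running')]
try:
    # One executemany call covers every queued row of the uniform INSERT.
    conn.executemany('INSERT INTO task_states VALUES (?, ?)', insert_queue)
    conn.commit()
except sqlite3.Error:
    conn.rollback()  # keep insert_queue so the write can be retried
else:
    insert_queue.clear()  # queued items are now safely on disk
finally:
    conn.close()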
Example 15
 def _remote_host_select_callback(self, proc_ctx, cmd_str):
     """Callback when host select command exits"""
     self.ready = True
     if proc_ctx.ret_code == 0 and proc_ctx.out:
         # Good status
         LOG.debug(proc_ctx)
         self.remote_host_str_map[cmd_str] = proc_ctx.out.splitlines()[0]
     else:
         # Bad status
         LOG.error(proc_ctx)
         self.remote_host_str_map[cmd_str] = TaskRemoteMgmtError(
             TaskRemoteMgmtError.MSG_SELECT, (cmd_str, None), cmd_str,
             proc_ctx.ret_code, proc_ctx.out, proc_ctx.err)
Example 16
    def _remote_init_callback(self, proc_ctx, platform, tmphandle, curve_auth,
                              client_pub_key_dir):
        """Callback when "cylc remote-init" exits.

        Write public key for install target into client public key
        directory.
        Set remote_init_map status to REMOTE_INIT_DONE on success, which
        in turn will trigger file installation to start.
        Set remote_init_map status to REMOTE_INIT_FAILED on error.

        """
        with suppress(OSError):  # E.g. ignore bad unlink, etc
            tmphandle.close()
        install_target = platform['install target']
        if proc_ctx.ret_code == 0 and "KEYSTART" in proc_ctx.out:
            regex_result = re.search('KEYSTART((.|\n|\r)*)KEYEND',
                                     proc_ctx.out)
            key = regex_result.group(1)
            workflow_srv_dir = get_workflow_srv_dir(self.workflow)
            public_key = KeyInfo(KeyType.PUBLIC,
                                 KeyOwner.CLIENT,
                                 workflow_srv_dir=workflow_srv_dir,
                                 install_target=install_target)
            old_umask = os.umask(0o177)
            with open(public_key.full_key_path, 'w',
                      encoding='utf8') as text_file:
                text_file.write(key)
            os.umask(old_umask)
            # configure_curve must be called every time certificates are
            # added or removed, in order to update the Authenticator's
            # state.
            curve_auth.configure_curve(domain='*',
                                       location=(client_pub_key_dir))
            self.remote_init_map[install_target] = REMOTE_INIT_DONE
            self.ready = True
            return
        # Bad status
        LOG.error(
            PlatformError(
                PlatformError.MSG_INIT,
                platform['name'],
                cmd=proc_ctx.cmd,
                ret_code=proc_ctx.ret_code,
                out=proc_ctx.out,
                err=proc_ctx.err,
            ))

        self.remote_init_map[platform['install target']] = REMOTE_INIT_FAILED
        self.ready = True
Example 17
def load(config, additional_plugins=None):
    additional_plugins = additional_plugins or []
    entry_points = {
        entry_point.name: entry_point
        for entry_point in
        iter_entry_points('cylc.main_loop')
    }
    plugins = {
        'state': {},
        'timings': {}
    }
    for plugin_name in config['plugins'] + additional_plugins:
        # get plugin
        try:
            module_name = entry_points[plugin_name.replace(' ', '_')]
        except KeyError:
            raise UserInputError(
                f'No main-loop plugin: "{plugin_name}"\n'
                + '    Available plugins:\n'
                + indent('\n'.join(sorted(entry_points)), '        ')
            )
        # load plugin
        try:
            module = module_name.load()
        except Exception:
            raise CylcError(f'Could not load plugin: "{plugin_name}"')
        # load coroutines
        log = []
        for coro_name, coro in (
                (coro_name, coro)
                for coro_name, coro in getmembers(module)
                if isfunction(coro)
                if hasattr(coro, 'main_loop')
        ):
            log.append(coro_name)
            plugins.setdefault(
                coro.main_loop, {}
            )[(plugin_name, coro_name)] = coro
            plugins['timings'][(plugin_name, coro_name)] = deque(maxlen=1)
        LOG.debug(
            'Loaded main loop plugin "%s": %s',
            plugin_name + '\n',
            '\n'.join((f'* {x}' for x in log))
        )
        # set the initial state of the plugin
        plugins['state'][plugin_name] = {}
    # make a note of the config here for ease of reference
    plugins['config'] = config
    return plugins
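The coroutine discovery step relies on inspect.getmembers() plus a marker attribute set by the plugin decorator; the idea can be demonstrated on a throwaway module (the module and functions below are made up):

from inspect import getmembers, isfunction
from types import ModuleType

plugin = ModuleType('my_plugin')

def before(scheduler, state):
    ...
before.main_loop = 'startup'  # marker attribute, as a plugin decorator would set

def helper():
    ...

plugin.before = before
plugin.helper = helper

coros = {
    name: func
    for name, func in getmembers(plugin)
    if isfunction(func) and hasattr(func, 'main_loop')
}
print(coros)  # only 'before' is collected; 'helper' has no marker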
Example 18
    async def async_request(self, command, args=None, timeout=None):
        """Send an asynchronous request using asyncio.

        Has the same arguments and return values as ``serial_request``.

        """
        if timeout:
            timeout = float(timeout)
        timeout = (timeout * 1000 if timeout else None) or self.timeout
        if not args:
            args = {}

        # get secret for this request
        # assumes secret won't change during the request
        try:
            secret = self.secret()
        except cylc.flow.suite_srv_files_mgr.SuiteServiceFileError:
            raise ClientError('could not read suite passphrase')

        # send message
        msg = {'command': command, 'args': args}
        msg.update(self.header)
        LOG.debug('zmq:send %s' % msg)
        message = encrypt(msg, secret)
        self.socket.send_string(message)

        # receive response
        if self.poller.poll(timeout):
            res = await self.socket.recv()
        else:
            if self.timeout_handler:
                self.timeout_handler()
            raise ClientTimeout('Timeout waiting for server response.')

        if msg['command'] in PB_METHOD_MAP:
            response = {'data': res}
        else:
            try:
                response = decrypt(res.decode(), secret)
            except jose.exceptions.JWTError:
                raise ClientError(
                    'Could not decrypt response. Has the passphrase changed?')
        LOG.debug('zmq:recv %s' % response)

        try:
            return response['data']
        except KeyError:
            error = response['error']
            raise ClientError(error['message'], error.get('traceback'))
Example 19
    def test_value_error_raises_system_exit(self, mocked_glbl_cfg):
        """Test that a ValueError when writing to a log stream won't result
        in multiple exceptions (what could lead to infinite loop in some
        occasions. Instead, it **must** raise a SystemExit"""
        with tempfile.NamedTemporaryFile() as tf:
            # mock objects used when creating the file handler
            mocked = mock.MagicMock()
            mocked_glbl_cfg.return_value = mocked
            mocked.get_derived_host_item.return_value = tf.name
            mocked.get.return_value = 100
            file_handler = TimestampRotatingFileHandler("suiteA", False)
            # next line is important as pytest can have a "Bad file descriptor"
            # due to a FileHandler with default "a" (pytest tries to r/w).
            file_handler.mode = "a+"

            # enable the logger
            LOG.setLevel(logging.INFO)
            LOG.addHandler(file_handler)

            # Disable raising uncaught exceptions in logging, due to file
            # handler using stdin.fileno. See the following links for more.
            # https://github.com/pytest-dev/pytest/issues/2276 &
            # https://github.com/pytest-dev/pytest/issues/1585
            logging.raiseExceptions = False

            # first message will initialize the stream and the handler
            LOG.info("What could go")

            # here we change the stream of the handler
            old_stream = file_handler.stream
            file_handler.stream = mock.MagicMock()
            file_handler.stream.seek = mock.MagicMock()
            # make seek() raise ValueError to exercise the failure path
            file_handler.stream.seek.side_effect = ValueError

            try:
                # next call will call the emit method and use the mocked stream
                LOG.info("wrong?!")
                self.fail("Exception SystemError was not raised")
            except SystemExit:
                pass
            finally:
                # clean up
                file_handler.stream = old_stream
                # for log_handler in LOG.handlers:
                #     log_handler.close()
                file_handler.close()
                LOG.removeHandler(file_handler)
                logging.raiseExceptions = True
Example 20
    def call_xtriggers_async(self, itask: TaskProxy):
        """Call itask's xtrigger functions via the process pool...

        ...if the previous call is not still in progress and the retry period
        has elapsed.

        Args:
            itask: task proxy to check.
        """
        for label, sig, ctx, _ in self._get_xtrigs(itask, unsat_only=True):
            if sig.startswith("wall_clock"):
                # Special case: quick synchronous clock check.
                if 'absolute_as_seconds' not in ctx.func_kwargs:
                    ctx.func_kwargs.update(
                        {'point_as_seconds': itask.get_point_as_seconds()})
                if wall_clock(*ctx.func_args, **ctx.func_kwargs):
                    itask.state.xtriggers[label] = True
                    self.sat_xtrig[sig] = {}
                    self.data_store_mgr.delta_task_xtrigger(sig, True)
                    LOG.info('xtrigger satisfied: %s = %s', label, sig)
                continue
            # General case: potentially slow asynchronous function call.
            if sig in self.sat_xtrig:
                if not itask.state.xtriggers[label]:
                    itask.state.xtriggers[label] = True
                    res = {}
                    for key, val in self.sat_xtrig[sig].items():
                        res["%s_%s" % (label, key)] = val
                    if res:
                        xtrigger_env = [{
                            'environment': {
                                key: val
                            }
                        } for key, val in res.items()]
                        self.broadcast_mgr.put_broadcast([str(itask.point)],
                                                         [itask.tdef.name],
                                                         xtrigger_env)
                continue
            if sig in self.active:
                # Already waiting on this result.
                continue
            now = time()
            if sig in self.t_next_call and now < self.t_next_call[sig]:
                # Too soon to call this one again.
                continue
            self.t_next_call[sig] = now + ctx.intvl
            # Queue to the process pool, and record as active.
            self.active.append(sig)
            self.proc_pool.put_command(ctx, callback=self.callback)
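The t_next_call bookkeeping is a simple per-signature rate limit: skip the call if the previous one was made within the retry interval. Reduced to its essentials (the names and interval are illustrative):

from time import sleep, time

t_next_call = {}  # signature -> earliest time it may be called again
INTERVAL = 1.0    # seconds between calls for the same signature

def maybe_call(sig, func):
    now = time()
    if sig in t_next_call and now < t_next_call[sig]:
        return False  # too soon to call this one again
    t_next_call[sig] = now + INTERVAL
    func()
    return True

print(maybe_call('xrandom()', lambda: None))  # True: first call goes ahead
print(maybe_call('xrandom()', lambda: None))  # False: within the interval
sleep(INTERVAL)
print(maybe_call('xrandom()', lambda: None))  # True again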
Example 21
    def stop(self, stop_loop=True):
        """Stop the server.

        Args:
            stop_loop (Boolean): Stop running IOLoop of current thread.

        """
        self._bespoke_stop()
        if stop_loop and self.loop and self.loop.is_running():
            self.loop.stop()
        if self.thread and self.thread.is_alive():
            self.thread.join()  # Wait for processes to return
        if self.socket and not self.socket.closed:
            self.socket.close()
        LOG.debug('...stopped')
Example 22
def run_reftest(config, ctx):
    """Run reference test at shutdown."""
    reffilename = config.get_ref_log_name()
    curfilename = get_workflow_test_log_name(ctx.workflow)
    ref = _load_reflog(reffilename)
    cur = _load_reflog(curfilename)
    if ref == cur:
        LOG.info('WORKFLOW REFERENCE TEST PASSED')
    else:
        exc = WorkflowEventError(
            'WORKFLOW REFERENCE TEST FAILED\n'
            'triggering is NOT consistent with the reference log:\n%s\n' %
            '\n'.join(unified_diff(ref, cur, 'reference', 'this run')))
        LOG.exception(exc)
        raise exc
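The reference test is an equality check plus difflib.unified_diff for the failure report; a tiny standalone illustration with made-up trigger lines:

from difflib import unified_diff

ref = ['1/install -triggered off []', '1/run -triggered off [1/install]']
cur = ['1/install -triggered off []', '1/run -triggered off []']
if ref == cur:
    print('REFERENCE TEST PASSED')
else:
    print('REFERENCE TEST FAILED')
    print('\n'.join(unified_diff(ref, cur, 'reference', 'this run')))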
Example 23
 def _event_email_callback(self, proc_ctx, schd_ctx):
     """Call back when email notification command exits."""
     for id_key in proc_ctx.cmd_kwargs["id_keys"]:
         key1, point, name, submit_num = id_key
         try:
             if proc_ctx.ret_code == 0:
                 del self.event_timers[id_key]
                 log_ctx = SubProcContext((key1, submit_num), None)
                 log_ctx.ret_code = 0
                 log_task_job_activity(log_ctx, schd_ctx.suite, point, name,
                                       submit_num)
             else:
                 self.event_timers[id_key].unset_waiting()
         except KeyError as exc:
             LOG.exception(exc)
Example 24
def _open_logs(id_, no_detach):
    """Open Cylc log handlers for a flow run."""
    if not no_detach:
        while LOG.handlers:
            LOG.handlers[0].close()
            LOG.removeHandler(LOG.handlers[0])
    log_path = get_workflow_run_log_name(id_)
    LOG.addHandler(
        TimestampRotatingFileHandler(log_path, no_detach)
    )
    # Add file installation log
    file_install_log_path = get_workflow_file_install_log_name(id_)
    RSYNC_LOG.addHandler(
        TimestampRotatingFileHandler(file_install_log_path, no_detach)
    )
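The detach branch above swaps the logger's handlers wholesale; the same close-remove-add sequence with the stdlib FileHandler (in place of cylc's TimestampRotatingFileHandler) looks like this:

import logging
import tempfile

log = logging.getLogger('sketch')
log.setLevel(logging.INFO)

while log.handlers:  # drop any existing handlers first
    log.handlers[0].close()
    log.removeHandler(log.handlers[0])

log_path = tempfile.NamedTemporaryFile(delete=False).name
log.addHandler(logging.FileHandler(log_path))
log.info('hello from the new handler')
print(open(log_path).read())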
Example 25
    def kill_task_jobs(self, suite, itasks):
        """Kill jobs of active tasks, and hold the tasks.

        Only tasks in an active state are killable; others are skipped.

        """
        to_kill_tasks = []
        for itask in itasks:
            if itask.state(*TASK_STATUSES_ACTIVE):
                itask.state.reset(is_held=True)
                to_kill_tasks.append(itask)
            else:
                LOG.warning('skipping %s: task not killable' % itask.identity)
        self._run_job_cmd(self.JOBS_KILL, suite, to_kill_tasks,
                          self._kill_task_jobs_callback)
Example 26
    def check_task_jobs(self, suite, task_pool):
        """Check submission and execution timeout and polling timers.

        Poll tasks that have timed out and/or have reached next polling time.
        """
        now = time()
        poll_tasks = set()
        for itask in task_pool.get_tasks():
            if self.task_events_mgr.check_job_time(itask, now):
                poll_tasks.add(itask)
                if itask.poll_timer.delay is not None:
                    LOG.info('[%s] -poll now, (next in %s)', itask,
                             itask.poll_timer.delay_timeout_as_str())
        if poll_tasks:
            self.poll_task_jobs(suite, poll_tasks)
Example 27
        def _authorise(self, *args, user='******', meta=None, **kwargs):
            if not meta:
                meta = {}
            host = meta.get('host', '?')
            prog = meta.get('prog', '?')

            usr_priv_level = self._get_priv_level(user)
            if usr_priv_level < req_priv_level:
                LOG.warning(
                    "[client-connect] DENIED (privilege '%s' < '%s') %s@%s:%s",
                    usr_priv_level, req_priv_level, user, host, prog)
                raise Exception('Authorisation failure')
            LOG.info('[client-command] %s %s@%s:%s', fcn.__name__, user, host,
                     prog)
            return fcn(self, *args, **kwargs)
Example 28
File: server.py Project: cylc/cylc
        def _authorise(self, *args, user='******', meta=None, **kwargs):
            if not meta:
                meta = {}
            host = meta.get('host', '?')
            prog = meta.get('prog', '?')

            usr_priv_level = self._get_priv_level(user)
            if usr_priv_level < req_priv_level:
                LOG.warning(
                    "[client-connect] DENIED (privilege '%s' < '%s') %s@%s:%s",
                    usr_priv_level, req_priv_level, user, host, prog)
                raise Exception('Authorisation failure')
            LOG.info(
                '[client-command] %s %s@%s:%s', fcn.__name__, user, host, prog)
            return fcn(self, *args, **kwargs)
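Both _authorise snippets are the inner wrapper of a privilege-checking decorator; the enclosing decorator, the privilege policy and the default user are not shown. One way such a decorator could be assembled (the policy, class and default user below are made up):

import functools
import logging

LOG = logging.getLogger(__name__)

def authorise(req_priv_level):
    """Check the caller's privilege level before running the wrapped method."""
    def decorator(fcn):
        @functools.wraps(fcn)
        def _authorise(self, *args, user='anon', meta=None, **kwargs):
            meta = meta or {}
            host = meta.get('host', '?')
            prog = meta.get('prog', '?')
            usr_priv_level = self._get_priv_level(user)
            if usr_priv_level < req_priv_level:
                LOG.warning(
                    "[client-connect] DENIED (privilege '%s' < '%s') %s@%s:%s",
                    usr_priv_level, req_priv_level, user, host, prog)
                raise Exception('Authorisation failure')
            LOG.info(
                '[client-command] %s %s@%s:%s', fcn.__name__, user, host, prog)
            return fcn(self, *args, **kwargs)
        return _authorise
    return decorator

class Server:
    def _get_priv_level(self, user):
        return 5 if user == 'owner' else 1  # made-up privilege policy

    @authorise(req_priv_level=5)
    def stop(self):
        return 'stopping'

print(Server().stop(user='owner'))  # allowed; any other user would be denied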
Example 29
def process_mail_footer(
    mail_footer_tmpl: str,
    template_vars,
) -> str:
    """Process mail footer for workflow or task events.

    Returns an empty string if issues occur in processing.
    """
    try:
        return (mail_footer_tmpl + '\n') % template_vars
    except (KeyError, ValueError):
        LOG.warning(
            f'Ignoring bad mail footer template: {mail_footer_tmpl}'
        )
    return ''
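The footer template is an ordinary %-format string keyed by template_vars; a quick look at the success and failure paths (the variable names are made up):

template = 'See: %(log_dir)s'
print((template + '\n') % {'log_dir': '~/cylc-run/flow/log'})
# A template referencing a missing key, e.g. '%(oops)s', raises KeyError
# and would be logged as a bad footer template instead.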
Example 30
    def upgrade_retry_state(self):
        """Replace the retry state with xtriggers.

        * Change *retrying tasks to waiting
        * Add the required xtrigger

        Note:
            The retry status can be safely removed as this is really a display
            state, the retry logic revolves around the TaskActionTimer.

        From:
            cylc<8
        To:
            cylc>=8
        PR:
            #3423

        Returns:
            list - (cycle, name, status) tuples of all retrying tasks.

        """
        conn = self.connect()

        for table in [self.TABLE_TASK_POOL_CHECKPOINTS, self.TABLE_TASK_POOL]:
            tasks = list(
                conn.execute(rf'''
                    SELECT
                        cycle, name, status
                    FROM
                        {table}
                    WHERE
                        status IN ('retrying', 'submit-retrying')
                '''))
            if tasks:
                LOG.info(f'Upgrade retrying tasks in table {table}')
            conn.executemany(
                rf'''
                    UPDATE
                        {table}
                    SET
                        status='{TASK_STATUS_WAITING}'
                    WHERE
                        cycle==?
                        and name==?
                        and status==?
                ''', tasks)
        conn.commit()
        return tasks
Example 31
    def parse_args(self, api_args, remove_opts=None):
        """Parse options and arguments, overrides OptionParser.parse_args.

        Args:
            api_args (list):
                Command line options if passed via Python as opposed to
                sys.argv
            remove_opts (list):
                List of standard options to remove before parsing.

        """
        if self.auto_add:
            # Add common options after command-specific options.
            self.add_std_options()

        if remove_opts:
            for opt in remove_opts:
                try:
                    self.remove_option(opt)
                except ValueError:
                    pass

        (options, args) = OptionParser.parse_args(self, api_args)

        if len(args) < self.n_compulsory_args:
            self.error("Wrong number of arguments (too few)")

        elif not self.unlimited_args and \
                len(args) > self.n_compulsory_args + self.n_optional_args:
            self.error("Wrong number of arguments (too many)")

        if self.jset:
            if options.templatevars_file:
                options.templatevars_file = os.path.abspath(os.path.expanduser(
                    options.templatevars_file))

        cylc.flow.flags.verbosity = options.verbosity

        # Set up stream logging for CLI. Note:
        # 1. On choosing STDERR: Log messages are diagnostics, so STDERR is the
        #    better choice for the logging stream. This allows us to use STDOUT
        #    for verbosity agnostic outputs.
        # 2. Scheduler will remove this handler when it becomes a daemon.
        if options.verbosity > 1:
            LOG.setLevel(logging.DEBUG)
        else:
            LOG.setLevel(logging.INFO)
        # Remove NullHandler before adding the StreamHandler
        RSYNC_LOG.setLevel(logging.INFO)
        while LOG.handlers:
            LOG.handlers[0].close()
            LOG.removeHandler(LOG.handlers[0])
        errhandler = logging.StreamHandler(sys.stderr)
        errhandler.setFormatter(CylcLogFormatter(
            timestamp=options.log_timestamp))
        LOG.addHandler(errhandler)

        return (options, args)
Example 32
def test_value_error_raises_system_exit(mocked_glbl_cfg, ):
    """Test that a ValueError when writing to a log stream won't result
    in multiple exceptions (what could lead to infinite loop in some
    occasions. Instead, it **must** raise a SystemExit"""
    with tempfile.NamedTemporaryFile() as tf:
        # mock objects used when creating the file handler
        mocked = mock.MagicMock()
        mocked_glbl_cfg.return_value = mocked
        mocked.get.return_value = 100
        file_handler = TimestampRotatingFileHandler(tf.name, False)
        # next line is important as pytest can have a "Bad file descriptor"
        # due to a FileHandler with default "a" (pytest tries to r/w).
        file_handler.mode = "a+"

        # enable the logger
        LOG.setLevel(logging.INFO)
        LOG.addHandler(file_handler)

        # Disable raising uncaught exceptions in logging, due to file
        # handler using stdin.fileno. See the following links for more.
        # https://github.com/pytest-dev/pytest/issues/2276 &
        # https://github.com/pytest-dev/pytest/issues/1585
        logging.raiseExceptions = False

        # first message will initialize the stream and the handler
        LOG.info("What could go")

        # here we change the stream of the handler
        old_stream = file_handler.stream
        file_handler.stream = mock.MagicMock()
        file_handler.stream.seek = mock.MagicMock()
        # make seek() raise ValueError to exercise the failure path
        file_handler.stream.seek.side_effect = ValueError

        try:
            # next call will call the emit method and use the mocked stream
            LOG.info("wrong?!")
            raise Exception("Exception SystemError was not raised")
        except SystemExit:
            pass
        finally:
            # clean up
            file_handler.stream = old_stream
            # for log_handler in LOG.handlers:
            #     log_handler.close()
            file_handler.close()
            LOG.removeHandler(file_handler)
            logging.raiseExceptions = True
Example 33
    def put_broadcast(
            self, point_strings=None, namespaces=None, settings=None):
        """Add new broadcast settings (server side interface).

        Return a tuple (modified_settings, bad_options) where:
          modified_settings is a list of modified settings in the form:
            [("20200202", "foo", {"script": "true"}), ...]
          bad_options is as described in the docstring for self.clear().
        """
        modified_settings = []
        bad_point_strings = []
        bad_namespaces = []

        with self.lock:
            for setting in settings:
                for point_string in point_strings:
                    # Standardise the point and check its validity.
                    bad_point = False
                    try:
                        point_string = standardise_point_string(point_string)
                    except PointParsingError:
                        if point_string != '*':
                            bad_point_strings.append(point_string)
                            bad_point = True
                    if not bad_point and point_string not in self.broadcasts:
                        self.broadcasts[point_string] = {}
                    for namespace in namespaces:
                        if namespace not in self.linearized_ancestors:
                            bad_namespaces.append(namespace)
                        elif not bad_point:
                            if namespace not in self.broadcasts[point_string]:
                                self.broadcasts[point_string][namespace] = {}
                            self._addict(
                                self.broadcasts[point_string][namespace],
                                setting)
                            modified_settings.append(
                                (point_string, namespace, setting))

        # Log the broadcast
        self.suite_db_mgr.put_broadcast(modified_settings)
        LOG.info(get_broadcast_change_report(modified_settings))

        bad_options = {}
        if bad_point_strings:
            bad_options["point_strings"] = bad_point_strings
        if bad_namespaces:
            bad_options["namespaces"] = bad_namespaces
        return modified_settings, bad_options
Example 35
    def check_task_jobs(self, suite, task_pool):
        """Check submission and execution timeout and polling timers.

        Poll tasks that have timed out and/or have reached next polling time.
        """
        now = time()
        poll_tasks = set()
        for itask in task_pool.get_tasks():
            if self.task_events_mgr.check_job_time(itask, now):
                poll_tasks.add(itask)
                if itask.poll_timer.delay is not None:
                    LOG.info(
                        '[%s] -poll now, (next in %s)',
                        itask, itask.poll_timer.delay_timeout_as_str())
        if poll_tasks:
            self.poll_task_jobs(suite, poll_tasks)
Example 36
    def _run_job_cmd(self, cmd_key, suite, itasks, callback):
        """Run job commands, e.g. poll, kill, etc.

        Group itasks with their user@host.
        Put a job command for each user@host to the multiprocess pool.

        """
        if not itasks:
            return
        auth_itasks = {}
        for itask in itasks:
            if (itask.task_host, itask.task_owner) not in auth_itasks:
                auth_itasks[(itask.task_host, itask.task_owner)] = []
            auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
        for (host, owner), itasks in sorted(auth_itasks.items()):
            cmd = ["cylc", cmd_key]
            if LOG.isEnabledFor(DEBUG):
                cmd.append("--debug")
            if is_remote_host(host):
                cmd.append("--host=%s" % (host))
            if is_remote_user(owner):
                cmd.append("--user=%s" % (owner))
            cmd.append("--")
            cmd.append(get_remote_suite_run_job_dir(host, owner, suite))
            job_log_dirs = []
            for itask in sorted(itasks, key=lambda itask: itask.identity):
                job_log_dirs.append(
                    get_task_job_id(itask.point, itask.tdef.name,
                                    itask.submit_num))
            cmd += job_log_dirs
            self.proc_pool.put_command(SubProcContext(cmd_key, cmd), callback,
                                       [suite, itasks])
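The grouping step above (one job command per user@host) is a dictionary-of-lists build; dict.setdefault collapses the "create if missing, then append" pair into one call. A minimal sketch with made-up task data:

tasks = [
    ('t1.1', 'hpc1', 'alice'),
    ('t2.1', 'hpc1', 'alice'),
    ('t3.1', 'hpc2', 'bob'),
]
auth_tasks = {}
for name, host, owner in tasks:
    auth_tasks.setdefault((host, owner), []).append(name)
for (host, owner), names in sorted(auth_tasks.items()):
    print(host, owner, names)
# hpc1 alice ['t1.1', 't2.1']
# hpc2 bob ['t3.1']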
Example 37
 def get_new_flow(self, description: Optional[str] = None) -> int:
     """Increment flow counter, record flow metadata."""
     self.counter += 1
     # record start time to nearest second
     now = datetime.datetime.now()
     now_sec: str = str(now -
                        datetime.timedelta(microseconds=now.microsecond))
     description = description or "no description"
     self.flows[self.counter] = {
         "description": description,
         "start_time": now_sec
     }
     LOG.info(f"New flow: {self.counter} " f"({description}) " f"{now_sec}")
     self.db_mgr.put_insert_workflow_flows(self.counter,
                                           self.flows[self.counter])
     return self.counter
Example 38
    def _process_job_logs_retrieval(self, schd_ctx, ctx, id_keys):
        """Process retrieval of task job logs from remote user@host."""
        if ctx.user_at_host and "@" in ctx.user_at_host:
            s_user, s_host = ctx.user_at_host.split("@", 1)
        else:
            s_user, s_host = (None, ctx.user_at_host)
        ssh_str = str(glbl_cfg().get_host_item("ssh command", s_host, s_user))
        rsync_str = str(glbl_cfg().get_host_item("retrieve job logs command",
                                                 s_host, s_user))

        cmd = shlex.split(rsync_str) + ["--rsh=" + ssh_str]
        if LOG.isEnabledFor(DEBUG):
            cmd.append("-v")
        if ctx.max_size:
            cmd.append("--max-size=%s" % (ctx.max_size, ))
        # Includes and excludes
        includes = set()
        for _, point, name, submit_num in id_keys:
            # Include relevant directories, all levels needed
            includes.add("/%s" % (point))
            includes.add("/%s/%s" % (point, name))
            includes.add("/%s/%s/%02d" % (point, name, submit_num))
            includes.add("/%s/%s/%02d/**" % (point, name, submit_num))
        cmd += ["--include=%s" % (include) for include in sorted(includes)]
        cmd.append("--exclude=/**")  # exclude everything else
        # Remote source
        cmd.append(
            "%s:%s/" %
            (ctx.user_at_host,
             get_remote_suite_run_job_dir(s_host, s_user, schd_ctx.suite)))
        # Local target
        cmd.append(get_suite_run_job_dir(schd_ctx.suite) + "/")
        self.proc_pool.put_command(
            SubProcContext(ctx, cmd, env=dict(os.environ), id_keys=id_keys),
            self._job_logs_retrieval_callback, [schd_ctx])
Example 39
    def _run_job_cmd(self, cmd_key, suite, itasks, callback):
        """Run job commands, e.g. poll, kill, etc.

        Group itasks with their user@host.
        Put a job command for each user@host to the multiprocess pool.

        """
        if not itasks:
            return
        auth_itasks = {}
        for itask in itasks:
            if (itask.task_host, itask.task_owner) not in auth_itasks:
                auth_itasks[(itask.task_host, itask.task_owner)] = []
            auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
        for (host, owner), itasks in sorted(auth_itasks.items()):
            cmd = ["cylc", cmd_key]
            if LOG.isEnabledFor(DEBUG):
                cmd.append("--debug")
            if is_remote_host(host):
                cmd.append("--host=%s" % (host))
            if is_remote_user(owner):
                cmd.append("--user=%s" % (owner))
            cmd.append("--")
            cmd.append(glbl_cfg().get_derived_host_item(
                suite, "suite job log directory", host, owner))
            job_log_dirs = []
            for itask in sorted(itasks, key=lambda itask: itask.identity):
                job_log_dirs.append(get_task_job_id(
                    itask.point, itask.tdef.name, itask.submit_num))
            cmd += job_log_dirs
            self.proc_pool.put_command(
                SubProcContext(cmd_key, cmd), callback, [suite, itasks])
Example 40
 def _get_job_scripts(itask, rtconfig):
     """Return pre-script, script, post-script for a job."""
     script = rtconfig['script']
     pre_script = rtconfig['pre-script']
     post_script = rtconfig['post-script']
     if itask.tdef.suite_polling_cfg:
         # Automatic suite state polling script
         comstr = "cylc suite-state " + \
                  " --task=" + itask.tdef.suite_polling_cfg['task'] + \
                  " --point=" + str(itask.point)
         if LOG.isEnabledFor(DEBUG):
             comstr += ' --debug'
         for key, fmt in [('user', ' --%s=%s'), ('host', ' --%s=%s'),
                          ('interval', ' --%s=%d'),
                          ('max-polls', ' --%s=%s'),
                          ('run-dir', ' --%s=%s')]:
             if rtconfig['suite state polling'][key]:
                 comstr += fmt % (key, rtconfig['suite state polling'][key])
         if rtconfig['suite state polling']['message']:
             comstr += " --message='%s'" % (
                 rtconfig['suite state polling']['message'])
         else:
             comstr += " --status=" + itask.tdef.suite_polling_cfg['status']
         comstr += " " + itask.tdef.suite_polling_cfg['suite']
         script = "echo " + comstr + "\n" + comstr
     return pre_script, script, post_script
Example 41
 def _get_job_scripts(itask, rtconfig):
     """Return pre-script, script, post-script for a job."""
     script = rtconfig['script']
     pre_script = rtconfig['pre-script']
     post_script = rtconfig['post-script']
     if itask.tdef.suite_polling_cfg:
         # Automatic suite state polling script
         comstr = "cylc suite-state " + \
                  " --task=" + itask.tdef.suite_polling_cfg['task'] + \
                  " --point=" + str(itask.point)
         if LOG.isEnabledFor(DEBUG):
             comstr += ' --debug'
         for key, fmt in [
                 ('user', ' --%s=%s'),
                 ('host', ' --%s=%s'),
                 ('interval', ' --%s=%d'),
                 ('max-polls', ' --%s=%s'),
                 ('run-dir', ' --%s=%s')]:
             if rtconfig['suite state polling'][key]:
                 comstr += fmt % (key, rtconfig['suite state polling'][key])
         if rtconfig['suite state polling']['message']:
             comstr += " --message='%s'" % (
                 rtconfig['suite state polling']['message'])
         else:
             comstr += " --status=" + itask.tdef.suite_polling_cfg['status']
         comstr += " " + itask.tdef.suite_polling_cfg['suite']
         script = "echo " + comstr + "\n" + comstr
     return pre_script, script, post_script
Example 42
    def kill_task_jobs(self, suite, itasks):
        """Kill jobs of active tasks, and hold the tasks.

        Only tasks in an active state are killable; others are skipped.

        """
        to_kill_tasks = []
        for itask in itasks:
            if itask.state.status in TASK_STATUSES_ACTIVE:
                itask.state.set_held()
                to_kill_tasks.append(itask)
            else:
                LOG.warning('skipping %s: task not killable' % itask.identity)
        self._run_job_cmd(
            self.JOBS_KILL, suite, to_kill_tasks,
            self._kill_task_jobs_callback)
Example 43
def _open_logs(reg, no_detach):
    """Open Cylc log handlers for a flow run."""
    if not no_detach:
        while LOG.handlers:
            LOG.handlers[0].close()
            LOG.removeHandler(LOG.handlers[0])
    suite_log_handler = get_suite_run_log_name(reg)
    LOG.addHandler(
        TimestampRotatingFileHandler(
            suite_log_handler,
            no_detach))

    # Add file installation log
    file_install_log_path = get_suite_file_install_log_name(reg)
    handler = TimestampRotatingFileHandler(file_install_log_path, no_detach)
    RSYNC_LOG.addHandler(handler)
Example 44
 def _manip_task_jobs_callback(self,
                               ctx,
                               suite,
                               itasks,
                               summary_callback,
                               more_callbacks=None):
     """Callback when submit/poll/kill tasks command exits."""
     if ctx.ret_code:
         LOG.error(ctx)
     else:
         LOG.debug(ctx)
     # A dict for easy reference of (CYCLE, NAME, SUBMIT_NUM) -> TaskProxy
     #
     # Note for "reload": A TaskProxy instance may be replaced on reload, so
     # the "itasks" list may not reference the TaskProxy objects that
     # replace the old ones. The .reload_successor attribute provides the
     # link(s) for us to get to the latest replacement.
     #
     # Note for "kill": It is possible for a job to trigger its trap and
     # report back to the suite before this logic is called. If so, the task
     # will no longer be TASK_STATUS_SUBMITTED or TASK_STATUS_RUNNING, and
     # its output line will be ignored here.
     tasks = {}
     for itask in itasks:
         while itask.reload_successor is not None:
             itask = itask.reload_successor
         if itask.point is not None and itask.submit_num:
             submit_num = "%02d" % (itask.submit_num)
             tasks[(str(itask.point), itask.tdef.name, submit_num)] = itask
     handlers = [(self.batch_sys_mgr.OUT_PREFIX_SUMMARY, summary_callback)]
     if more_callbacks:
         for prefix, callback in more_callbacks.items():
             handlers.append((prefix, callback))
     out = ctx.out
     if not out:
         out = ""
     bad_tasks = dict(tasks)
     for line in out.splitlines(True):
         for prefix, callback in handlers:
             if line.startswith(prefix):
                 line = line[len(prefix):].strip()
                 try:
                     path = line.split("|", 2)[1]  # timestamp, path, status
                     point, name, submit_num = path.split(os.sep, 2)
                     if prefix == self.batch_sys_mgr.OUT_PREFIX_SUMMARY:
                         del bad_tasks[(point, name, submit_num)]
                     itask = tasks[(point, name, submit_num)]
                     callback(suite, itask, ctx, line)
                 except (LookupError, ValueError) as exc:
                     LOG.warning('Unhandled %s output: %s', ctx.cmd_key,
                                 line)
                     LOG.exception(exc)
     # Task jobs that are in the original command but did not get a status
     # in the output. Handle as failures.
     for key, itask in sorted(bad_tasks.items()):
         line = ("|".join([ctx.timestamp, os.sep.join(key), "1"]) + "\n")
         summary_callback(suite, itask, ctx, line)
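
The parsing step relies on a simple line protocol: each handled line is '<prefix><timestamp>|<cycle>/<name>/<submit_num>|<status>'. Below is a small self-contained sketch of that parsing; the prefix string and the task key are made up, and a POSIX os.sep is assumed.

# Sketch of the summary-line protocol (hypothetical prefix and task key).
import os

OUT_PREFIX_SUMMARY = '[TASK JOB SUMMARY]'
tasks = {('20200101T00', 'model', '01'): '<TaskProxy placeholder>'}

line = '[TASK JOB SUMMARY]2020-01-01T00:05:00Z|20200101T00/model/01|0\n'
if line.startswith(OUT_PREFIX_SUMMARY):
    body = line[len(OUT_PREFIX_SUMMARY):].strip()
    path = body.split('|', 2)[1]              # timestamp, path, status
    point, name, submit_num = path.split(os.sep, 2)
    print(tasks[(point, name, submit_num)])   # -> <TaskProxy placeholder>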
Example no. 45
0
    def satisfy_xtriggers(self, itask: TaskProxy):
        """Attempt to satisfy itask's xtriggers.

        Args:
            itask (TaskProxy): TaskProxy
        """
        for label, sig, ctx, _ in self._get_xtrigs(itask, unsat_only=True):
            if sig.startswith("wall_clock"):
                # Special case: synchronous clock check.
                ctx.func_kwargs.update({
                    'point_as_seconds':
                    itask.get_point_as_seconds(),
                })
                if wall_clock(*ctx.func_args, **ctx.func_kwargs):
                    itask.state.xtriggers[label] = True
                    self.sat_xtrig[sig] = {}
                    LOG.info('xtrigger satisfied: %s = %s', label, sig)
                continue
            # General case: asynchronous xtrigger function call.
            if sig in self.sat_xtrig:
                if not itask.state.xtriggers[label]:
                    itask.state.xtriggers[label] = True
                    res = {}
                    for key, val in self.sat_xtrig[sig].items():
                        res["%s_%s" % (label, key)] = val
                    if res:
                        xtrigger_env = [{
                            'environment': {
                                key: val
                            }
                        } for key, val in res.items()]
                        self.broadcast_mgr.put_broadcast([str(ctx.point)],
                                                         [itask.tdef.name],
                                                         xtrigger_env)
                continue
            if sig in self.active:
                # Already waiting on this result.
                continue
            now = time()
            if sig in self.t_next_call and now < self.t_next_call[sig]:
                # Too soon to call this one again.
                continue
            self.t_next_call[sig] = now + ctx.intvl
            # Queue to the process pool, and record as active.
            self.active.append(sig)
            self.proc_pool.put_command(ctx, self.callback)
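
The t_next_call bookkeeping near the end is a per-signature throttle: the same xtrigger call is not queued again before its interval has elapsed. A minimal sketch of that idea on its own (the function and signature names are hypothetical):

# Sketch: per-signature throttle so each xtrigger call respects its interval.
from time import time

t_next_call = {}

def due(sig, interval):
    """Return True if sig may be called now; record the next allowed time."""
    now = time()
    if sig in t_next_call and now < t_next_call[sig]:
        return False  # too soon to call this one again
    t_next_call[sig] = now + interval
    return True

print(due('my_xtrigger()', 10))  # True (first call)
print(due('my_xtrigger()', 10))  # False (within the 10 s interval)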
Example no. 46
0
def apply_delta(key, delta, data):
    """Apply delta to specific data-store workflow and type."""
    # Assimilate new data
    if getattr(delta, 'added', False):
        if key != WORKFLOW:
            data[key].update({e.id: e for e in delta.added})
        elif delta.added.ListFields():
            data[key].CopyFrom(delta.added)
    # Merge in updated fields
    if getattr(delta, 'updated', False):
        if key == WORKFLOW:
            # Clear fields that require overwrite with delta
            field_set = {f.name for f, _ in delta.updated.ListFields()}
            for field in CLEAR_FIELD_MAP[key]:
                if field in field_set:
                    data[key].ClearField(field)
            data[key].MergeFrom(delta.updated)
        else:
            for element in delta.updated:
                try:
                    # Clear fields that require overwrite with delta
                    if CLEAR_FIELD_MAP[key]:
                        for field, _ in element.ListFields():
                            if field.name in CLEAR_FIELD_MAP[key]:
                                data[key][element.id].ClearField(field.name)
                    data[key][element.id].MergeFrom(element)
                except KeyError as exc:
                    # Ensure data-sync doesn't fail with
                    # network issues, sync reconcile/validate will catch.
                    LOG.debug('Missing Data-Store element '
                              'on update application: %s' % str(exc))
                    continue
    # Prune data elements
    if hasattr(delta, 'pruned'):
        # Prune data elements by id
        for del_id in delta.pruned:
            if del_id not in data[key]:
                continue
            if key == TASK_PROXIES:
                data[TASKS][data[key][del_id].task].proxies.remove(del_id)
                getattr(data[WORKFLOW], key).remove(del_id)
            elif key == FAMILY_PROXIES:
                data[FAMILIES][data[key][del_id].family].proxies.remove(del_id)
                getattr(data[WORKFLOW], key).remove(del_id)
            elif key == EDGES:
                getattr(data[WORKFLOW], key).edges.remove(del_id)
            del data[key][del_id]
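
Setting the protobuf details aside, the delta shape is "added, updated, pruned". A plain-dict sketch of the same pattern, with made-up element ids; it is only meant to show the order of operations, not the Cylc data-store API.

# Sketch of the delta pattern on plain dicts: add, update, then prune by id.
data = {}

def apply_delta(added=None, updated=None, pruned=None):
    for eid, element in (added or {}).items():
        data[eid] = dict(element)               # assimilate new elements
    for eid, fields in (updated or {}).items():
        if eid in data:
            data[eid].update(fields)            # merge updated fields
    for eid in (pruned or []):
        data.pop(eid, None)                     # prune by id

apply_delta(added={'task1': {'status': 'waiting'}})
apply_delta(updated={'task1': {'status': 'running'}})
apply_delta(pruned=['task1'])
print(data)  # -> {}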
Example no. 47
0
 def _run_event_custom_handlers(self, schd, template_variables, event):
     """Helper for "run_event_handlers", custom event handlers."""
     # Look for event handlers
     # 1. Handlers for specific event
     # 2. General handlers
     config = schd.config
     handlers = self.get_events_conf(config, '%s handlers' % event)
     if not handlers and (
         event in
         self.get_events_conf(config, 'handler events', [])
     ):
         handlers = self.get_events_conf(config, 'handlers')
     if not handlers:
         return
     for i, handler in enumerate(handlers):
         cmd_key = ('%s-%02d' % (self.WORKFLOW_EVENT_HANDLER, i), event)
         try:
             cmd = handler % (template_variables)
         except KeyError as exc:
             message = f'{cmd_key} bad template: {handler}\n{exc}'
             LOG.error(message)
             continue
         if cmd == handler:
             # Nothing substituted, assume classic interface
             cmd = (
                 f"%(handler)s"
                 f" '%({EventData.Event.value})s'"
                 f" '%({EventData.Workflow.value})s'"
                 f" '%({EventData.Message.value})s'"
             ) % (
                 {'handler': handler, **template_variables}
             )
         proc_ctx = SubProcContext(
             cmd_key,
             cmd,
             env=dict(os.environ),
             shell=True  # nosec (designed to run user defined code)
         )
         if self.proc_pool.closed:
             # Run command in foreground if abort on failure is set or if
             # process pool is closed
             self.proc_pool.run_command(proc_ctx)
             self._run_event_handlers_callback(proc_ctx)
         else:
             # Run command using process pool otherwise
             self.proc_pool.put_command(
                 proc_ctx, callback=self._run_event_handlers_callback)
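
The handler templating works in two steps: substitute %(name)s placeholders, and if nothing was substituted fall back to the classic positional interface. A minimal sketch with hypothetical handler strings and template variables:

# Sketch: %-template substitution with a fallback to the "classic" interface.
template_variables = {'event': 'shutdown', 'workflow': 'my_flow', 'message': 'done'}

for handler in ("notify.sh %(workflow)s %(event)s", "notify.sh"):
    cmd = handler % template_variables
    if cmd == handler:
        # Nothing substituted: append event, workflow and message positionally.
        cmd = "%(handler)s '%(event)s' '%(workflow)s' '%(message)s'" % (
            {'handler': handler, **template_variables})
    print(cmd)
# -> notify.sh my_flow shutdown
# -> notify.sh 'shutdown' 'my_flow' 'done'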
Example no. 48
0
    def _get_host_metrics(self):
        """Run "cylc get-host-metrics" commands on hosts.

        Return (dict): {host: host-metrics-dict, ...}
        """
        host_stats = {}
        # Run "cylc get-host-metrics" commands on hosts
        host_proc_map = {}
        cmd = [self.CMD_BASE] + sorted(self._get_host_metrics_opts())
        # Start up commands on hosts
        for host in self.hosts:
            if is_remote_host(host):
                host_proc_map[host] = remote_cylc_cmd(
                    cmd, stdin=None, host=host, capture_process=True)
            elif 'localhost' in host_proc_map:
                continue  # Don't duplicate localhost
            else:
                # 1st instance of localhost
                host_proc_map['localhost'] = run_cmd(
                    ['cylc'] + cmd, capture_process=True)
        # Collect results from commands
        while host_proc_map:
            for host, proc in list(host_proc_map.items()):
                if proc.poll() is None:
                    continue
                del host_proc_map[host]
                out, err = (f.decode() for f in proc.communicate())
                if proc.wait():
                    # Command failed in verbose/debug mode
                    LOG.warning(
                        "can't get host metric from '%s'" +
                        "%s  # returncode=%d, err=%s\n",
                        host, ' '.join((quote(item) for item in cmd)),
                        proc.returncode, err)
                else:
                    # Command OK
                    # Users may have profile scripts that write to STDOUT.
                    # Drop all output lines until the first character of a
                    # line is '{'. Hopefully this is enough to find us the
                    # first line that denotes the beginning of the expected
                    # JSON data structure.
                    out = ''.join(dropwhile(
                        lambda s: not s.startswith('{'), out.splitlines(True)))
                    host_stats[host] = json.loads(out)
            sleep(0.01)
        return host_stats
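
The stdout clean-up is worth isolating: user profile scripts may print arbitrary text before the JSON, so everything before the first line starting with '{' is dropped. A small self-contained sketch of just that step:

# Sketch: discard leading non-JSON lines (e.g. shell profile chatter), then parse.
import json
from itertools import dropwhile

out = 'Welcome to hostX\nmotd line\n{"memory": 209715, "load:1": 0.42}\n'
cleaned = ''.join(dropwhile(lambda s: not s.startswith('{'), out.splitlines(True)))
print(json.loads(cleaned))  # -> {'memory': 209715, 'load:1': 0.42}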
Example no. 49
0
def _can_auto_restart():
    """Determine whether this workflow can safely auto stop-restart."""
    # Check whether there is currently an available host to restart on.
    try:
        select_workflow_host(cached=False)
    except HostSelectException:
        LOG.critical('Workflow cannot automatically restart because:\n' +
                     'No alternative host to restart workflow on.')
        return False
    except Exception:
        # Any unexpected error in host selection shouldn't be able to take
        # down the workflow.
        LOG.critical('Workflow cannot automatically restart because:\n' +
                     'Error in host selection:\n' + traceback.format_exc())
        return False
    else:
        return True
Example no. 50
0
    def _listener(self):
        """The server main loop, listen for and serve requests."""
        while True:
            # process any commands passed to the listener by its parent process
            if self.queue.qsize():
                command = self.queue.get()
                if command == 'STOP':
                    break
                else:
                    raise ValueError('Unknown command "%s"' % command)

            try:
                # wait RECV_TIMEOUT for a message
                msg = self.socket.recv_string()
            except zmq.error.Again:
                # timeout, continue with the loop, this allows the listener
                # thread to stop
                continue

            # attempt to decode the message, authenticating the user in the
            # process
            try:
                message = self.decode(msg, self.secret())
            except Exception as exc:  # purposefully catch generic exception
                # failed to decode message, possibly resulting from failed
                # authentication
                import traceback
                return {
                    'error': {
                        'message': str(exc),
                        'traceback': traceback.format_exc()
                    }
                }
            else:
                # success case - serve the request
                LOG.debug('zmq:recv %s', message)
                res = self._receiver(message)
                if message['command'] in PB_METHOD_MAP:
                    response = res['data']
                else:
                    response = self.encode(res, self.secret()).encode()
                LOG.debug('zmq:send %s', res)
                # send back the string to bytes response
                self.socket.send(response)

            sleep(0)  # yield control to other threads
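
The loop structure here, poll a control queue for STOP and otherwise wait briefly for a message, can be shown without ZeroMQ. A minimal sketch in which two stdlib queues stand in for the socket; the command names are the only thing carried over from the example above.

# Sketch: listener loop that checks a control queue for STOP, then waits
# briefly for work (a stand-in for recv with a timeout).
import queue
import threading
import time

control, inbox = queue.Queue(), queue.Queue()

def listener():
    while True:
        if not control.empty() and control.get() == 'STOP':
            break                         # parent asked the listener to stop
        try:
            msg = inbox.get(timeout=0.1)  # like a socket recv with RECV_TIMEOUT
        except queue.Empty:
            continue                      # timeout: loop again so STOP is seen
        print('serving', msg)

thread = threading.Thread(target=listener)
thread.start()
inbox.put({'command': 'ping'})
time.sleep(0.3)                           # let the request be served first
control.put('STOP')
thread.join()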
Example no. 51
0
    def set_job_time(self, job_d, event_key, time_str=None):
        """Set an event time in job pool object.

        Set values of both event_key + '_time' and event_key + '_time_string'.
        """
        update_time = time()
        point, name, sub_num = self.parse_job_item(job_d)
        j_id = (f'{self.workflow_id}{ID_DELIM}{point}'
                f'{ID_DELIM}{name}{ID_DELIM}{sub_num}')
        try:
            j_delta = PbJob(stamp=f'{j_id}@{update_time}')
            time_attr = f'{event_key}_time'
            setattr(j_delta, time_attr, time_str)
            self.updates.setdefault(j_id, PbJob(id=j_id)).MergeFrom(j_delta)
            self.updates_pending = True
        except (TypeError, AttributeError) as exc:
            LOG.error(f'Unable to set {j_id} {time_attr} field: {str(exc)}')
Example no. 52
0
def clean(reg):
    """Remove a stopped workflow from the local filesystem only.

    Deletes the workflow run directory and any symlink dirs. Note: if the
    run dir has already been manually deleted, it will not be possible to
    clean the symlink dirs.

    Args:
        reg (str): Workflow name.
    """
    run_dir = Path(get_workflow_run_dir(reg))
    try:
        _clean_check(reg, run_dir)
    except FileNotFoundError as exc:
        LOG.info(str(exc))
        return

    # Note: 'share/cycle' must come first, and '' must come last
    for possible_symlink in (
            SuiteFiles.SHARE_CYCLE_DIR, SuiteFiles.SHARE_DIR,
            SuiteFiles.LOG_DIR, SuiteFiles.WORK_DIR, ''):
        name = Path(possible_symlink)
        path = Path(run_dir, possible_symlink)
        if path.is_symlink():
            # Ensure symlink is pointing to expected directory. If not,
            # something is wrong and we should abort
            target = path.resolve()
            if target.exists() and not target.is_dir():
                raise WorkflowFilesError(
                    f'Invalid Cylc symlink directory {path} -> {target}\n'
                    f'Target is not a directory')
            expected_end = str(Path('cylc-run', reg, name))
            if not str(target).endswith(expected_end):
                raise WorkflowFilesError(
                    f'Invalid Cylc symlink directory {path} -> {target}\n'
                    f'Expected target to end with "{expected_end}"')
            # Remove <symlink_dir>/cylc-run/<reg>
            target_cylc_run_dir = str(target).rsplit(str(reg), 1)[0]
            target_reg_dir = Path(target_cylc_run_dir, reg)
            if target_reg_dir.is_dir():
                remove_dir(target_reg_dir)
            # Remove empty parents
            _remove_empty_reg_parents(reg, target_reg_dir)

    remove_dir(run_dir)
    _remove_empty_reg_parents(reg, run_dir)
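
The symlink check resolves the link and requires the target to end with the expected 'cylc-run/<name>/<dir>' suffix before anything is removed. A minimal sketch of that check with hypothetical paths (the example call is commented out because the link may not exist on your system):

# Sketch: only accept a symlink whose resolved target ends with the expected path.
from pathlib import Path

def check_symlink_target(link: Path, reg: str, name: str) -> Path:
    target = link.resolve()
    expected_end = str(Path('cylc-run', reg, name))
    if not str(target).endswith(expected_end):
        raise ValueError(
            f'Invalid symlink {link} -> {target}; '
            f'expected target to end with "{expected_end}"')
    return target

# e.g. check_symlink_target(Path('~/cylc-run/my_flow/share').expanduser(),
#                           'my_flow', 'share')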
Example no. 53
0
File: client.py Project: cylc/cylc
    async def async_request(self, command, args=None, timeout=None):
        """Send an asynchronous request using asyncio.

        Has the same arguments and return values as ``serial_request``.

        """
        if timeout:
            timeout = float(timeout)
        timeout = (timeout * 1000 if timeout else None) or self.timeout
        if not args:
            args = {}

        # get secret for this request
        # assumes secret won't change during the request
        try:
            secret = self.secret()
        except cylc.flow.suite_srv_files_mgr.SuiteServiceFileError:
            raise ClientError('could not read suite passphrase')

        # send message
        msg = {'command': command, 'args': args}
        msg.update(self.header)
        LOG.debug('zmq:send %s' % msg)
        message = encrypt(msg, secret)
        self.socket.send_string(message)

        # receive response
        if self.poller.poll(timeout):
            res = await self.socket.recv_string()
        else:
            if self.timeout_handler:
                self.timeout_handler()
            raise ClientTimeout('Timeout waiting for server response.')

        try:
            response = decrypt(res, secret)
            LOG.debug('zmq:recv %s' % response)
        except jose.exceptions.JWTError:
            raise ClientError(
                'Could not decrypt response. Has the passphrase changed?')

        try:
            return response['data']
        except KeyError:
            error = response['error']
            raise ClientError(error['message'], error.get('traceback'))
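
The request/timeout shape of async_request can be sketched with plain asyncio; the coroutine below stands in for the ZeroMQ round trip and the encryption layer, so it is not the Cylc client API, just the same pattern of awaiting a reply with a timeout and unpacking data or error.

# Sketch: send a request, await the reply with a timeout, unpack data or error.
import asyncio

async def fake_transport(msg):
    await asyncio.sleep(0.01)                 # stand-in for the socket round trip
    return {'data': {'echo': msg['command']}}

async def async_request(command, args=None, timeout=5.0):
    msg = {'command': command, 'args': args or {}}
    try:
        response = await asyncio.wait_for(fake_transport(msg), timeout)
    except asyncio.TimeoutError:
        raise RuntimeError('Timeout waiting for server response.')
    try:
        return response['data']
    except KeyError:
        raise RuntimeError(response['error']['message'])

print(asyncio.run(async_request('ping')))     # -> {'echo': 'ping'}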
Example no. 54
0
 def _run_command_init(cls, ctx, callback=None, callback_args=None):
     """Prepare and launch shell command in ctx."""
     try:
         if ctx.cmd_kwargs.get('stdin_files'):
             if len(ctx.cmd_kwargs['stdin_files']) > 1:
                 stdin_file = cls.get_temporary_file()
                 for file_ in ctx.cmd_kwargs['stdin_files']:
                     if hasattr(file_, 'read'):
                         stdin_file.write(file_.read())
                     else:
                         with open(file_, 'rb') as file_handle:
                             stdin_file.write(file_handle.read())
                 stdin_file.seek(0)
             elif hasattr(ctx.cmd_kwargs['stdin_files'][0], 'read'):
                 stdin_file = ctx.cmd_kwargs['stdin_files'][0]
             else:
                 stdin_file = open(ctx.cmd_kwargs['stdin_files'][0], 'rb')
         elif ctx.cmd_kwargs.get('stdin_str'):
             stdin_file = cls.get_temporary_file()
             stdin_file.write(ctx.cmd_kwargs.get('stdin_str').encode())
             stdin_file.seek(0)
         else:
             stdin_file = DEVNULL
         proc = procopen(
             ctx.cmd,
             stdin=stdin_file,
             stdoutpipe=True,
             stderrpipe=True,
             # Execute command as a process group leader,
             # so we can use "os.killpg" to kill the whole group.
             preexec_fn=os.setpgrp,
             env=ctx.cmd_kwargs.get('env'),
             usesh=ctx.cmd_kwargs.get('shell'))
         # calls to open a shell are aggregated in cylc_subproc.procopen()
         # with logging for what is calling it and the commands given
     except (IOError, OSError) as exc:
         if exc.filename is None:
             exc.filename = ctx.cmd[0]
         LOG.exception(exc)
         ctx.ret_code = 1
         ctx.err = str(exc)
         cls._run_command_exit(ctx, callback, callback_args)
         return None
     else:
         LOG.debug(ctx.cmd)
         return proc
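
The process-group detail is the key point: the child is started as a group leader so the whole tree can later be killed with os.killpg. A POSIX-only sketch using the standard subprocess module rather than the cylc procopen wrapper:

# POSIX-only sketch: start a command as a process-group leader, then signal
# the whole group.
import os
import signal
import subprocess

proc = subprocess.Popen(
    ['sleep', '60'],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    preexec_fn=os.setpgrp,           # child becomes its own process-group leader
)
os.killpg(proc.pid, signal.SIGTERM)  # group leader => its pgid equals its pid
proc.wait()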
Example no. 55
0
File: upgrade.py Project: cylc/cylc
    def upgrade(self):
        warnings = OrderedDict()
        for vn, upgs in self.upgrades.items():
            for u in upgs:
                try:
                    exp = self.expand(u)
                except (KeyError, UpgradeError):
                    continue

                for upg in exp:
                    try:
                        old = self.get_item(upg['old'])
                    except KeyError:
                        # OK: deprecated item not found
                        pass
                    else:
                        msg = self.show_keys(upg['old'])
                        if upg['new']:
                            msg += ' -> ' + self.show_keys(upg['new'])
                        else:
                            upg['new'] = upg['old']
                        msg += " - " + upg['cvt'].describe()
                        if not upg['silent']:
                            warnings.setdefault(vn, [])
                            warnings[vn].append(msg)
                        self.del_item(upg['old'])
                        if upg['cvt'].describe() != "DELETED (OBSOLETE)":
                            self.put_item(upg['new'], upg['cvt'].convert(old))
        if warnings:
            if self.descr == self.SITE_CONFIG:
                # Site level configuration, user cannot easily fix.
                # Only log at debug level.
                level = DEBUG
            else:
                # User level configuration, user should be able to fix.
                # Log at warning level.
                level = WARNING
            LOG.log(
                level,
                "deprecated items were automatically upgraded in '%s':",
                self.descr)
            for vn, msgs in warnings.items():
                for msg in msgs:
                    LOG.log(level, ' * (%s) %s', vn, msg)
Example no. 56
0
 def _manip_task_jobs_callback(
         self, ctx, suite, itasks, summary_callback, more_callbacks=None):
     """Callback when submit/poll/kill tasks command exits."""
     if ctx.ret_code:
         LOG.error(ctx)
     else:
         LOG.debug(ctx)
     # A dict for easy reference of (CYCLE, NAME, SUBMIT_NUM) -> TaskProxy
     #
     # Note for "reload": A TaskProxy instance may be replaced on reload, so
     # the "itasks" list may not reference the TaskProxy objects that
     # replace the old ones. The .reload_successor attribute provides the
     # link(s) for us to get to the latest replacement.
     #
     # Note for "kill": It is possible for a job to trigger its trap and
     # report back to the suite before this logic is called. If so, the task
     # will no longer be TASK_STATUS_SUBMITTED or TASK_STATUS_RUNNING, and
     # its output line will be ignored here.
     tasks = {}
     for itask in itasks:
         while itask.reload_successor is not None:
             itask = itask.reload_successor
         if itask.point is not None and itask.submit_num:
             submit_num = "%02d" % (itask.submit_num)
             tasks[(str(itask.point), itask.tdef.name, submit_num)] = itask
     handlers = [(self.batch_sys_mgr.OUT_PREFIX_SUMMARY, summary_callback)]
     if more_callbacks:
         for prefix, callback in more_callbacks.items():
             handlers.append((prefix, callback))
     out = ctx.out
     if not out:
         out = ""
     bad_tasks = dict(tasks)
     for line in out.splitlines(True):
         for prefix, callback in handlers:
             if line.startswith(prefix):
                 line = line[len(prefix):].strip()
                 try:
                     path = line.split("|", 2)[1]  # timestamp, path, status
                     point, name, submit_num = path.split(os.sep, 2)
                     if prefix == self.batch_sys_mgr.OUT_PREFIX_SUMMARY:
                         del bad_tasks[(point, name, submit_num)]
                     itask = tasks[(point, name, submit_num)]
                     callback(suite, itask, ctx, line)
                 except (LookupError, ValueError) as exc:
                     LOG.warning(
                         'Unhandled %s output: %s', ctx.cmd_key, line)
                     LOG.exception(exc)
     # Task jobs that are in the original command but did not get a status
     # in the output. Handle as failures.
     for key, itask in sorted(bad_tasks.items()):
         line = (
             "|".join([ctx.timestamp, os.sep.join(key), "1"]) + "\n")
         summary_callback(suite, itask, ctx, line)
Example no. 57
0
 def _run_command_init(cls, ctx, callback=None, callback_args=None):
     """Prepare and launch shell command in ctx."""
     try:
         if ctx.cmd_kwargs.get('stdin_files'):
             if len(ctx.cmd_kwargs['stdin_files']) > 1:
                 stdin_file = cls.get_temporary_file()
                 for file_ in ctx.cmd_kwargs['stdin_files']:
                     if hasattr(file_, 'read'):
                         stdin_file.write(file_.read())
                     else:
                         with open(file_, 'rb') as file_handle:
                             stdin_file.write(file_handle.read())
                 stdin_file.seek(0)
             elif hasattr(ctx.cmd_kwargs['stdin_files'][0], 'read'):
                 stdin_file = ctx.cmd_kwargs['stdin_files'][0]
             else:
                 stdin_file = open(
                     ctx.cmd_kwargs['stdin_files'][0], 'rb')
         elif ctx.cmd_kwargs.get('stdin_str'):
             stdin_file = cls.get_temporary_file()
             stdin_file.write(ctx.cmd_kwargs.get('stdin_str').encode())
             stdin_file.seek(0)
         else:
             stdin_file = open(os.devnull)
         proc = procopen(
             ctx.cmd, stdin=stdin_file, stdoutpipe=True, stderrpipe=True,
             # Execute command as a process group leader,
             # so we can use "os.killpg" to kill the whole group.
             preexec_fn=os.setpgrp,
             env=ctx.cmd_kwargs.get('env'),
             usesh=ctx.cmd_kwargs.get('shell'))
         # calls to open a shell are aggregated in cylc_subproc.procopen()
         # with logging for what is calling it and the commands given
     except (IOError, OSError) as exc:
         if exc.filename is None:
             exc.filename = ctx.cmd[0]
         LOG.exception(exc)
         ctx.ret_code = 1
         ctx.err = str(exc)
         cls._run_command_exit(ctx, callback, callback_args)
         return None
     else:
         LOG.debug(ctx.cmd)
         return proc
Example no. 58
0
    def _dump_item(path, item, value):
        """Dump "value" to a file called "item" in the directory "path".

        1. File permission should already be user-read-write-only on
           creation by mkstemp.
        2. The combination of os.fsync and os.rename should guarantee
           that we don't end up with an incomplete file.
        """
        os.makedirs(path, exist_ok=True)
        from tempfile import NamedTemporaryFile
        handle = NamedTemporaryFile(prefix=item, dir=path, delete=False)
        try:
            handle.write(value.encode())
        except AttributeError:
            handle.write(value)
        os.fsync(handle.fileno())
        handle.close()
        fname = os.path.join(path, item)
        os.rename(handle.name, fname)
        LOG.debug('Generated %s', fname)
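
The same write-temp-then-rename guarantee can be shown standalone; the destination directory, item name and contents below are hypothetical, and the pattern is simply "temp file in the destination dir, fsync, then rename over the final name".

# Sketch: atomic file write via NamedTemporaryFile + fsync + os.rename.
import os
from tempfile import NamedTemporaryFile

def dump_item(path, item, value):
    os.makedirs(path, exist_ok=True)
    with NamedTemporaryFile(prefix=item, dir=path, delete=False) as handle:
        handle.write(value if isinstance(value, bytes) else value.encode())
        os.fsync(handle.fileno())
    # rename() over the final name is atomic on POSIX filesystems.
    os.rename(handle.name, os.path.join(path, item))

dump_item('/tmp/sketch-service', 'contact', 'HOST=example\nPORT=43001\n')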
Example no. 59
0
    def test_ioerror_is_ignored(self, mocked_suite_srv_files_mgr,
                                mocked_suite_db_mgr, mocked_broadcast_mgr):
        """Test that IOError's are ignored when closing Scheduler logs.
        When a disk errors occurs, the scheduler.close_logs method may
        result in an IOError. This, combined with other variables, may cause
        an infinite loop. So it is better that it is ignored."""
        mocked_suite_srv_files_mgr.return_value\
            .get_suite_source_dir.return_value = "."
        options = Options()
        args = ["suiteA"]
        scheduler = Scheduler(is_restart=False, options=options, args=args)

        handler = mock.MagicMock()
        handler.close.side_effect = IOError
        handler.level = logging.INFO
        LOG.addHandler(handler)

        scheduler.close_logs()
        self.assertEqual(1, handler.close.call_count)
        LOG.removeHandler(handler)
Example no. 60
0
    def _rank_good_hosts(self, all_host_stats):
        """Rank, by specified method, 'good' hosts to return the most suitable.

        Take a dictionary of hosts considered 'good' with the corresponding
        metric data, and rank them via the method specified in the global
        configuration, returning the lowest-ranked (taken as best) host.
        """
        # Convert all dict values from full metrics structures to single
        # metric data values corresponding to the rank method to rank with.
        hosts_with_vals_to_rank = dict(
            (host, metric[self.rank_method])
            for host, metric in all_host_stats.items())
        LOG.debug(
            "INFO: host %s values extracted are: %s",
            self.rank_method,
            "\n".join("  %s: %s" % item
                      for item in hosts_with_vals_to_rank.items()))

        # Sort new dict by value to return ascending-value ordered host list.
        sort_asc_hosts = sorted(
            hosts_with_vals_to_rank, key=hosts_with_vals_to_rank.get)
        base_msg = ("good (metric-returning) hosts were ranked in the "
                    "following order, from most to least suitable: %s")
        if self.rank_method in ("memory", "disk-space:" + self.USE_DISK_PATH):
            # Want 'most free' i.e. highest => reverse asc. list for ranking.
            LOG.debug(base_msg, ', '.join(sort_asc_hosts[::-1]))
            return sort_asc_hosts[-1]
        else:  # A load av. is only poss. left; 'random' dealt with earlier.
            # Want lowest => ranking given by asc. list.
            LOG.debug(base_msg, ', '.join(sort_asc_hosts))
            return sort_asc_hosts[0]
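
The ranking step on its own: sort hosts by the chosen metric and take the lowest, or the highest when "more is better" (memory, free disk space). The host names and values below are made up.

# Sketch: rank hosts by one metric; highest-is-best metrics use the reversed order.
hosts = {'hosta': 0.9, 'hostb': 0.2, 'hostc': 1.7}   # e.g. 1-minute load average

ranked = sorted(hosts, key=hosts.get)
print(ranked[0])    # lowest load -> 'hostb' (best for a load-average metric)

memory = {'hosta': 1024, 'hostb': 512, 'hostc': 8192}  # e.g. free memory in MiB
ranked = sorted(memory, key=memory.get)
print(ranked[-1])   # most free memory -> 'hostc' (best for a memory metric)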