Example 1
def addict(cfig, key, val, parents, index):
    """Add a new [parents...]key=value pair to a nested dict."""
    for p in parents:
        # drop down the parent list
        cfig = cfig[p]

    if not isinstance(cfig, dict):
        # an item of this name has already been encountered at this level
        raise FileParseError(
            'line %d: already encountered %s',
            index, itemstr(parents, key, val))

    if key in cfig:
        # this item already exists
        if (key == 'graph' and (
                parents == ['scheduling', 'dependencies'] or
                len(parents) == 3 and
                parents[-3:-1] == ['scheduling', 'dependencies'])):
            # append the new graph string to the existing one
            LOG.debug('Merging graph strings under %s', itemstr(parents))
            if not isinstance(cfig[key], list):
                cfig[key] = [cfig[key]]
            cfig[key].append(val)
        else:
            # otherwise override the existing item
            LOG.debug(
                'overriding %s old value: %s new value: %s',
                itemstr(parents, key), cfig[key], val)
            cfig[key] = val
    else:
        cfig[key] = val
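
A minimal usage sketch of the merge behaviour above (assuming a plain nested
dict in place of parsec's OrderedDictWithDefaults, with the module's LOG and
itemstr helpers in scope, and hypothetical line indices):

cfg = {'scheduling': {'dependencies': {}}}
addict(cfg, 'graph', 'a => b', ['scheduling', 'dependencies'], 1)
addict(cfg, 'graph', 'b => c', ['scheduling', 'dependencies'], 2)
# The second call appends rather than overrides:
# cfg['scheduling']['dependencies']['graph'] == ['a => b', 'b => c']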
Example 2
File: server.py Project: cylc/cylc
 def stop(self):
     """Finish serving the current request then stop the server."""
     LOG.debug('stopping zmq server...')
     self.queue.put('STOP')
     self.thread.join()  # wait for the listener to return
     self.socket.close()
     LOG.debug('...stopped')
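
The queue/join handshake above is a common way to shut down a listener
thread; a self-contained sketch of the same pattern (generic names, not the
cylc API):

import queue
import threading

q = queue.Queue()

def listener():
    while True:
        try:
            if q.get(timeout=0.5) == 'STOP':
                break  # finish the current request, then return
        except queue.Empty:
            continue  # nothing queued; keep serving

thread = threading.Thread(target=listener)
thread.start()
q.put('STOP')
thread.join()  # wait for the listener to return, as in stop() above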
Example 3
    def poll_task_jobs(self, suite, itasks, poll_succ=True, msg=None):
        """Poll jobs of specified tasks.

        Any job that is or was submitted or running can be polled, except for
        retrying tasks - which would poll (correctly) as failed. And don't poll
        succeeded tasks by default.

        This method uses _poll_task_jobs_callback() and
        _manip_task_jobs_callback() as helper/callback methods.

        _poll_task_job_callback() executes one specific job.
        """
        to_poll_tasks = []
        pollable_statuses = set([
            TASK_STATUS_SUBMITTED, TASK_STATUS_RUNNING, TASK_STATUS_FAILED])
        if poll_succ:
            pollable_statuses.add(TASK_STATUS_SUCCEEDED)
        for itask in itasks:
            if itask.state.status in pollable_statuses:
                to_poll_tasks.append(itask)
            else:
                LOG.debug("skipping %s: not pollable, "
                          "or skipping 'succeeded' tasks" % itask.identity)
        if to_poll_tasks:
            if msg is not None:
                LOG.info(msg)
            self._run_job_cmd(
                self.JOBS_POLL, suite, to_poll_tasks,
                self._poll_task_jobs_callback)
Example 4
 def _manip_task_jobs_callback(
         self, ctx, suite, itasks, summary_callback, more_callbacks=None):
     """Callback when submit/poll/kill tasks command exits."""
     if ctx.ret_code:
         LOG.error(ctx)
     else:
         LOG.debug(ctx)
     # A dict for easy reference of (CYCLE, NAME, SUBMIT_NUM) -> TaskProxy
     #
     # Note for "reload": A TaskProxy instance may be replaced on reload, so
     # the "itasks" list may not reference the TaskProxy objects that
     # replace the old ones. The .reload_successor attribute provides the
     # link(s) for us to get to the latest replacement.
     #
     # Note for "kill": It is possible for a job to trigger its trap and
     # report back to the suite before this logic is called. If so, the task
     # will no longer be TASK_STATUS_SUBMITTED or TASK_STATUS_RUNNING, and
     # its output line will be ignored here.
     tasks = {}
     for itask in itasks:
         while itask.reload_successor is not None:
             itask = itask.reload_successor
         if itask.point is not None and itask.submit_num:
             submit_num = "%02d" % (itask.submit_num)
             tasks[(str(itask.point), itask.tdef.name, submit_num)] = itask
     handlers = [(self.batch_sys_mgr.OUT_PREFIX_SUMMARY, summary_callback)]
     if more_callbacks:
         for prefix, callback in more_callbacks.items():
             handlers.append((prefix, callback))
     out = ctx.out
     if not out:
         out = ""
     bad_tasks = dict(tasks)
     for line in out.splitlines(True):
         for prefix, callback in handlers:
             if line.startswith(prefix):
                 line = line[len(prefix):].strip()
                 try:
                     path = line.split("|", 2)[1]  # timestamp, path, status
                     point, name, submit_num = path.split(os.sep, 2)
                     if prefix == self.batch_sys_mgr.OUT_PREFIX_SUMMARY:
                         del bad_tasks[(point, name, submit_num)]
                     itask = tasks[(point, name, submit_num)]
                     callback(suite, itask, ctx, line)
                 except (LookupError, ValueError, KeyError) as exc:
                     LOG.warning(
                         'Unhandled %s output: %s', ctx.cmd_key, line)
                     LOG.exception(exc)
     # Task jobs that are in the original command but did not get a status
     # in the output. Handle as failures.
     for key, itask in sorted(bad_tasks.items()):
         line = (
             "|".join([ctx.timestamp, os.sep.join(key), "1"]) + "\n")
         summary_callback(suite, itask, ctx, line)
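
The summary lines handled here have the form
PREFIX + 'timestamp|cycle/name/submit_num|status'; a sketch of the parsing
with hypothetical values:

import os

line = '2019-06-03T12:00:00Z|20190603T0000Z/foo/01|0'  # timestamp, path, status
path = line.split('|', 2)[1]
point, name, submit_num = path.split(os.sep, 2)
# On POSIX (os.sep == '/'):
# point == '20190603T0000Z', name == 'foo', submit_num == '01'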
Example 5
def addsect(cfig, sname, parents):
    """Add a new section to a nested dict."""
    for p in parents:
        # drop down the parent list
        cfig = cfig[p]
    if sname in cfig:
        # this doesn't warrant a warning unless contained items are repeated
        LOG.debug(
            'Section already encountered: %s', itemstr(parents + [sname]))
    else:
        cfig[sname] = OrderedDictWithDefaults()
Example 6
 def _remote_host_select_callback(self, proc_ctx, cmd_str):
     """Callback when host select command exits"""
     self.ready = True
     if proc_ctx.ret_code == 0 and proc_ctx.out:
         # Good status
         LOG.debug(proc_ctx)
         self.remote_host_str_map[cmd_str] = proc_ctx.out.splitlines()[0]
     else:
         # Bad status
         LOG.error(proc_ctx)
         self.remote_host_str_map[cmd_str] = TaskRemoteMgmtError(
             TaskRemoteMgmtError.MSG_SELECT, (cmd_str, None), cmd_str,
             proc_ctx.ret_code, proc_ctx.out, proc_ctx.err)
Example 7
File: client.py Project: cylc/cylc
    async def async_request(self, command, args=None, timeout=None):
        """Send an asynchronous request using asyncio.

        Has the same arguments and return values as ``serial_request``.

        """
        if timeout:
            timeout = float(timeout)
        timeout = (timeout * 1000 if timeout else None) or self.timeout
        if not args:
            args = {}

        # get secret for this request
        # assumes secret won't change during the request
        try:
            secret = self.secret()
        except cylc.flow.suite_srv_files_mgr.SuiteServiceFileError:
            raise ClientError('could not read suite passphrase')

        # send message
        msg = {'command': command, 'args': args}
        msg.update(self.header)
        LOG.debug('zmq:send %s' % msg)
        message = encrypt(msg, secret)
        self.socket.send_string(message)

        # receive response
        if self.poller.poll(timeout):
            res = await self.socket.recv_string()
        else:
            if self.timeout_handler:
                self.timeout_handler()
            raise ClientTimeout('Timeout waiting for server response.')

        try:
            response = decrypt(res, secret)
            LOG.debug('zmq:recv %s' % response)
        except jose.exceptions.JWTError:
            raise ClientError(
                'Could not decrypt response. Has the passphrase changed?')

        try:
            return response['data']
        except KeyError:
            error = response['error']
            raise ClientError(error['message'], error.get('traceback'))
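
A hedged usage sketch (the command name is illustrative, not a documented
cylc command):

async def ping(client):
    # `client` is an instance of the client class above
    return await client.async_request('ping_suite', timeout=10)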
Example 8
 def _run_command_init(cls, ctx, callback=None, callback_args=None):
     """Prepare and launch shell command in ctx."""
     try:
         if ctx.cmd_kwargs.get('stdin_files'):
             if len(ctx.cmd_kwargs['stdin_files']) > 1:
                 stdin_file = cls.get_temporary_file()
                 for file_ in ctx.cmd_kwargs['stdin_files']:
                     if hasattr(file_, 'read'):
                         stdin_file.write(file_.read())
                     else:
                         stdin_file.write(open(file_, 'rb').read())
                 stdin_file.seek(0)
             elif hasattr(ctx.cmd_kwargs['stdin_files'][0], 'read'):
                 stdin_file = ctx.cmd_kwargs['stdin_files'][0]
             else:
                 stdin_file = open(
                     ctx.cmd_kwargs['stdin_files'][0], 'rb')
         elif ctx.cmd_kwargs.get('stdin_str'):
             stdin_file = cls.get_temporary_file()
             stdin_file.write(ctx.cmd_kwargs.get('stdin_str').encode())
             stdin_file.seek(0)
         else:
             stdin_file = open(os.devnull)
         proc = procopen(
             ctx.cmd, stdin=stdin_file, stdoutpipe=True, stderrpipe=True,
             # Execute command as a process group leader,
             # so we can use "os.killpg" to kill the whole group.
             preexec_fn=os.setpgrp,
             env=ctx.cmd_kwargs.get('env'),
             usesh=ctx.cmd_kwargs.get('shell'))
         # calls to open a shell are aggregated in cylc_subproc.procopen()
         # with logging for what is calling it and the commands given
     except (IOError, OSError) as exc:
         if exc.filename is None:
             exc.filename = ctx.cmd[0]
         LOG.exception(exc)
         ctx.ret_code = 1
         ctx.err = str(exc)
         cls._run_command_exit(ctx, callback, callback_args)
         return None
     else:
         LOG.debug(ctx.cmd)
         return proc
Example 9
 def _prep_submit_task_job_error(self, suite, itask, dry_run, action, exc):
     """Helper for self._prep_submit_task_job. On error."""
     LOG.debug("submit_num %s" % itask.submit_num)
     LOG.debug(traceback.format_exc())
     LOG.error(exc)
     log_task_job_activity(
         SubProcContext(self.JOBS_SUBMIT, action, err=exc, ret_code=1),
         suite, itask.point, itask.tdef.name, submit_num=itask.submit_num)
     if not dry_run:
         # Persist
         self.suite_db_mgr.put_insert_task_jobs(itask, {
             'is_manual_submit': itask.is_manual_submit,
             'try_num': itask.get_try_num(),
             'time_submit': get_current_time_string(),
             'batch_sys_name': itask.summary.get('batch_sys_name'),
         })
         itask.is_manual_submit = False
         self.task_events_mgr.process_message(
             itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED)
Example 10
    def _dump_item(path, item, value):
        """Dump "value" to a file called "item" in the directory "path".

        1. File permission should already be user-read-write-only on
           creation by mkstemp.
        2. The combination of os.fsync and os.rename should guarantee
           that we don't end up with an incomplete file.
        """
        os.makedirs(path, exist_ok=True)
        from tempfile import NamedTemporaryFile
        handle = NamedTemporaryFile(prefix=item, dir=path, delete=False)
        try:
            handle.write(value.encode())
        except AttributeError:
            handle.write(value)
        os.fsync(handle.fileno())
        handle.close()
        fname = os.path.join(path, item)
        os.rename(handle.name, fname)
        LOG.debug('Generated %s', fname)
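
The same write-fsync-close-rename sequence can be reused anywhere an
all-or-nothing file update is needed; a generic sketch:

import os
from tempfile import NamedTemporaryFile

def atomic_write(path, item, data):
    """Write bytes to path/item with no window where a partial file exists."""
    os.makedirs(path, exist_ok=True)
    handle = NamedTemporaryFile(prefix=item, dir=path, delete=False)
    handle.write(data)
    os.fsync(handle.fileno())  # flush to disk before renaming
    handle.close()
    # rename is atomic within a single filesystem
    os.rename(handle.name, os.path.join(path, item))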
Example 11
 def load(self):
     """Load or reload configuration from files."""
     self.sparse.clear()
     self.dense.clear()
     LOG.debug("Loading site/user global config files")
     conf_path_str = os.getenv("CYLC_CONF_PATH")
     if conf_path_str is None:
         # CYLC_CONF_PATH not defined, use default locations.
         for conf_dir_1, conf_dir_2, conf_type in [
                 (self.SITE_CONF_DIR, self.SITE_CONF_DIR_OLD,
                  upgrader.SITE_CONFIG),
                 (self.USER_CONF_DIR_1, self.USER_CONF_DIR_2,
                  upgrader.USER_CONFIG)]:
             fname1 = os.path.join(conf_dir_1, self.CONF_BASE)
             fname2 = os.path.join(conf_dir_2, self.CONF_BASE)
             if os.access(fname1, os.F_OK | os.R_OK):
                 fname = fname1
             elif os.access(fname2, os.F_OK | os.R_OK):
                 fname = fname2
             else:
                 continue
             try:
                 self.loadcfg(fname, conf_type)
             except ParsecError as exc:
                 if conf_type == upgrader.SITE_CONFIG:
                     # Warn on bad site file (users can't fix it).
                     LOG.warning(
                         'ignoring bad %s %s:\n%s', conf_type, fname, exc)
                 else:
                     # Abort on bad user file (users can fix it).
                     LOG.error('bad %s %s', conf_type, fname)
                     raise
                 break
     elif conf_path_str:
         # CYLC_CONF_PATH defined with a value
         for path in conf_path_str.split(os.pathsep):
             fname = os.path.join(path, self.CONF_BASE)
             if os.access(fname, os.F_OK | os.R_OK):
                 self.loadcfg(fname, upgrader.USER_CONFIG)
     # (OK if no global.rc is found, just use system defaults).
     self.transform()
Example 12
    def load(self):
        """Load or reload configuration from files."""
        self.sparse.clear()
        self.dense.clear()
        LOG.debug("Loading site/user config files")
        conf_path_str = os.getenv("CYLC_CONF_PATH")
        if conf_path_str:
            # Explicit config file override.
            fname = os.path.join(conf_path_str, self.CONF_BASENAME)
            self._load(fname, upgrader.USER_CONFIG)
        elif conf_path_str is None:
            # Use default locations.
            for conf_type, conf_dir in self.conf_dir_hierarchy:
                fname = os.path.join(conf_dir, self.CONF_BASENAME)
                try:
                    self._load(fname, conf_type)
                except ParsecError:
                    LOG.error(f'bad {conf_type} {fname}')
                    raise

        self._set_default_editors()
Example 13
 def _remote_init_callback(self, proc_ctx, host, owner, tmphandle):
     """Callback when "cylc remote-init" exits"""
     self.ready = True
     try:
         tmphandle.close()
     except OSError:  # E.g. ignore bad unlink, etc
         pass
     if proc_ctx.ret_code == 0:
         for status in (REMOTE_INIT_DONE, REMOTE_INIT_NOT_REQUIRED):
             if status in proc_ctx.out:
                 # Good status
                 LOG.debug(proc_ctx)
                 self.remote_init_map[(host, owner)] = status
                 return
     # Bad status
     LOG.error(
         TaskRemoteMgmtError(TaskRemoteMgmtError.MSG_INIT, (host, owner),
                             ' '.join(quote(item) for item in proc_ctx.cmd),
                             proc_ctx.ret_code, proc_ctx.out, proc_ctx.err))
     LOG.error(proc_ctx)
     self.remote_init_map[(host, owner)] = REMOTE_INIT_FAILED
Example 14
def extract_resources(target_dir, resources=None):
    """Extract cylc.flow resources and write them to a target directory.

    Arguments:
        target_dir - where to put extracted resources, created if necessary
        resources - list of resource names, e.g. ['etc/foo.bar']
    """
    if resources is None:
        resources = resource_names
    for resource in resources:
        if resource not in resource_names:
            raise ValueError(f"Invalid resource name {resource}")
        path = Path(target_dir, resource)
        LOG.debug(f"Extracting {resource} to {path}")
        pdir = path.parent
        if not pdir.exists():
            pdir.mkdir(parents=True)
        # In spite of the name, this returns a byte array, not a string:
        res = pr.resource_string('cylc.flow', resource)
        with open(path, 'wb') as h:
            h.write(res)
Example 15
    def callback(self, ctx: SubFuncContext):
        """Callback for asynchronous xtrigger functions.

        Record satisfaction status and function results dict.

        Args:
            ctx (SubFuncContext): function context
        Raises:
            ValueError: if the context given is not active
        """
        LOG.debug(ctx)
        sig = ctx.get_signature()
        self.active.remove(sig)
        try:
            satisfied, results = json.loads(ctx.out)
        except (ValueError, TypeError):
            return
        LOG.debug('%s: returned %s' % (sig, results))
        if satisfied:
            self.pflag = True
            self.sat_xtrig[sig] = results
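
The ctx.out payload is expected to be a JSON-encoded (satisfied, results)
pair; for example (hypothetical values):

import json

satisfied, results = json.loads('[true, {"file": "/tmp/ready.dat"}]')
# satisfied == True, results == {'file': '/tmp/ready.dat'}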
Example 16
def remote_clean(reg, platform_names, timeout):
    """Run subprocesses to clean workflows on remote install targets
    (skip localhost), given a set of platform names to look up.

    Args:
        reg (str): Workflow name.
        platform_names (list): List of platform names to look up in the global
            config, in order to determine the install targets to clean on.
        timeout (str): Number of seconds to wait before cancelling.
    """
    try:
        install_targets_map = (
            get_install_target_to_platforms_map(platform_names))
    except PlatformLookupError as exc:
        raise PlatformLookupError(
            "Cannot clean on remote platforms as the workflow database is "
            f"out of date/inconsistent with the global config - {exc}")

    pool = []
    for target, platforms in install_targets_map.items():
        if target == get_localhost_install_target():
            continue
        shuffle(platforms)
        LOG.info(
            f"Cleaning on install target: {platforms[0]['install target']}")
        # Issue ssh command:
        pool.append(
            (_remote_clean_cmd(reg, platforms[0], timeout), target, platforms)
        )
    failed_targets = []
    # Handle subproc pool results almost concurrently:
    while pool:
        for proc, target, platforms in pool:
            ret_code = proc.poll()
            if ret_code is None:  # proc still running
                continue
            pool.remove((proc, target, platforms))
            out, err = (f.decode() for f in proc.communicate())
            if out:
                LOG.debug(out)
            if ret_code:
                # Try again using the next platform for this install target:
                this_platform = platforms.pop(0)
                excn = TaskRemoteMgmtError(
                    TaskRemoteMgmtError.MSG_TIDY, this_platform['name'],
                    " ".join(proc.args), ret_code, out, err)
                LOG.debug(excn)
                if platforms:
                    pool.append(
                        (_remote_clean_cmd(reg, platforms[0], timeout),
                         target, platforms)
                    )
                else:  # Exhausted list of platforms
                    failed_targets.append(target)
            elif err:
                LOG.debug(err)
        time.sleep(0.2)
    if failed_targets:
        raise CylcError(
            f"Could not clean on install targets: {', '.join(failed_targets)}")
Example 17
def make_symlink(path: Union[Path, str], target: Union[Path, str]) -> bool:
    """Makes symlinks for directories.

    Args:
        path: Absolute path of the desired symlink.
        target: Absolute path of the symlink's target directory.
    """
    path = Path(path)
    target = Path(target)
    if path.exists():
        # note: all three checks are needed here for the case where the user
        # has set their own symlink, which does not match the one set in the
        # global config.
        if path.is_symlink() and target.exists() and path.samefile(target):
            # correct symlink already exists
            return False
        # symlink name is in use by a physical file or directory
        # log and return
        LOG.debug(
            f"Unable to create symlink to {target}. "
            f"The path {path} already exists.")
        return False
    elif path.is_symlink():
        # remove a bad symlink.
        try:
            path.unlink()
        except OSError:
            raise WorkflowFilesError(
                f"Error when symlinking. Failed to unlink bad symlink {path}.")
    target.mkdir(parents=True, exist_ok=True)

    # This is needed in case share and share/cycle have the same symlink dir:
    if path.exists():
        return False

    path.parent.mkdir(parents=True, exist_ok=True)
    try:
        path.symlink_to(target)
        return True
    except OSError as exc:
        raise WorkflowFilesError(f"Error when symlinking\n{exc}")
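
A usage sketch (paths are illustrative):

created = make_symlink(
    '/home/me/cylc-run/myflow/share',     # desired symlink location
    '/scratch/me/cylc-run/myflow/share',  # real directory to point at
)
# True if a new symlink was made; False if a correct one already existed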
Example 18
    def _rank_good_hosts(self, all_host_stats):
        """Rank, by specified method, 'good' hosts to return the most suitable.

        Take a dictionary of hosts considered 'good' with the corresponding
        metric data, and rank them via the method specified in the global
        configuration, returning the lowest-ranked (taken as best) host.
        """
        # Convert all dict values from full metrics structures to single
        # metric data values corresponding to the rank method to rank with.
        hosts_with_vals_to_rank = dict(
            (host, metric[self.rank_method])
            for host, metric in all_host_stats.items())
        LOG.debug(
            "INFO: host %s values extracted are: %s", self.rank_method,
            "\n".join("  %s: %s" % item
                      for item in hosts_with_vals_to_rank.items()))

        # Sort new dict by value to return ascending-value ordered host list.
        sort_asc_hosts = sorted(hosts_with_vals_to_rank,
                                key=hosts_with_vals_to_rank.get)
        base_msg = ("good (metric-returning) hosts were ranked in the "
                    "following order, from most to least suitable: %s")
        if self.rank_method in ("memory", "disk-space:" + self.USE_DISK_PATH):
            # Want 'most free' i.e. highest => reverse asc. list for ranking.
            LOG.debug(base_msg, ', '.join(sort_asc_hosts[::-1]))
            return sort_asc_hosts[-1]
        else:  # A load av. is only poss. left; 'random' dealt with earlier.
            # Want lowest => ranking given by asc. list.
            LOG.debug(base_msg, ', '.join(sort_asc_hosts))
            return sort_asc_hosts[0]
Example 19
 def _prep_submit_task_job_error(self, suite, itask, action, exc):
     """Helper for self._prep_submit_task_job. On error."""
     LOG.debug("submit_num %s" % itask.submit_num)
     log_task_job_activity(SubProcContext(self.JOBS_SUBMIT,
                                          action,
                                          err=exc,
                                          ret_code=1),
                           suite,
                           itask.point,
                           itask.tdef.name,
                           submit_num=itask.submit_num)
     # Persist
     self.suite_db_mgr.put_insert_task_jobs(
         itask, {
             'is_manual_submit': itask.is_manual_submit,
             'try_num': itask.get_try_num(),
             'time_submit': get_current_time_string(),
             'batch_sys_name': itask.summary.get('batch_sys_name'),
         })
     itask.is_manual_submit = False
     self.task_events_mgr.process_message(
         itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED)
Example 20
    def callback(self, ctx: SubFuncContext):
        """Callback for asynchronous xtrigger functions.

        Record satisfaction status and function results dict.

        Args:
            ctx (SubFuncContext): function context
        Raises:
            ValueError: if the context given is not active
        """
        LOG.debug(ctx)
        sig = ctx.get_signature()
        self.active.remove(sig)
        try:
            satisfied, results = json.loads(ctx.out)
        except (ValueError, TypeError):
            return
        LOG.debug('%s: returned %s', sig, results)
        if satisfied:
            self.data_store_mgr.delta_task_xtrigger(sig, True)
            LOG.info('xtrigger satisfied: %s = %s', ctx.label, sig)
            self.sat_xtrig[sig] = results
Example 21
def log_task_job_activity(ctx, suite, point, name, submit_num=None):
    """Log an activity for a task job."""
    ctx_str = str(ctx)
    if not ctx_str:
        return
    if isinstance(ctx.cmd_key, tuple):  # An event handler
        submit_num = ctx.cmd_key[-1]
    job_activity_log = get_task_job_activity_log(suite, point, name,
                                                 submit_num)
    try:
        with open(job_activity_log, "ab") as handle:
            handle.write((ctx_str + '\n').encode())
    except IOError as exc:
        # This happens when there is no job directory, e.g. if the job host
        # selection command caused a submission failure. In this case, just
        # send the information to the suite log.
        LOG.exception(exc)
        LOG.info(ctx_str)
    if ctx.cmd and ctx.ret_code:
        LOG.error(ctx_str)
    elif ctx.cmd:
        LOG.debug(ctx_str)
Example 22
File: server.py Project: cylc/cylc
    def _listener(self):
        """The server main loop, listen for and serve requests."""
        while True:
            # process any commands passed to the listener by its parent process
            if self.queue.qsize():
                command = self.queue.get()
                if command == 'STOP':
                    break
                else:
                    raise ValueError('Unknown command "%s"' % command)

            try:
                # wait RECV_TIMEOUT for a message
                msg = self.socket.recv_string()
            except zmq.error.Again:
                # timeout, continue with the loop, this allows the listener
                # thread to stop
                continue

            # attempt to decode the message, authenticating the user in the
            # process
            try:
                message = self.decode(msg, self.secret())
            except Exception as exc:  # purposefully catch generic exception
                # failed to decode message, possibly resulting from failed
                # authentication
                import traceback
                return {'error': {
                    'message': str(exc), 'traceback': traceback.format_exc()}}
            else:
                # success case - serve the request
                LOG.debug('zmq:recv %s', message)
                res = self._receiver(message)
                response = self.encode(res, self.secret())
                LOG.debug('zmq:send %s', res)

            # send back the response
            self.socket.send_string(response)
            sleep(0)  # yield control to other threads
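
The zmq.error.Again branch works because the socket is created with a receive
timeout; a sketch of that setup (socket type, address and timeout value
assumed):

import zmq

context = zmq.Context()
socket = context.socket(zmq.REP)
socket.RCVTIMEO = 1000  # ms; recv_string() raises zmq.error.Again on timeout
socket.bind('tcp://127.0.0.1:5555')
try:
    msg = socket.recv_string()
except zmq.error.Again:
    pass  # timed out; loop around and check the command queue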
Example 23
def addict(cfig, key, val, parents, index):
    """Add a new [parents...]key=value pair to a nested dict."""
    for p in parents:
        # drop down the parent list
        cfig = cfig[p]

    if not isinstance(cfig, dict):
        # an item of this name has already been encountered at this level
        raise FileParseError(
            'line %d: already encountered %s',
            index, itemstr(parents, key, val))

    if key in cfig:
        oldval = cfig[key]
        # this item already exists
        if (
            parents[0:2] == ['scheduling', 'graph'] or
            # BACK COMPAT: [scheduling][dependencies]
            # url:
            #     https://github.com/cylc/cylc-flow/pull/3191
            # from:
            #     Cylc<=7
            # to:
            #     Cylc8
            # remove at:
            #     Cylc9
            parents[0:2] == ['scheduling', 'dependencies']
        ):
            # append the new graph string to the existing one
            if not isinstance(cfig[key], list):
                cfig[key] = [cfig[key]]
            cfig[key].append(val)
        else:
            cfig[key] = val
        LOG.debug(
            '%s: already exists in configuration:\nold: %s\nnew: %s',
            key, repr(oldval), repr(cfig[key]))  # repr preserves \n
    else:
        cfig[key] = val
Example 24
def load(config, additional_plugins=None):
    additional_plugins = additional_plugins or []
    entry_points = {
        entry_point.name: entry_point
        for entry_point in pkg_resources.iter_entry_points('cylc.main_loop')
    }
    plugins = {'state': {}, 'timings': {}}
    for plugin_name in config['plugins'] + additional_plugins:
        # get plugin
        try:
            module_name = entry_points[plugin_name.replace(' ', '_')]
        except KeyError:
            raise UserInputError(
                f'No main-loop plugin: "{plugin_name}"\n' +
                '    Available plugins:\n' +
                indent('\n'.join(sorted(entry_points)), '        '))
        # load plugin
        try:
            module = module_name.load()
        except Exception:
            raise CylcError(f'Could not load plugin: "{plugin_name}"')
        # load coroutines
        log = []
        for coro_name, coro in ((coro_name, coro)
                                for coro_name, coro in getmembers(module)
                                if isfunction(coro)
                                if hasattr(coro, 'main_loop')):
            log.append(coro_name)
            plugins.setdefault(coro.main_loop,
                               {})[(plugin_name, coro_name)] = coro
            plugins['timings'][(plugin_name, coro_name)] = deque(maxlen=1)
        LOG.debug('Loaded main loop plugin "%s": %s', plugin_name + '\n',
                  '\n'.join((f'* {x}' for x in log)))
        # set the initial state of the plugin
        plugins['state'][plugin_name] = {}
    # make a note of the config here for ease of reference
    plugins['config'] = config
    return plugins
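
Plugins are discovered through the 'cylc.main_loop' setuptools entry point
group; a sketch of registering and enumerating one (package and module names
hypothetical):

# In the plugin's setup.cfg:
#     [options.entry_points]
#     cylc.main_loop =
#         my_plugin = my_package.my_module
import pkg_resources

for entry_point in pkg_resources.iter_entry_points('cylc.main_loop'):
    print(entry_point.name, entry_point.module_name)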
Example 25
def _remote_clean_cmd(reg, platform, timeout):
    """Remove a stopped workflow on a remote host.

    Call "cylc clean --local-only" over ssh and return the subprocess.

    Args:
        reg (str): Workflow name.
        platform (dict): Config for the platform on which to remove the
            workflow.
        timeout (str): Number of seconds to wait before cancelling the command.
    """
    LOG.debug(
        f'Cleaning on install target: {platform["install target"]} '
        f'(using platform: {platform["name"]})'
    )
    cmd = construct_ssh_cmd(
        ['clean', '--local-only', reg],
        platform,
        timeout=timeout,
        set_verbosity=True
    )
    LOG.debug(" ".join(cmd))
    return Popen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
Example 26
 def _job_logs_retrieval_callback(self, proc_ctx, schd_ctx):
     """Call back when log job retrieval completes."""
     if proc_ctx.ret_code:
         LOG.error(proc_ctx)
     else:
         LOG.debug(proc_ctx)
     for id_key in proc_ctx.cmd_kwargs["id_keys"]:
         key1, point, name, submit_num = id_key
         try:
             # All completed jobs are expected to have a "job.out".
             fnames = [JOB_LOG_OUT]
             try:
                 if key1[1] not in 'succeeded':
                     fnames.append(JOB_LOG_ERR)
             except TypeError:
                 pass
             fname_oks = {}
             for fname in fnames:
                 fname_oks[fname] = os.path.exists(
                     get_task_job_log(schd_ctx.suite, point, name,
                                      submit_num, fname))
             # All expected paths must exist to record a good attempt
             log_ctx = SubProcContext((key1, submit_num), None)
             if all(fname_oks.values()):
                 log_ctx.ret_code = 0
                 del self.event_timers[id_key]
             else:
                 log_ctx.ret_code = 1
                 log_ctx.err = "File(s) not retrieved:"
                 for fname, exist_ok in sorted(fname_oks.items()):
                     if not exist_ok:
                         log_ctx.err += " %s" % fname
                 self.event_timers[id_key].unset_waiting()
             log_task_job_activity(log_ctx, schd_ctx.suite, point, name,
                                   submit_num)
         except KeyError as exc:
             LOG.exception(exc)
Example 27
    def _file_install_callback(self, ctx, platform, install_target):
        """Callback when file installation exits.

        Sets remote_init_map to REMOTE_FILE_INSTALL_DONE on success and to
        REMOTE_FILE_INSTALL_FAILED on error.
         """
        if ctx.out:
            RSYNC_LOG.info('File installation information for '
                           f'{install_target}:\n{ctx.out}')
        if ctx.ret_code == 0:
            # Both file installation and remote init success
            LOG.debug(f"File installation complete for {install_target}")
            self.remote_init_map[install_target] = REMOTE_FILE_INSTALL_DONE
            self.ready = True
            return
        else:
            self.remote_init_map[install_target] = REMOTE_FILE_INSTALL_FAILED
            LOG.error(
                PlatformError(
                    PlatformError.MSG_INIT,
                    platform['name'],
                    ctx=ctx,
                ))
            self.ready = True
Example 28
    def create_cylc_run_tree(self, suite):
        """Create all top-level cylc-run output dirs on the suite host."""
        cfg = self.get()
        item = 'suite run directory'
        idir = self.get_derived_host_item(suite, item)
        LOG.debug('creating %s: %s', item, idir)
        if cfg['enable run directory housekeeping']:
            self.roll_directory(
                idir, item, cfg['run directory rolling archive length'])

        for item in [
                'suite log directory',
                'suite job log directory',
                'suite config log directory',
                'suite work directory',
                'suite share directory']:
            idir = self.get_derived_host_item(suite, item)
            LOG.debug('creating %s: %s', item, idir)
            self.create_directory(idir, item)

        item = 'temporary directory'
        value = cfg[item]
        if value:
            self.create_directory(value, item)
Example 29
    def file_install(self, platform):
        """Install required files on the remote install target.

        Included by default in the file installation:
            Files:
                .service/server.key  (required for ZMQ authentication)
            Directories:
                app/
                bin/
                etc/
                lib/
        """
        install_target = platform['install target']
        self.remote_init_map[install_target] = REMOTE_FILE_INSTALL_IN_PROGRESS
        src_path = get_workflow_run_dir(self.workflow)
        dst_path = get_remote_workflow_run_dir(self.workflow)
        ctx = SubProcContext(
            'file-install',
            construct_rsync_over_ssh_cmd(src_path, dst_path, platform,
                                         self.rsync_includes))
        LOG.debug(f"Begin file installation on {install_target}")
        self.proc_pool.put_command(ctx, self._file_install_callback,
                                   [install_target])
Example 30
def get_vc_info(path: Union[Path, str]) -> Optional['OrderedDict[str, str]']:
    """Return the version control information for a repository, given its path.
    """
    info = OrderedDict()
    missing_base = False
    for vcs, args in INFO_COMMANDS.items():
        try:
            out = _run_cmd(vcs, args, cwd=path)
        except VCSNotInstalledError as exc:
            LOG.debug(exc)
            continue
        except VCSMissingBaseError as exc:
            missing_base = True
            LOG.debug(exc)
        except OSError as exc:
            if not any(
                exc.strerror.lower().startswith(err)
                for err in NOT_REPO_ERRS[vcs]
            ):
                raise exc
            else:
                LOG.debug(f"Source dir {path} is not a {vcs} repository")
                continue

        info['version control system'] = vcs
        if vcs == SVN:
            info.update(_parse_svn_info(out))
        elif vcs == GIT:
            if not missing_base:
                info['repository version'] = out.splitlines()[0]
                info['commit'] = _get_git_commit(path)
            info['working copy root path'] = str(path)
        info['status'] = get_status(vcs, path)

        LOG.debug(f"{vcs} repository detected")
        return info

    return None
Example 31
def remove_dir_or_file(path: Union[Path, str]) -> None:
    """Delete a directory tree, or a file, or a symlink.
    Does not follow symlinks.

    Args:
        path: the absolute path of the directory/file/symlink to delete.
    """
    if not os.path.isabs(path):
        raise ValueError("Path must be absolute")
    if os.path.islink(path):
        LOG.debug(f"Removing symlink: {path}")
        os.remove(path)
    elif os.path.isfile(path):
        LOG.debug(f"Removing file: {path}")
        os.remove(path)
    else:
        LOG.debug(f"Removing directory: {path}")
        rmtree(path, onerror=handle_rmtree_err)
Example 32
    def upgrade_to_platforms(self):
        """upgrade [job]batch system and [remote]host to platform

        * Add 'platform' and 'user' columns to table task_jobs.
        * Remove 'user_at_host' and 'batch_sys_name' columns


        Returns:
            bool - True if upgrade performed, False if upgrade skipped.
        """
        conn = self.connect()

        # check if upgrade required
        schema = conn.execute(rf'PRAGMA table_info({self.TABLE_TASK_JOBS})')
        for _, name, *_ in schema:
            if name == 'platform_name':
                LOG.debug('platform_name column present - skipping db upgrade')
                return False

        # Perform upgrade:
        table = self.TABLE_TASK_JOBS
        LOG.info('Upgrade to Cylc 8 platforms syntax')
        conn.execute(rf'''
                ALTER TABLE
                    {table}
                ADD COLUMN
                    user TEXT
            ''')
        conn.execute(rf'''
                ALTER TABLE
                    {table}
                ADD COLUMN
                    platform_name TEXT
            ''')
        job_platforms = glbl_cfg(cached=False).get(['platforms'])
        for cycle, name, user_at_host, batch_system in conn.execute(rf'''
                SELECT
                    cycle, name, user_at_host, batch_system
                FROM
                    {table}
        '''):
            match = re.match(r"(?P<user>\S+)@(?P<host>\S+)", user_at_host)
            if match:
                user = match.group('user')
                host = match.group('host')
            else:
                user = ''
                host = user_at_host
            platform = platform_from_job_info(job_platforms,
                                              {'batch system': batch_system},
                                              {'host': host})
            conn.execute(
                rf'''
                    UPDATE
                        {table}
                    SET
                        user=?,
                        platform_name=?
                    WHERE
                        cycle==?
                        AND name==?
                ''', (user, platform, cycle, name))
        conn.commit()
        return True
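
PRAGMA table_info returns one row per column, starting (cid, name, type, ...),
which is why the schema check above unpacks '_, name, *_'; a self-contained
illustration:

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE task_jobs (cycle TEXT, name TEXT)')
for cid, name, dtype, *_ in conn.execute('PRAGMA table_info(task_jobs)'):
    print(cid, name, dtype)  # -> 0 cycle TEXT, then 1 name TEXT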
Example 33
def parse(fpath, output_fname=None, template_vars=None):
    """Parse file items line-by-line into a corresponding nested dict."""

    # read and process the file (jinja2, include-files, line continuation)
    flines = read_and_proc(fpath, template_vars)
    if output_fname:
        with open(output_fname, 'w') as handle:
            handle.write('\n'.join(flines) + '\n')
        LOG.debug('Processed configuration dumped: %s', output_fname)

    nesting_level = 0
    config = OrderedDictWithDefaults()
    parents = []

    maxline = len(flines) - 1
    index = -1

    while index < maxline:
        index += 1
        line = flines[index]

        if re.match(_LINECOMMENT, line):
            # skip full-line comments
            continue

        if re.match(_BLANKLINE, line):
            # skip blank lines
            continue

        m = re.match(_HEADING, line)
        if m:
            # matched a section heading
            s_open, sect_name, s_close = m.groups()[1:-1]
            nb = len(s_open)

            if nb != len(s_close):
                raise FileParseError('bracket mismatch', index, line)
            elif nb == nesting_level:
                # sibling section
                parents = parents[:-1] + [sect_name]
            elif nb == nesting_level + 1:
                # child section
                parents = parents + [sect_name]
            elif nb < nesting_level:
                # back up one or more levels
                ndif = nesting_level - nb
                parents = parents[:-ndif - 1] + [sect_name]
            else:
                raise FileParseError(
                    'Error line ' + str(index + 1) + ': ' + line)
            nesting_level = nb
            addsect(config, sect_name, parents[:-1])

        else:
            m = re.match(_KEY_VALUE, line)
            if m:
                # matched a key=value item
                key, _, val = m.groups()[1:]
                if val.startswith('"""') or val.startswith("'''"):
                    # triple quoted - may be a multiline value
                    val, index = multiline(flines, val, index, maxline)
                addict(config, key, val, parents, index)
            else:
                # no match
                raise FileParseError(
                    'Invalid line ' + str(index + 1) + ': ' + line)

    return config
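
For illustration, a file containing (hypothetical content):

    [scheduling]
        [[dependencies]]
            graph = a => b

parses to the nested-dict equivalent of
{'scheduling': {'dependencies': {'graph': 'a => b'}}} - one bracket pair per
nesting level, with key=value items attached at the current level.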
Example 34
def read_and_proc(fpath, template_vars=None, viewcfg=None, asedit=False):
    """
    Read a cylc parsec config file (at fpath), inline any include files,
    process with Jinja2, and concatenate continuation lines.
    Jinja2 processing must be done before concatenation - it could be
    used to generate continuation lines.
    """
    fdir = os.path.dirname(fpath)

    # Allow Python modules in lib/python/ (e.g. for use by Jinja2 filters).
    suite_lib_python = os.path.join(fdir, "lib", "python")
    if os.path.isdir(suite_lib_python) and suite_lib_python not in sys.path:
        sys.path.append(suite_lib_python)

    LOG.debug('Reading file %s', fpath)

    # read the file into a list, stripping newlines
    with open(fpath) as f:
        flines = [line.rstrip('\n') for line in f]

    do_inline = True
    do_empy = True
    do_jinja2 = True
    do_contin = True
    if viewcfg:
        if not viewcfg['empy']:
            do_empy = False
        if not viewcfg['jinja2']:
            do_jinja2 = False
        if not viewcfg['contin']:
            do_contin = False
        if not viewcfg['inline']:
            do_inline = False

    # inline any cylc include-files
    if do_inline:
        flines = inline(
            flines, fdir, fpath, False, viewcfg=viewcfg, for_edit=asedit)

    # process with EmPy
    if do_empy:
        if flines and re.match(r'^#![Ee]m[Pp]y\s*', flines[0]):
            LOG.debug('Processing with EmPy')
            try:
                from cylc.flow.parsec.empysupport import empyprocess
            except (ImportError, ModuleNotFoundError):
                raise ParsecError('EmPy Python package must be installed '
                                  'to process file: ' + fpath)
            flines = empyprocess(flines, fdir, template_vars)

    # process with Jinja2
    if do_jinja2:
        if flines and re.match(r'^#![jJ]inja2\s*', flines[0]):
            LOG.debug('Processing with Jinja2')
            try:
                from cylc.flow.parsec.jinja2support import jinja2process
            except (ImportError, ModuleNotFoundError):
                raise ParsecError('Jinja2 Python package must be installed '
                                  'to process file: ' + fpath)
            flines = jinja2process(flines, fdir, template_vars)

    # concatenate continuation lines
    if do_contin:
        flines = _concatenate(flines)

    # return rstripped lines
    return [fl.rstrip() for fl in flines]
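
Template processing is opt-in via a hashbang on the first line of the file;
for example a file beginning (hypothetical content):

    #!Jinja2
    {% set N = 2 %}

matches the r'^#![jJ]inja2\s*' pattern above and is routed through
jinja2process.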
Example 35
    def _prep_submit_task_job(self, suite, itask, dry_run, check_syntax=True):
        """Prepare a task job submission.

        Return itask on a good preparation.

        """
        if itask.local_job_file_path and not dry_run:
            return itask

        # Handle broadcasts
        overrides = self.task_events_mgr.broadcast_mgr.get_broadcast(
            itask.identity)
        if overrides:
            rtconfig = pdeepcopy(itask.tdef.rtconfig)
            poverride(rtconfig, overrides, prepend=True)
        else:
            rtconfig = itask.tdef.rtconfig

        # Determine task host settings now, just before job submission,
        # because dynamic host selection may be used.
        try:
            task_host = self.task_remote_mgr.remote_host_select(
                rtconfig['remote']['host'])
        except TaskRemoteMgmtError as exc:
            # Submit number not yet incremented
            itask.submit_num += 1
            itask.summary['job_hosts'][itask.submit_num] = ''
            # Retry delays, needed for the try_num
            self._set_retry_timers(itask, rtconfig)
            self._prep_submit_task_job_error(suite, itask, dry_run,
                                             '(remote host select)', exc)
            return False
        else:
            if task_host is None:  # host select not ready
                itask.set_summary_message(self.REMOTE_SELECT_MSG)
                return
            itask.task_host = task_host
            # Submit number not yet incremented
            itask.submit_num += 1
            # Retry delays, needed for the try_num
            self._set_retry_timers(itask, rtconfig)

        try:
            job_conf = self._prep_submit_task_job_impl(suite, itask, rtconfig)
            local_job_file_path = get_task_job_job_log(suite, itask.point,
                                                       itask.tdef.name,
                                                       itask.submit_num)
            self.job_file_writer.write(local_job_file_path,
                                       job_conf,
                                       check_syntax=check_syntax)
        except Exception as exc:
            # Could be a bad command template, IOError, etc
            self._prep_submit_task_job_error(suite, itask, dry_run,
                                             '(prepare job file)', exc)
            return False
        itask.local_job_file_path = local_job_file_path

        job_config = deepcopy(job_conf)
        job_config['logfiles'] = deepcopy(itask.summary['logfiles'])
        job_config['job_log_dir'] = get_task_job_log(suite, itask.point,
                                                     itask.tdef.name,
                                                     itask.submit_num)
        itask.jobs.append(job_config['job_d'])
        self.job_pool.insert_job(job_config)

        if dry_run:
            itask.set_summary_message(self.DRY_RUN_MSG)
            self.job_pool.add_job_msg(job_config['job_d'], self.DRY_RUN_MSG)
            LOG.debug(f'[{itask}] -{self.DRY_RUN_MSG}')

        # Return value used by "cylc submit" and "cylc jobscript":
        return itask
Example 36
    async def async_request(
            self,
            command: str,
            args: Optional[Dict[str, Any]] = None,
            timeout: Optional[float] = None,
            req_meta: Optional[Dict[str, Any]] = None) -> object:
        """Send an asynchronous request using asyncio.

        Has the same arguments and return values as ``serial_request``.

        """
        timeout = (float(timeout) * 1000 if timeout else None) or self.timeout
        if not args:
            args = {}

        # Note: we are using CurveZMQ to secure the messages (see
        # self.curve_auth, self.socket.curve_...key etc.). We have set up
        # public-key cryptography on the ZMQ messaging and sockets, so
        # there is no need to encrypt messages ourselves before sending.

        # send message
        msg: Dict[str, Any] = {'command': command, 'args': args}
        msg.update(self.header)
        # add the request metadata
        if req_meta:
            msg['meta'].update(req_meta)
        LOG.debug('zmq:send %s', msg)
        message = encode_(msg)
        self.socket.send_string(message)

        # receive response
        if self.poller.poll(timeout):
            res = await self.socket.recv()
        else:
            if callable(self.timeout_handler):
                self.timeout_handler()
            host, port, _ = get_location(self.workflow)
            if host != self.host or port != self.port:
                raise WorkflowStopped(self.workflow)
            raise ClientTimeout(
                'Timeout waiting for server response.'
                ' This could be due to network or server issues.'
                ' Check the workflow log.')

        if msg['command'] in PB_METHOD_MAP:
            response = {'data': res}
        else:
            response = decode_(res.decode())
        LOG.debug('zmq:recv %s', response)

        try:
            return response['data']
        except KeyError:
            error = response.get(
                'error',
                {'message': f'Received invalid response: {response}'},
            )
            raise ClientError(
                error.get('message'),
                error.get('traceback'),
            )
Example 37
    def detect_old_contact_file(self, reg, check_host_port=None):
        """Detect old suite contact file.

        If an old contact file does not exist, do nothing. If one does exist
        but the suite process is definitely not alive, remove it. If one exists
        and the suite process is still alive, raise SuiteServiceFileError.

        If check_host_port is specified and does not match the (host, port)
        value in the old contact file, raise AssertionError.

        Args:
            reg (str): suite name
            check_host_port (tuple): (host, port) to check against

        Raise:
            AssertionError:
                If old contact file exists but does not have matching
                (host, port) with value of check_host_port.
            SuiteServiceFileError:
                If old contact file exists and the suite process still alive.
        """
        # An old suite of the same name may be running if a contact file exists
        # and can be loaded.
        try:
            data = self.load_contact_file(reg)
            old_host = data[self.KEY_HOST]
            old_port = data[self.KEY_PORT]
            old_proc_str = data[self.KEY_PROCESS]
        except (IOError, ValueError, SuiteServiceFileError):
            # Contact file does not exist or corrupted, should be OK to proceed
            return
        if check_host_port and check_host_port != (old_host, int(old_port)):
            raise AssertionError("%s != (%s, %s)" % (
                check_host_port, old_host, old_port))
        # Run the "ps" command to see if the process is still running or not.
        # If the old suite process is still running, it should show up with the
        # same command line as before.
        # Terminate command after 10 seconds to prevent hanging, etc.
        old_pid_str = old_proc_str.split(None, 1)[0].strip()
        cmd = ["timeout", "10", "ps", self.PS_OPTS, str(old_pid_str)]
        if is_remote_host(old_host):
            import shlex
            ssh_str = str(glbl_cfg().get_host_item("ssh command", old_host))
            cmd = shlex.split(ssh_str) + ["-n", old_host] + cmd
        from subprocess import Popen, PIPE
        from time import sleep, time
        proc = Popen(cmd, stdin=open(os.devnull), stdout=PIPE, stderr=PIPE)
        # Terminate command after 10 seconds to prevent hanging SSH, etc.
        timeout = time() + 10.0
        while proc.poll() is None:
            if time() > timeout:
                proc.terminate()
            sleep(0.1)
        fname = self.get_contact_file(reg)
        ret_code = proc.wait()
        out, err = (f.decode() for f in proc.communicate())
        if ret_code:
            LOG.debug("$ %s  # return %d\n%s", ' '.join(cmd), ret_code, err)
        for line in reversed(out.splitlines()):
            if line.strip() == old_proc_str:
                # Suite definitely still running
                break
            elif line.split(None, 1)[0].strip() == "PID":
                # Only "ps" header - "ps" has run, but no matching results.
                # Suite not running. Attempt to remove suite contact file.
                try:
                    os.unlink(fname)
                    return
                except OSError:
                    break

        raise SuiteServiceFileError(
            (
                r"""suite contact file exists: %(fname)s

Suite "%(suite)s" is already running, and listening at "%(host)s:%(port)s".

To start a new run, stop the old one first with one or more of these:
* cylc stop %(suite)s              # wait for active tasks/event handlers
* cylc stop --kill %(suite)s       # kill active tasks and wait
* cylc stop --now %(suite)s        # don't wait for active tasks
* cylc stop --now --now %(suite)s  # don't wait
* ssh -n "%(host)s" kill %(pid)s   # final brute force!
"""
            ) % {
                "host": old_host,
                "port": old_port,
                "pid": old_pid_str,
                "fname": fname,
                "suite": reg,
            }
        )
Example 38
 def _load_remote_item(self, item, reg, owner, host):
     """Load content of service item from remote [owner@]host via SSH."""
     if not is_remote(host, owner):
         return
     if host is None:
         host = 'localhost'
     if owner is None:
         owner = get_user()
     if item == self.FILE_BASE_CONTACT and not is_remote_host(host):
         # Attempt to read suite contact file via the local filesystem.
         path = r'%(run_d)s/%(srv_base)s' % {
             'run_d': get_remote_suite_run_dir('localhost', owner, reg),
             'srv_base': self.DIR_BASE_SRV,
         }
         content = self._load_local_item(item, path)
         if content is not None:
             return content
         # Else drop through and attempt via ssh to the suite account.
     # Prefix STDOUT to ensure returned content is relevant
     prefix = r'[CYLC-AUTH] %(suite)s' % {'suite': reg}
     # Attempt to cat passphrase file under suite service directory
     script = (r"""echo '%(prefix)s'; """
               r'''cat "%(run_d)s/%(srv_base)s/%(item)s"''') % {
                   'prefix': prefix,
                   'run_d': get_remote_suite_run_dir(host, owner, reg),
                   'srv_base': self.DIR_BASE_SRV,
                   'item': item
               }
     import shlex
     command = shlex.split(glbl_cfg().get_host_item('ssh command', host,
                                                    owner))
     command += ['-n', owner + '@' + host, script]
     from subprocess import Popen, PIPE, DEVNULL  # nosec
     try:
         proc = Popen(command, stdin=DEVNULL, stdout=PIPE,
                      stderr=PIPE)  # nosec
     except OSError:
         if cylc.flow.flags.debug:
             import traceback
             traceback.print_exc()
         return
     out, err = (f.decode() for f in proc.communicate())
     ret_code = proc.wait()
     # Extract passphrase from STDOUT
      # It should be in the lines after the one matching the prefix
     content = ""
     can_read = False
     for line in out.splitlines(True):
         if can_read:
             content += line
         elif line.strip() == prefix:
             can_read = True
     if not content or ret_code:
         LOG.debug(
             '$ %(command)s  # code=%(ret_code)s\n%(err)s',
             {
                 'command': command,
                 # STDOUT may contain passphrase, so not safe to print
                 # 'out': out,
                 'err': err,
                 'ret_code': ret_code,
             })
         return
     return content
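The STDOUT prefix trick above guards against ssh banners and login-shell
noise: only the lines printed after the '[CYLC-AUTH] <suite>' marker count as
file content. The extraction loop in isolation, with fabricated sample output:

def extract_after_prefix(out, prefix):
    """Return everything printed after the line matching prefix."""
    content = ""
    can_read = False
    for line in out.splitlines(True):  # keepends=True preserves newlines
        if can_read:
            content += line
        elif line.strip() == prefix:
            can_read = True  # start collecting from the *next* line
    return content

sample = "ssh banner noise\n[CYLC-AUTH] my.suite\nsecret-passphrase\n"
print(extract_after_prefix(sample, "[CYLC-AUTH] my.suite"))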
Example no. 41
    def detect_old_contact_file(self, reg, check_host_port=None):
        """Detect old suite contact file.

        If an old contact file does not exist, do nothing. If one does exist
        but the suite process is definitely not alive, remove it. If one exists
        and the suite process is still alive, raise SuiteServiceFileError.

        If check_host_port is specified and does not match the (host, port)
        value in the old contact file, raise AssertionError.

        Args:
            reg (str): suite name
            check_host_port (tuple): (host, port) to check against

        Raise:
            AssertionError:
                If an old contact file exists but its (host, port) does not
                match the value of check_host_port.
            SuiteServiceFileError:
                If an old contact file exists and the suite process is still
                alive.
        """
        # An old suite of the same name may be running if a contact file exists
        # and can be loaded.
        try:
            data = self.load_contact_file(reg)
            old_host = data[self.KEY_HOST]
            old_port = data[self.KEY_PORT]
            old_proc_str = data[self.KEY_PROCESS]
        except (IOError, ValueError, SuiteServiceFileError):
            # Contact file does not exist or is corrupted; should be OK to proceed
            return
        if check_host_port and check_host_port != (old_host, int(old_port)):
            raise AssertionError("%s != (%s, %s)" %
                                 (check_host_port, old_host, old_port))
        # Run the "ps" command to see if the process is still running or not.
        # If the old suite process is still running, it should show up with the
        # same command line as before.
        # Terminate command after 10 seconds to prevent hanging, etc.
        old_pid_str = old_proc_str.split(None, 1)[0].strip()
        cmd = ["timeout", "10", "ps", self.PS_OPTS, str(old_pid_str)]
        if is_remote_host(old_host):
            import shlex
            ssh_str = str(glbl_cfg().get_host_item("ssh command", old_host))
            cmd = shlex.split(ssh_str) + ["-n", old_host] + cmd
        from subprocess import Popen, PIPE, DEVNULL  # nosec
        from time import sleep, time
        proc = Popen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)  # nosec
        # Terminate command after 10 seconds to prevent hanging SSH, etc.
        timeout = time() + 10.0
        while proc.poll() is None:
            if time() > timeout:
                proc.terminate()
            sleep(0.1)
        fname = self.get_contact_file(reg)
        ret_code = proc.wait()
        out, err = (f.decode() for f in proc.communicate())
        if ret_code:
            LOG.debug("$ %s  # return %d\n%s", ' '.join(cmd), ret_code, err)
        for line in reversed(out.splitlines()):
            if line.strip() == old_proc_str:
                # Suite definitely still running
                break
            elif line.split(None, 1)[0].strip() == "PID":
                # Only "ps" header - "ps" has run, but no matching results.
                # Suite not running. Attempt to remove suite contact file.
                try:
                    os.unlink(fname)
                    return
                except OSError:
                    break

        raise SuiteServiceFileError(
            (r"""suite contact file exists: %(fname)s

Suite "%(suite)s" is already running, and listening at "%(host)s:%(port)s".

To start a new run, stop the old one first with one or more of these:
* cylc stop %(suite)s              # wait for active tasks/event handlers
* cylc stop --kill %(suite)s       # kill active tasks and wait
* cylc stop --now %(suite)s        # don't wait for active tasks
* cylc stop --now --now %(suite)s  # don't wait for tasks or event handlers
* ssh -n "%(host)s" kill %(pid)s   # final brute force!
""") % {
                "host": old_host,
                "port": old_port,
                "pid": old_pid_str,
                "fname": fname,
                "suite": reg,
            })
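The "ps" comparison above reduces to: the suite is only "definitely running"
if both the recorded PID and its full command line still match. A hedged
standalone sketch (the function name and the recorded string are invented,
and whitespace is normalised with split() rather than compared verbatim):

from subprocess import PIPE, run

def process_still_running(old_proc_str):
    """True if a process with the recorded pid AND command line exists."""
    pid = old_proc_str.split(None, 1)[0]
    proc = run(['ps', '-o', 'pid,args', '-p', pid], stdout=PIPE, stderr=PIPE)
    recorded = old_proc_str.split()
    for line in proc.stdout.decode().splitlines()[1:]:  # skip the PID header
        if line.split() == recorded:
            return True  # same pid and same command line: still alive
    return False  # pid gone, or recycled by a different command

print(process_still_running('1 /sbin/init'))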
Example no. 42
def jinja2process(flines, dir_, template_vars=None):
    """Pass configure file through Jinja2 processor."""
    # Load file lines into a template, excluding '#!jinja2' so that
    # '#!cylc-x.y.z' rises to the top. Callers should handle jinja2
    # TemplateSyntaxerror and TemplateError.
    if template_vars:
        LOG.debug(
            'Setting Jinja2 template variables:\n%s',
            '\n'.join(
                ['+ %s=%s' % item for item in sorted(template_vars.items())]))

    # Jinja2 render method requires a dictionary as argument (not None):
    if not template_vars:
        template_vars = {}

    # CALLERS SHOULD HANDLE JINJA2 TEMPLATESYNTAXERROR AND TEMPLATEERROR,
    # AND TYPEERROR (e.g. for not using the "|int" filter on number inputs).
    # (ToDo: is converting unicode to plain str still needed for parsec?)

    try:
        env = jinja2environment(dir_)
        template = env.from_string('\n'.join(flines[1:]))
        lines = str(template.render(template_vars)).splitlines()
    except TemplateSyntaxError as exc:
        filename = None
        # extract source lines
        if exc.lineno and exc.source and not exc.filename:
            # error in suite.rc or cylc include file
            lines = exc.source.splitlines()
        elif exc.lineno and exc.filename:
            # error in jinja2 include file
            filename = os.path.relpath(exc.filename, dir_)
            with open(exc.filename, 'r') as include_file:
                include_file.seek(max(exc.lineno - CONTEXT_LINES, 0), 0)
                lines = []
                for _ in range(CONTEXT_LINES):
                    lines.append(include_file.readline().splitlines()[0])
        if lines:
            # extract context lines from source lines
            lines = lines[max(exc.lineno - CONTEXT_LINES, 0):exc.lineno]

        raise Jinja2Error(exc, lines=lines, filename=filename)
    except Exception as exc:
        lineno = get_error_location()
        lines = None
        if lineno:
            lineno += 1  # shebang line ignored by jinja2
            lines = flines[max(lineno - CONTEXT_LINES, 0):lineno]
        raise Jinja2Error(exc, lines=lines)

    suiterc = []
    for line in lines:
        # Jinja2 leaves blank lines where source lines contain
        # only Jinja2 code; this matters if line continuation
        # markers are involved, so we remove blank lines here.
        if not line.strip():
            continue
        # restoring newlines here is only necessary for display by
        # the cylc view command:
        # ##suiterc.append(line + '\n')
        suiterc.append(line)

    return suiterc
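Stripped of its error handling, the function above is plain Jinja2: drop the
'#!jinja2' shebang, render the remaining lines, then discard the blank lines
left behind by template-only source lines. A sketch assuming the jinja2
package is installed (the template text is invented):

from jinja2 import Environment

flines = [
    '#!jinja2',
    '{% set cycles = 3 %}',
    '[scheduling]',
    '{% for i in range(cycles) %}',
    '    # cycle {{ i }}',
    '{% endfor %}',
]
template = Environment().from_string('\n'.join(flines[1:]))  # drop shebang
lines = template.render({}).splitlines()
print([line for line in lines if line.strip()])  # blank lines removed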
Example no. 43
def construct_ssh_cmd(raw_cmd,
                      user=None,
                      host=None,
                      forward_x11=False,
                      stdin=False,
                      ssh_login_shell=None,
                      ssh_cylc=None,
                      set_UTC=False,
                      allow_flag_opts=False):
    """Append a bare command with further options required to run via ssh.

    Arguments:
        raw_cmd (list): primitive command to run remotely.
        user (string): user ID for the remote login.
        host (string): remote host name. Use 'localhost' if not specified.
        forward_x11 (boolean):
            If True, use 'ssh -Y' to enable X11 forwarding, else just 'ssh'.
        stdin:
            If None, the `-n` option will be added to the SSH command line.
        ssh_login_shell (boolean):
            If True, launch remote command with `bash -l -c 'exec "$0" "$@"'`.
        ssh_cylc (string):
            Location of the remote cylc executable.
        set_UTC (boolean):
            If True, check UTC mode and specify if set to True (non-default).
        allow_flag_opts (boolean):
            If True, check CYLC_DEBUG and CYLC_VERBOSE and if non-default,
            specify debug and/or verbosity as options to the 'raw cmd'.

    Return:
        A list containing a chosen command including all arguments and options
        necessary to directly execute the bare command on a given host via ssh.
    """
    command = shlex.split(glbl_cfg().get_host_item('ssh command', host, user))

    if forward_x11:
        command.append('-Y')
    if stdin is None:
        command.append('-n')

    user_at_host = ''
    if user:
        user_at_host = user + '@'
    if host:
        user_at_host += host
    else:
        user_at_host += 'localhost'
    command.append(user_at_host)

    # Pass CYLC_VERSION and optionally, CYLC_CONF_PATH & CYLC_UTC through.
    command += ['env', quote(r'CYLC_VERSION=%s' % CYLC_VERSION)]
    try:
        command.append(
            quote(r'CYLC_CONF_PATH=%s' % os.environ['CYLC_CONF_PATH']))
    except KeyError:
        pass
    if set_UTC and os.getenv('CYLC_UTC') in ["True", "true"]:
        command.append(quote(r'CYLC_UTC=True'))
        command.append(quote(r'TZ=UTC'))

    # Use bash -l?
    if ssh_login_shell is None:
        ssh_login_shell = glbl_cfg().get_host_item('use login shell', host,
                                                   user)
    if ssh_login_shell:
        # A login shell will always source /etc/profile and the user's bash
        # profile file. To avoid having to quote the entire remote command
        # it is passed as arguments to the bash script.
        command += ['bash', '--login', '-c', quote(r'exec "$0" "$@"')]

    # 'cylc' on the remote host
    if ssh_cylc:
        command.append(ssh_cylc)
    else:
        ssh_cylc = glbl_cfg().get_host_item('cylc executable', host, user)
        if ssh_cylc.endswith('cylc'):
            command.append(ssh_cylc)
        else:
            # TODO - raise appropriate exception
            raise ValueError(
                r'ERROR: bad cylc executable in global config: %s' % ssh_cylc)

    # Insert the core raw command after the ssh options, but before its own
    # command options.
    command += raw_cmd

    if allow_flag_opts:
        if (cylc.flow.flags.verbose
                or os.getenv('CYLC_VERBOSE') in ["True", "true"]):
            command.append(r'--verbose')
        if (cylc.flow.flags.debug
                or os.getenv('CYLC_DEBUG') in ["True", "true"]):
            command.append(r'--debug')
    if LOG.handlers:
        LOG.debug("$ %s", ' '.join(quote(c) for c in command))
    elif cylc.flow.flags.debug:
        sys.stderr.write("$ %s\n" % ' '.join(quote(c) for c in command))

    return command
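Calling the function needs a cylc global configuration, but the list it
returns always has the same shape. Assembled by hand below for a hypothetical
remote call; every value is illustrative, not from a real config:

from shlex import quote

raw_cmd = ['version']                          # the bare command
command = ['ssh', '-oBatchMode=yes']           # 'ssh command' host item
command += ['-n']                              # stdin is None
command += ['alice@remote-host']               # user@host
command += ['env', quote('CYLC_VERSION=8.0')]  # environment pass-through
command += ['bash', '--login', '-c', quote('exec "$0" "$@"')]  # login shell
command += ['cylc'] + raw_cmd                  # remote executable + raw cmd
print(' '.join(command))
# -> ssh -oBatchMode=yes -n alice@remote-host env CYLC_VERSION=8.0
#    bash --login -c 'exec "$0" "$@"' cylc version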
Example no. 44
def read_and_proc(fpath, template_vars=None, viewcfg=None, asedit=False):
    """
    Read a cylc parsec config file (at fpath), inline any include files,
    process with Jinja2, and concatenate continuation lines.
    Jinja2 processing must be done before concatenation - it could be
    used to generate continuation lines.
    """
    fdir = os.path.dirname(fpath)

    # Allow Python modules in lib/python/ (e.g. for use by Jinja2 filters).
    suite_lib_python = os.path.join(fdir, "lib", "python")
    if os.path.isdir(suite_lib_python) and suite_lib_python not in sys.path:
        sys.path.append(suite_lib_python)

    LOG.debug('Reading file %s', fpath)

    # read the file into a list, stripping newlines
    with open(fpath) as f:
        flines = [line.rstrip('\n') for line in f]

    do_inline = True
    do_empy = True
    do_jinja2 = True
    do_contin = True
    if viewcfg:
        if not viewcfg['empy']:
            do_empy = False
        if not viewcfg['jinja2']:
            do_jinja2 = False
        if not viewcfg['contin']:
            do_contin = False
        if not viewcfg['inline']:
            do_inline = False

    # inline any cylc include-files
    if do_inline:
        flines = inline(
            flines, fdir, fpath, False, viewcfg=viewcfg, for_edit=asedit)

    # process with EmPy
    if do_empy:
        if flines and re.match(r'^#![Ee]m[Pp]y\s*', flines[0]):
            LOG.debug('Processing with EmPy')
            try:
                from cylc.flow.parsec.empysupport import empyprocess
            except (ImportError, ModuleNotFoundError):
                raise ParsecError('EmPy Python package must be installed '
                                  'to process file: ' + fpath)
            flines = empyprocess(flines, fdir, template_vars)

    # process with Jinja2
    if do_jinja2:
        if flines and re.match(r'^#![jJ]inja2\s*', flines[0]):
            LOG.debug('Processing with Jinja2')
            try:
                from cylc.flow.parsec.jinja2support import jinja2process
            except (ImportError, ModuleNotFoundError):
                raise ParsecError('Jinja2 Python package must be installed '
                                  'to process file: ' + fpath)
            flines = jinja2process(flines, fdir, template_vars)

    # concatenate continuation lines
    if do_contin:
        flines = _concatenate(flines)

    # return rstripped lines
    return [fl.rstrip() for fl in flines]
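The EmPy/Jinja2 dispatch above hinges entirely on the file's first line. The
same shebang sniffing in isolation (the sample first lines are invented):

import re

for first_line in ('#!jinja2', '#!Jinja2', '#!empy', '#!cylc-7.8.3'):
    if re.match(r'^#![jJ]inja2\s*', first_line):
        kind = 'jinja2'
    elif re.match(r'^#![Ee]m[Pp]y\s*', first_line):
        kind = 'empy'
    else:
        kind = 'plain parsec'
    print(first_line, '->', kind)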
Example no. 45
    def submit_task_jobs(self, suite, itasks, is_simulation=False):
        """Prepare and submit task jobs.

        Submit tasks where possible. Ignore tasks that are waiting for the
        host select command to complete, or that are waiting for remote
        initialisation. A bad host select command, an error writing a job
        file, or bad remote initialisation will mark a task as bad, leading
        to submission failure.

        This method uses prep_submit_task_jobs() as a helper.

        Return (list): list of tasks that attempted submission.
        """
        if is_simulation:
            return self._simulation_submit_task_jobs(itasks)

        # Prepare tasks for job submission
        prepared_tasks, bad_tasks = self.prep_submit_task_jobs(suite, itasks)

        # Reset consumed host selection results
        self.task_remote_mgr.remote_host_select_reset()

        if not prepared_tasks:
            return bad_tasks

        # Group task jobs by (host, owner)
        auth_itasks = {}  # {(host, owner): [itask, ...], ...}
        for itask in prepared_tasks:
            auth_itasks.setdefault((itask.task_host, itask.task_owner), [])
            auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
        # Submit task jobs for each (host, owner) group
        done_tasks = bad_tasks
        for (host, owner), itasks in sorted(auth_itasks.items()):
            is_init = self.task_remote_mgr.remote_init(host, owner)
            if is_init is None:
                # Remote is waiting to be initialised
                for itask in itasks:
                    itask.set_summary_message(self.REMOTE_INIT_MSG)
                continue
            # Ensure that localhost background/at jobs are recorded as running
            # on the host name of the current suite host, rather than just
            # "localhost". On suite restart on a different suite host, this
            # allows the restart logic to correctly poll the status of the
            # background/at jobs that may still be running on the previous
            # suite host.
            if (
                self.batch_sys_mgr.is_job_local_to_host(
                    itask.summary['batch_sys_name']) and
                not is_remote_host(host)
            ):
                owner_at_host = get_host()
            else:
                owner_at_host = host
            # Persist
            if owner:
                owner_at_host = owner + '@' + owner_at_host
            now_str = get_current_time_string()
            done_tasks.extend(itasks)
            for itask in itasks:
                # Log and persist
                LOG.info(
                    '[%s] -submit-num=%02d, owner@host=%s',
                    itask, itask.submit_num, owner_at_host)
                self.suite_db_mgr.put_insert_task_jobs(itask, {
                    'is_manual_submit': itask.is_manual_submit,
                    'try_num': itask.get_try_num(),
                    'time_submit': now_str,
                    'user_at_host': owner_at_host,
                    'batch_sys_name': itask.summary['batch_sys_name'],
                })
                itask.is_manual_submit = False
            if is_init == REMOTE_INIT_FAILED:
                # Remote has failed to initialise
                # Set submit-failed for all affected tasks
                for itask in itasks:
                    itask.local_job_file_path = None  # reset for retry
                    log_task_job_activity(
                        SubProcContext(
                            self.JOBS_SUBMIT,
                            '(init %s)' % owner_at_host,
                            err=REMOTE_INIT_FAILED,
                            ret_code=1),
                        suite, itask.point, itask.tdef.name)
                    self.task_events_mgr.process_message(
                        itask, CRITICAL,
                        self.task_events_mgr.EVENT_SUBMIT_FAILED)
                continue
            # Build the "cylc jobs-submit" command
            cmd = ['cylc', self.JOBS_SUBMIT]
            if LOG.isEnabledFor(DEBUG):
                cmd.append('--debug')
            if get_utc_mode():
                cmd.append('--utc-mode')
            remote_mode = False
            kwargs = {}
            for key, value, test_func in [
                    ('host', host, is_remote_host),
                    ('user', owner, is_remote_user)]:
                if test_func(value):
                    cmd.append('--%s=%s' % (key, value))
                    remote_mode = True
                    kwargs[key] = value
            if remote_mode:
                cmd.append('--remote-mode')
            cmd.append('--')
            cmd.append(glbl_cfg().get_derived_host_item(
                suite, 'suite job log directory', host, owner))
            # Chop itasks into a series of shorter lists if it's very big
            # to prevent overloading of stdout and stderr pipes.
            itasks = sorted(itasks, key=lambda itask: itask.identity)
            chunk_size = len(itasks) // ((len(itasks) // 100) + 1) + 1
            itasks_batches = [
                itasks[i:i + chunk_size] for i in range(0,
                                                        len(itasks),
                                                        chunk_size)]
            LOG.debug(
                '%s ... # will invoke in batches, sizes=%s',
                cmd, [len(b) for b in itasks_batches])
            for i, itasks_batch in enumerate(itasks_batches):
                stdin_files = []
                job_log_dirs = []
                for itask in itasks_batch:
                    if remote_mode:
                        stdin_files.append(
                            get_task_job_job_log(
                                suite, itask.point, itask.tdef.name,
                                itask.submit_num))
                    job_log_dirs.append(get_task_job_id(
                        itask.point, itask.tdef.name, itask.submit_num))
                    # The job file is now (about to be) used: reset the file
                    # write flag so that subsequent manual retrigger will
                    # generate a new job file.
                    itask.local_job_file_path = None
                    itask.state.reset_state(TASK_STATUS_READY)
                    if itask.state.outputs.has_custom_triggers():
                        self.suite_db_mgr.put_update_task_outputs(itask)
                self.proc_pool.put_command(
                    SubProcContext(
                        self.JOBS_SUBMIT,
                        cmd + job_log_dirs,
                        stdin_files=stdin_files,
                        job_log_dirs=job_log_dirs,
                        **kwargs
                    ),
                    self._submit_task_jobs_callback, [suite, itasks_batch])
        return done_tasks
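The batch sizing above keeps each "cylc jobs-submit" invocation to roughly
100 tasks so the stdout/stderr pipes are not overloaded. The arithmetic in
isolation (the helper name chunk is ours):

def chunk(items, target=100):
    """Split items into len(items)//target + 1 roughly equal batches."""
    chunk_size = len(items) // ((len(items) // target) + 1) + 1
    return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]

print([len(b) for b in chunk(list(range(250)))])  # -> [84, 84, 82]
print([len(b) for b in chunk(list(range(99)))])   # -> [99]: no split needed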
Example no. 46
 def _load_remote_item(self, item, reg, owner, host):
     """Load content of service item from remote [owner@]host via SSH."""
     if not is_remote(host, owner):
         return
     if host is None:
         host = 'localhost'
     if owner is None:
         owner = get_user()
     if item == self.FILE_BASE_CONTACT and not is_remote_host(host):
         # Attempt to read suite contact file via the local filesystem.
         path = r'%(run_d)s/%(srv_base)s' % {
             'run_d': glbl_cfg().get_derived_host_item(
                 reg, 'suite run directory', 'localhost', owner,
                 replace_home=False),
             'srv_base': self.DIR_BASE_SRV,
         }
         content = self._load_local_item(item, path)
         if content is not None:
             return content
         # Else drop through and attempt via ssh to the suite account.
     # Prefix STDOUT to ensure returned content is relevant
     prefix = r'[CYLC-AUTH] %(suite)s' % {'suite': reg}
     # Attempt to cat passphrase file under suite service directory
     script = (
         r"""echo '%(prefix)s'; """
         r'''cat "%(run_d)s/%(srv_base)s/%(item)s"'''
     ) % {
         'prefix': prefix,
         'run_d': glbl_cfg().get_derived_host_item(
             reg, 'suite run directory', host, owner),
         'srv_base': self.DIR_BASE_SRV,
         'item': item
     }
     import shlex
     command = shlex.split(
         glbl_cfg().get_host_item('ssh command', host, owner))
     command += ['-n', owner + '@' + host, script]
     from subprocess import Popen, PIPE, DEVNULL
     try:
         proc = Popen(
             command, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
     except OSError:
         if cylc.flow.flags.debug:
             import traceback
             traceback.print_exc()
         return
     out, err = (f.decode() for f in proc.communicate())
     ret_code = proc.wait()
     # Extract passphrase from STDOUT
      # It should be in the lines after the one matching the prefix
     content = ""
     can_read = False
     for line in out.splitlines(True):
         if can_read:
             content += line
         elif line.strip() == prefix:
             can_read = True
     if not content or ret_code:
         LOG.debug(
             '$ %(command)s  # code=%(ret_code)s\n%(err)s',
             {
                 'command': command,
                 # STDOUT may contain passphrase, so not safe to print
                 # 'out': out,
                 'err': err,
                 'ret_code': ret_code,
             })
         return
     return content
Example no. 47
    def _prep_submit_task_job(self, suite, itask, dry_run, check_syntax=True):
        """Prepare a task job submission.

        Return itask on successful preparation.

        """
        if itask.local_job_file_path and not dry_run:
            return itask

        # Handle broadcasts
        overrides = self.task_events_mgr.broadcast_mgr.get_broadcast(
            itask.identity)
        if overrides:
            rtconfig = pdeepcopy(itask.tdef.rtconfig)
            poverride(rtconfig, overrides, prepend=True)
        else:
            rtconfig = itask.tdef.rtconfig

        # Determine task host settings now, just before job submission,
        # because dynamic host selection may be used.
        try:
            task_host = self.task_remote_mgr.remote_host_select(
                rtconfig['remote']['host'])
        except TaskRemoteMgmtError as exc:
            # Submit number not yet incremented
            itask.submit_num += 1
            itask.summary['job_hosts'][itask.submit_num] = ''
            # Retry delays, needed for the try_num
            self._set_retry_timers(itask, rtconfig)
            self._prep_submit_task_job_error(
                suite, itask, dry_run, '(remote host select)', exc)
            return False
        else:
            if task_host is None:  # host select not ready
                itask.set_summary_message(self.REMOTE_SELECT_MSG)
                return
            itask.task_host = task_host
            # Submit number not yet incremented
            itask.submit_num += 1
            # Retry delays, needed for the try_num
            self._set_retry_timers(itask, rtconfig)

        try:
            job_conf = self._prep_submit_task_job_impl(suite, itask, rtconfig)
            local_job_file_path = get_task_job_job_log(
                suite, itask.point, itask.tdef.name, itask.submit_num)
            self.job_file_writer.write(local_job_file_path, job_conf,
                                       check_syntax=check_syntax)
        except Exception as exc:
            # Could be a bad command template, IOError, etc
            self._prep_submit_task_job_error(
                suite, itask, dry_run, '(prepare job file)', exc)
            return False
        itask.local_job_file_path = local_job_file_path

        if dry_run:
            itask.set_summary_message('job file written (edit/dry-run)')
            LOG.debug('[%s] -%s', itask, itask.summary['latest_message'])

        # Return value used by "cylc submit" and "cylc jobscript":
        return itask
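The broadcast handling above deep-copies the runtime config only when an
override exists, so the shared task definition is never mutated. A plain-dict
sketch standing in for pdeepcopy/poverride (the keys are invented, and a
shallow update() replaces poverride's recursive merge):

from copy import deepcopy

def apply_overrides(rtconfig, overrides):
    if not overrides:
        return rtconfig           # no broadcast: share the original, no copy
    patched = deepcopy(rtconfig)  # broadcast: never touch the task definition
    patched.update(overrides)     # simplification of the recursive poverride
    return patched

base = {'script': 'run-model', 'remote': {'host': None}}
print(apply_overrides(base, {'script': 'true'}))
print(apply_overrides(base, None) is base)  # True: untouched when no override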
Example no. 48
def jinja2process(flines, dir_, template_vars=None):
    """Pass configure file through Jinja2 processor."""
    # Load file lines into a template, excluding '#!jinja2' so that
    # '#!cylc-x.y.z' rises to the top. Callers should handle jinja2
    # TemplateSyntaxerror and TemplateError.
    if template_vars:
        LOG.debug(
            'Setting Jinja2 template variables:\n%s', '\n'.join(
                ['+ %s=%s' % item for item in sorted(template_vars.items())]))

    # Jinja2 render method requires a dictionary as argument (not None):
    if not template_vars:
        template_vars = {}

    # CALLERS SHOULD HANDLE JINJA2 TEMPLATESYNTAXERROR AND TEMPLATEERROR,
    # AND TYPEERROR (e.g. for not using the "|int" filter on number inputs).
    # (ToDo: is converting unicode to plain str still needed for parsec?)

    try:
        env = jinja2environment(dir_)
        template = env.from_string('\n'.join(flines[1:]))
        lines = str(template.render(template_vars)).splitlines()
    except TemplateSyntaxError as exc:
        filename = None
        # extract source lines
        if exc.lineno and exc.source and not exc.filename:
            # error in flow.cylc or cylc include file
            lines = exc.source.splitlines()
        elif exc.lineno and exc.filename:
            # error in jinja2 include file
            filename = os.path.relpath(exc.filename, dir_)
            with open(exc.filename, 'r') as include_file:
                include_file.seek(max(exc.lineno - CONTEXT_LINES, 0), 0)
                lines = []
                for _ in range(CONTEXT_LINES):
                    lines.append(include_file.readline().splitlines()[0])
        if lines:
            # extract context lines from source lines
            lines = lines[max(exc.lineno - CONTEXT_LINES, 0):exc.lineno]

        raise Jinja2Error(exc, lines=lines, filename=filename)
    except Exception as exc:
        lineno = get_error_location()
        lines = None
        if lineno:
            lineno += 1  # shebang line ignored by jinja2
            lines = flines[max(lineno - CONTEXT_LINES, 0):lineno]
        raise Jinja2Error(exc, lines=lines)

    flow_config = []
    for line in lines:
        # Jinja2 leaves blank lines where source lines contain
        # only Jinja2 code; this matters if line continuation
        # markers are involved, so we remove blank lines here.
        if not line.strip():
            continue
        # restoring newlines here is only necessary for display by
        # the cylc view command:
        # ##flow_config.append(line + '\n')
        flow_config.append(line)

    return flow_config
Example no. 49
    def submit_task_jobs(self, suite, itasks, is_simulation=False):
        """Prepare and submit task jobs.

        Submit tasks where possible. Ignore tasks that are waiting for the
        host select command to complete, or that are waiting for remote
        initialisation. A bad host select command, an error writing a job
        file, or bad remote initialisation will mark a task as bad, leading
        to submission failure.

        This method uses prep_submit_task_jobs() as a helper.

        Return (list): list of tasks that attempted submission.
        """
        if is_simulation:
            return self._simulation_submit_task_jobs(itasks)

        # Prepare tasks for job submission
        prepared_tasks, bad_tasks = self.prep_submit_task_jobs(suite, itasks)

        # Reset consumed host selection results
        self.task_remote_mgr.remote_host_select_reset()

        if not prepared_tasks:
            return bad_tasks

        # Group task jobs by (host, owner)
        auth_itasks = {}  # {(host, owner): [itask, ...], ...}
        for itask in prepared_tasks:
            auth_itasks.setdefault((itask.task_host, itask.task_owner), [])
            auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
        # Submit task jobs for each (host, owner) group
        done_tasks = bad_tasks
        for (host, owner), itasks in sorted(auth_itasks.items()):
            is_init = self.task_remote_mgr.remote_init(host, owner)
            if is_init is None:
                # Remote is waiting to be initialised
                for itask in itasks:
                    itask.set_summary_message(self.REMOTE_INIT_MSG)
                    self.job_pool.add_job_msg(
                        get_task_job_id(itask.point, itask.tdef.name,
                                        itask.submit_num),
                        self.REMOTE_INIT_MSG)
                continue
            # Ensure that localhost background/at jobs are recorded as running
            # on the host name of the current suite host, rather than just
            # "localhost". On suite restart on a different suite host, this
            # allows the restart logic to correctly poll the status of the
            # background/at jobs that may still be running on the previous
            # suite host.
            if (self.batch_sys_mgr.is_job_local_to_host(
                    itask.summary['batch_sys_name'])
                    and not is_remote_host(host)):
                owner_at_host = get_host()
            else:
                owner_at_host = host
            # Persist
            if owner:
                owner_at_host = owner + '@' + owner_at_host
            now_str = get_current_time_string()
            done_tasks.extend(itasks)
            for itask in itasks:
                # Log and persist
                LOG.info('[%s] -submit-num=%02d, owner@host=%s', itask,
                         itask.submit_num, owner_at_host)
                self.suite_db_mgr.put_insert_task_jobs(
                    itask, {
                        'is_manual_submit': itask.is_manual_submit,
                        'try_num': itask.get_try_num(),
                        'time_submit': now_str,
                        'user_at_host': owner_at_host,
                        'batch_sys_name': itask.summary['batch_sys_name'],
                    })
                itask.is_manual_submit = False
            if is_init == REMOTE_INIT_FAILED:
                # Remote has failed to initialise
                # Set submit-failed for all affected tasks
                for itask in itasks:
                    itask.local_job_file_path = None  # reset for retry
                    log_task_job_activity(
                        SubProcContext(self.JOBS_SUBMIT,
                                       '(init %s)' % owner_at_host,
                                       err=REMOTE_INIT_FAILED,
                                       ret_code=1), suite, itask.point,
                        itask.tdef.name)
                    self.task_events_mgr.process_message(
                        itask, CRITICAL,
                        self.task_events_mgr.EVENT_SUBMIT_FAILED)
                continue
            # Build the "cylc jobs-submit" command
            cmd = ['cylc', self.JOBS_SUBMIT]
            if LOG.isEnabledFor(DEBUG):
                cmd.append('--debug')
            if get_utc_mode():
                cmd.append('--utc-mode')
            remote_mode = False
            kwargs = {}
            for key, value, test_func in [('host', host, is_remote_host),
                                          ('user', owner, is_remote_user)]:
                if test_func(value):
                    cmd.append('--%s=%s' % (key, value))
                    remote_mode = True
                    kwargs[key] = value
            if remote_mode:
                cmd.append('--remote-mode')
            cmd.append('--')
            cmd.append(get_remote_suite_run_job_dir(host, owner, suite))
            # Chop itasks into a series of shorter lists if it's very big
            # to prevent overloading of stdout and stderr pipes.
            itasks = sorted(itasks, key=lambda itask: itask.identity)
            chunk_size = len(itasks) // ((len(itasks) // 100) + 1) + 1
            itasks_batches = [
                itasks[i:i + chunk_size]
                for i in range(0, len(itasks), chunk_size)
            ]
            LOG.debug('%s ... # will invoke in batches, sizes=%s', cmd,
                      [len(b) for b in itasks_batches])
            for i, itasks_batch in enumerate(itasks_batches):
                stdin_files = []
                job_log_dirs = []
                for itask in itasks_batch:
                    if remote_mode:
                        stdin_files.append(
                            get_task_job_job_log(suite, itask.point,
                                                 itask.tdef.name,
                                                 itask.submit_num))
                    job_log_dirs.append(
                        get_task_job_id(itask.point, itask.tdef.name,
                                        itask.submit_num))
                    # The job file is now (about to be) used: reset the file
                    # write flag so that subsequent manual retrigger will
                    # generate a new job file.
                    itask.local_job_file_path = None
                    itask.state.reset(TASK_STATUS_READY)
                    if itask.state.outputs.has_custom_triggers():
                        self.suite_db_mgr.put_update_task_outputs(itask)
                self.proc_pool.put_command(
                    SubProcContext(self.JOBS_SUBMIT,
                                   cmd + job_log_dirs,
                                   stdin_files=stdin_files,
                                   job_log_dirs=job_log_dirs,
                                   **kwargs), self._submit_task_jobs_callback,
                    [suite, itasks_batch])
        return done_tasks
Example no. 50
File: remote.py Project: cylc/cylc
def construct_ssh_cmd(raw_cmd, user=None, host=None, forward_x11=False,
                      stdin=False, ssh_login_shell=None, ssh_cylc=None,
                      set_UTC=False, allow_flag_opts=False):
    """Append a bare command with further options required to run via ssh.

    Arguments:
        raw_cmd (list): primitive command to run remotely.
        user (string): user ID for the remote login.
        host (string): remote host name. Use 'localhost' if not specified.
        forward_x11 (boolean):
            If True, use 'ssh -Y' to enable X11 forwarding, else just 'ssh'.
        stdin:
            If None, the `-n` option will be added to the SSH command line.
        ssh_login_shell (boolean):
            If True, launch remote command with `bash -l -c 'exec "$0" "$@"'`.
        ssh_cylc (string):
            Location of the remote cylc executable.
        set_UTC (boolean):
            If True, check UTC mode and specify if set to True (non-default).
        allow_flag_opts (boolean):
            If True, check CYLC_DEBUG and CYLC_VERBOSE and if non-default,
            specify debug and/or verbosity as options to the 'raw cmd'.

    Return:
        A list containing a chosen command including all arguments and options
        necessary to directly execute the bare command on a given host via ssh.
    """
    command = shlex.split(glbl_cfg().get_host_item('ssh command', host, user))

    if forward_x11:
        command.append('-Y')
    if stdin is None:
        command.append('-n')

    user_at_host = ''
    if user:
        user_at_host = user + '@'
    if host:
        user_at_host += host
    else:
        user_at_host += 'localhost'
    command.append(user_at_host)

    # Pass CYLC_VERSION and optionally, CYLC_CONF_PATH & CYLC_UTC through.
    command += ['env', quote(r'CYLC_VERSION=%s' % CYLC_VERSION)]
    try:
        command.append(
            quote(r'CYLC_CONF_PATH=%s' % os.environ['CYLC_CONF_PATH']))
    except KeyError:
        pass
    if set_UTC and os.getenv('CYLC_UTC') in ["True", "true"]:
        command.append(quote(r'CYLC_UTC=True'))
        command.append(quote(r'TZ=UTC'))

    # Use bash -l?
    if ssh_login_shell is None:
        ssh_login_shell = glbl_cfg().get_host_item(
            'use login shell', host, user)
    if ssh_login_shell:
        # A login shell will always source /etc/profile and the user's bash
        # profile file. To avoid having to quote the entire remote command
        # it is passed as arguments to the bash script.
        command += ['bash', '--login', '-c', quote(r'exec "$0" "$@"')]

    # 'cylc' on the remote host
    if ssh_cylc:
        command.append(ssh_cylc)
    else:
        ssh_cylc = glbl_cfg().get_host_item('cylc executable', host, user)
        if ssh_cylc.endswith('cylc'):
            command.append(ssh_cylc)
        else:
            # TODO - raise appropriate exception
            raise ValueError(
                r'ERROR: bad cylc executable in global config: %s' % ssh_cylc)

    # Insert the core raw command after the ssh options, but before its own
    # command options.
    command += raw_cmd

    if allow_flag_opts:
        if (cylc.flow.flags.verbose or os.getenv('CYLC_VERBOSE') in
                ["True", "true"]):
            command.append(r'--verbose')
        if (cylc.flow.flags.debug or os.getenv('CYLC_DEBUG') in
                ["True", "true"]):
            command.append(r'--debug')
    if LOG.handlers:
        LOG.debug("$ %s", ' '.join(quote(c) for c in command))
    elif cylc.flow.flags.debug:
        sys.stderr.write("$ %s\n" % ' '.join(quote(c) for c in command))

    return command
Example no. 51
 def _bespoke_stop(self):
     """Bespoke stop items."""
     LOG.debug('stopping zmq socket...')
     self.stopping = True
Example no. 52
    def upgrade_is_held(self):
        """Upgrade hold_swap => is_held.

        * Add an is_held column.
        * Set status and is_held as per the new schema.
        * Set the hold_swap values to None
          (because sqlite3 does not support DROP COLUMN).

        From:
            cylc<8
        To:
            cylc>=8
        PR:
            #3230

        Returns:
            bool - True if upgrade performed, False if upgrade skipped.

        """
        conn = self.connect()

        # check if upgrade required
        schema = conn.execute(rf'PRAGMA table_info({self.TABLE_TASK_POOL})')
        for _, name, *_ in schema:
            if name == 'is_held':
                LOG.debug('is_held column present - skipping db upgrade')
                return False

        # perform upgrade
        for table in [self.TABLE_TASK_POOL, self.TABLE_TASK_POOL_CHECKPOINTS]:
            LOG.info('Upgrade hold_swap => is_held in %s', table)
            conn.execute(rf'''
                    ALTER TABLE
                        {table}
                    ADD COLUMN
                        is_held BOOL
                ''')
            for cycle, name, status, hold_swap in conn.execute(rf'''
                    SELECT
                        cycle, name, status, hold_swap
                    FROM
                        {table}
            '''):
                if status == 'held':
                    new_status = hold_swap
                    is_held = True
                elif hold_swap == 'held':
                    new_status = status
                    is_held = True
                else:
                    new_status = status
                    is_held = False
                conn.execute(
                    rf'''
                        UPDATE
                            {table}
                        SET
                            status=?,
                            is_held=?,
                            hold_swap=?
                        WHERE
                            cycle==?
                            AND name==?
                    ''', (new_status, is_held, None, cycle, name))
            self.remove_columns(table, ['hold_swap'])
            conn.commit()
        return True
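The status/hold_swap mapping above can be rehearsed on an in-memory database;
the table name and rows below are fabricated:

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE task_pool (cycle, name, status, hold_swap)')
conn.executemany('INSERT INTO task_pool VALUES (?,?,?,?)', [
    ('1', 'a', 'held', 'waiting'),  # held now, waiting underneath
    ('1', 'b', 'running', 'held'),  # will become held when running ends
    ('1', 'c', 'waiting', None),    # not held at all
])
conn.execute('ALTER TABLE task_pool ADD COLUMN is_held BOOL')
rows = list(conn.execute('SELECT cycle, name, status, hold_swap FROM task_pool'))
for cycle, name, status, hold_swap in rows:
    if status == 'held':
        new_status, is_held = hold_swap, True
    elif hold_swap == 'held':
        new_status, is_held = status, True
    else:
        new_status, is_held = status, False
    conn.execute(
        'UPDATE task_pool SET status=?, is_held=?, hold_swap=NULL'
        ' WHERE cycle==? AND name==?', (new_status, is_held, cycle, name))
print(list(conn.execute('SELECT name, status, is_held FROM task_pool')))
# -> [('a', 'waiting', 1), ('b', 'running', 1), ('c', 'waiting', 0)]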
Example no. 53
def parse(fpath, output_fname=None, template_vars=None):
    """Parse file items line-by-line into a corresponding nested dict."""

    # read and process the file (jinja2, include-files, line continuation)
    flines = read_and_proc(fpath, template_vars)
    if output_fname:
        with open(output_fname, 'w') as handle:
            handle.write('\n'.join(flines) + '\n')
        LOG.debug('Processed configuration dumped: %s', output_fname)

    nesting_level = 0
    config = OrderedDictWithDefaults()
    parents = []

    maxline = len(flines) - 1
    index = -1

    while index < maxline:
        index += 1
        line = flines[index]

        if re.match(_LINECOMMENT, line):
            # skip full-line comments
            continue

        if re.match(_BLANKLINE, line):
            # skip blank lines
            continue

        m = re.match(_HEADING, line)
        if m:
            # matched a section heading
            s_open, sect_name, s_close = m.groups()[1:-1]
            nb = len(s_open)

            if nb != len(s_close):
                raise FileParseError('bracket mismatch', index, line)
            elif nb == nesting_level:
                # sibling section
                parents = parents[:-1] + [sect_name]
            elif nb == nesting_level + 1:
                # child section
                parents = parents + [sect_name]
            elif nb < nesting_level:
                # back up one or more levels
                ndif = nesting_level - nb
                parents = parents[:-ndif - 1] + [sect_name]
            else:
                raise FileParseError(
                    'Error line ' + str(index + 1) + ': ' + line)
            nesting_level = nb
            addsect(config, sect_name, parents[:-1])

        else:
            m = re.match(_KEY_VALUE, line)
            if m:
                # matched a key=value item
                key, _, val = m.groups()[1:]
                if val.startswith('"""') or val.startswith("'''"):
                    # triple quoted - may be a multiline value
                    val, index = multiline(flines, val, index, maxline)
                addict(config, key, val, parents, index)
            else:
                # no match
                raise FileParseError(
                    'Invalid line ' + str(index + 1) + ': ' + line)

    return config
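The section-heading bookkeeping in the parser reduces to counting brackets:
the same depth means a sibling, one deeper means a child, and shallower means
backing up the tree. A standalone sketch with invented headings and a
simplified heading regex:

import re

_HEADING = re.compile(r'^(\[+)\s*(.+?)\s*(\]+)$')
nesting_level = 0
parents = []
for line in ['[scheduling]', '[[dependencies]]', '[[[graph]]]', '[runtime]']:
    s_open, sect_name, s_close = _HEADING.match(line).groups()
    nb = len(s_open)
    assert nb == len(s_close), 'bracket mismatch'
    assert nb <= nesting_level + 1, 'section nested too deeply'
    if nb == nesting_level:
        parents = parents[:-1] + [sect_name]  # sibling section
    elif nb == nesting_level + 1:
        parents = parents + [sect_name]       # child section
    else:
        parents = parents[:nb - 1] + [sect_name]  # back up one or more levels
    nesting_level = nb
    print(parents)
# -> ['scheduling'], ['scheduling', 'dependencies'],
#    ['scheduling', 'dependencies', 'graph'], ['runtime']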
Example no. 54
    def remote_init(self, host, owner):
        """Initialise a remote [owner@]host if necessary.

        Create UUID file on suite host ".service/uuid" for remotes to identify
        shared file system with suite host.

        Call "cylc remote-init" to install suite items to remote:
            ".service/contact": For TCP task communication
            ".service/passphrase": For TCP task communication
            "python/": if source exists

        Return:
            REMOTE_INIT_NOT_REQUIRED:
                If remote init is not required, e.g. not remote
            REMOTE_INIT_DONE:
                If remote init done.
            REMOTE_INIT_FAILED:
                If init of the remote failed.
                Note: this will reset to None to allow retry.
            None:
                If waiting for remote init command to complete

        """
        if self.single_task_mode or not is_remote(host, owner):
            return REMOTE_INIT_NOT_REQUIRED
        try:
            status = self.remote_init_map[(host, owner)]
        except KeyError:
            pass  # Not yet initialised
        else:
            if status == REMOTE_INIT_FAILED:
                del self.remote_init_map[(host, owner)]  # reset to allow retry
            return status

        # Determine what items to install
        comm_meth = glbl_cfg().get_host_item(
            'task communication method', host, owner)
        owner_at_host = 'localhost'
        if host:
            owner_at_host = host
        if owner:
            owner_at_host = owner + '@' + owner_at_host
        LOG.debug('comm_meth[%s]=%s', owner_at_host, comm_meth)
        items = self._remote_init_items(comm_meth)
        # No item to install
        if not items:
            self.remote_init_map[(host, owner)] = REMOTE_INIT_NOT_REQUIRED
            return self.remote_init_map[(host, owner)]

        # Create a TAR archive with the service files,
        # so they can be sent later via SSH's STDIN to the task remote.
        tmphandle = self.proc_pool.get_temporary_file()
        tarhandle = tarfile.open(fileobj=tmphandle, mode='w')
        for path, arcname in items:
            tarhandle.add(path, arcname=arcname)
        tarhandle.close()
        tmphandle.seek(0)
        # UUID file - for remote to identify shared file system with suite host
        uuid_fname = os.path.join(
            self.suite_srv_files_mgr.get_suite_srv_dir(self.suite),
            FILE_BASE_UUID)
        if not os.path.exists(uuid_fname):
            with open(uuid_fname, 'wb') as handle:
                handle.write(str(self.uuid_str).encode())
        # Build the command
        cmd = ['cylc', 'remote-init']
        if is_remote_host(host):
            cmd.append('--host=%s' % host)
        if is_remote_user(owner):
            cmd.append('--user=%s' % owner)
        if cylc.flow.flags.debug:
            cmd.append('--debug')
        if comm_meth in ['ssh']:
            cmd.append('--indirect-comm=%s' % comm_meth)
        cmd.append(str(self.uuid_str))
        cmd.append(glbl_cfg().get_derived_host_item(
            self.suite, 'suite run directory', host, owner))
        self.proc_pool.put_command(
            SubProcContext('remote-init', cmd, stdin_files=[tmphandle]),
            self._remote_init_callback,
            [host, owner, tmphandle])
        # None status: Waiting for command to finish
        self.remote_init_map[(host, owner)] = None
        return self.remote_init_map[(host, owner)]
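The TAR trick above lets the service files be streamed to the remote over
SSH's STDIN without shipping a temporary archive path. A sketch of just the
packing step, using an in-memory buffer instead of the proc pool's temporary
file (the member name and payload are invented):

import io
import tarfile

tmphandle = io.BytesIO()
with tarfile.open(fileobj=tmphandle, mode='w') as tarhandle:
    info = tarfile.TarInfo(name='.service/contact')
    payload = b'CYLC_SUITE_HOST=suite-host\n'
    info.size = len(payload)  # TarInfo needs the size before addfile()
    tarhandle.addfile(info, io.BytesIO(payload))
tmphandle.seek(0)  # rewind so Popen(..., stdin=tmphandle) reads the archive
print(len(tmphandle.read()), 'bytes of tar archive ready to stream')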