def addict(cfig, key, val, parents, index):
    """Store ``key = val`` in the section of ``cfig`` addressed by ``parents``.

    A repeated ``graph`` item under [scheduling][dependencies] (directly, or
    in a section one level below it) is accumulated into a list of graph
    strings.  Any other repeated item replaces the earlier value.

    Raises:
        FileParseError: if the addressed section is not a dict, i.e. an item
            of that name was already set at that level.
    """
    section = cfig
    for name in parents:
        # descend one level per parent name
        section = section[name]

    if not isinstance(section, dict):
        raise FileParseError(
            'line %d: already encountered %s',
            index, itemstr(parents, key, val))

    if key not in section:
        section[key] = val
        return

    graph_parents = ['scheduling', 'dependencies']
    is_graph_item = key == 'graph' and (
        parents == graph_parents
        or (len(parents) == 3 and parents[-3:-1] == graph_parents))

    if not is_graph_item:
        # a plain item: the newest value wins
        LOG.debug(
            'overriding %s old value: %s new value: %s',
            itemstr(parents, key), section[key], val)
        section[key] = val
        return

    # graph strings are collected rather than overridden
    LOG.debug('Merging graph strings under %s', itemstr(parents))
    if not isinstance(section[key], list):
        section[key] = [section[key]]
    section[key].append(val)
def stop(self):
    """Stop the server.

    Puts the 'STOP' command on the listener's queue, waits for the listener
    thread to return, then closes the socket.
    """
    LOG.debug('stopping zmq server...')

    # ask the listener loop to exit ...
    self.queue.put('STOP')
    # ... and block until it has actually returned
    self.thread.join()

    self.socket.close()
    LOG.debug('...stopped')
def poll_task_jobs(self, suite, itasks, poll_succ=True, msg=None):
    """Poll the jobs of the given tasks that are in a pollable state.

    Tasks whose status is submitted, running or failed are polled.
    Succeeded tasks are polled as well unless ``poll_succ`` is false.
    Tasks in any other state are skipped with a debug message.

    When at least one task is pollable, ``msg`` (if not None) is logged at
    INFO level and the poll command is run through self._run_job_cmd() with
    self._poll_task_jobs_callback as its callback.
    """
    pollable = {
        TASK_STATUS_SUBMITTED, TASK_STATUS_RUNNING, TASK_STATUS_FAILED}
    if poll_succ:
        pollable.add(TASK_STATUS_SUCCEEDED)

    selected = []
    for itask in itasks:
        if itask.state.status not in pollable:
            LOG.debug("skipping %s: not pollable, "
                      "or skipping 'succeeded' tasks" % itask.identity)
            continue
        selected.append(itask)

    if not selected:
        return
    if msg is not None:
        LOG.info(msg)
    self._run_job_cmd(
        self.JOBS_POLL, suite, selected, self._poll_task_jobs_callback)
def _manip_task_jobs_callback(
        self, ctx, suite, itasks, summary_callback, more_callbacks=None):
    """Callback when submit/poll/kill tasks command exits.

    Logs the command context, then dispatches each recognised line of the
    command output to the callback registered for its prefix.

    Args:
        ctx: command context; ret_code, out, cmd_key and timestamp are read.
        suite: passed through unchanged to every callback.
        itasks: task proxies the command was run for.
        summary_callback: called for lines starting with
            self.batch_sys_mgr.OUT_PREFIX_SUMMARY, and for every task that
            got no summary line (with a synthesised status of "1").
        more_callbacks: optional dict of {prefix: callback} for further
            line prefixes.
    """
    if ctx.ret_code:
        LOG.error(ctx)
    else:
        LOG.debug(ctx)
    # A dict for easy reference of (CYCLE, NAME, SUBMIT_NUM) -> TaskProxy
    #
    # Note for "reload": A TaskProxy instance may be replaced on reload, so
    # the "itasks" list may not reference the TaskProxy objects that
    # replace the old ones. The .reload_successor attribute provides the
    # link(s) for us to get to the latest replacement.
    #
    # Note for "kill": It is possible for a job to trigger its trap and
    # report back to the suite before this logic is called. If so, the task
    # will no longer be TASK_STATUS_SUBMITTED or TASK_STATUS_RUNNING, and
    # its output line will be ignored here.
    tasks = {}
    for itask in itasks:
        # Follow the chain of reload replacements to the newest proxy.
        while itask.reload_successor is not None:
            itask = itask.reload_successor
        if itask.point is not None and itask.submit_num:
            # Zero-padded to two digits; this is compared below against the
            # submit number component of the path field in the output.
            submit_num = "%02d" % (itask.submit_num)
            tasks[(str(itask.point), itask.tdef.name, submit_num)] = itask
    handlers = [(self.batch_sys_mgr.OUT_PREFIX_SUMMARY, summary_callback)]
    if more_callbacks:
        for prefix, callback in more_callbacks.items():
            handlers.append((prefix, callback))
    out = ctx.out
    if not out:
        out = ""
    # Start by assuming every task is "bad"; a task is removed from this
    # dict once a summary line for it has been seen.
    bad_tasks = dict(tasks)
    for line in out.splitlines(True):
        for prefix, callback in handlers:
            if line.startswith(prefix):
                # NOTE(review): "line" is rebound here, so any remaining
                # handlers are tested against the stripped remainder rather
                # than the original output line - confirm this is intended.
                line = line[len(prefix):].strip()
                try:
                    path = line.split("|", 2)[1]  # timestamp, path, status
                    point, name, submit_num = path.split(os.sep, 2)
                    if prefix == self.batch_sys_mgr.OUT_PREFIX_SUMMARY:
                        del bad_tasks[(point, name, submit_num)]
                    itask = tasks[(point, name, submit_num)]
                    callback(suite, itask, ctx, line)
                except (LookupError, ValueError, KeyError) as exc:
                    # Malformed line or unknown task (KeyError is already
                    # covered by LookupError): log it and carry on.
                    LOG.warning(
                        'Unhandled %s output: %s', ctx.cmd_key, line)
                    LOG.exception(exc)
    # Task jobs that are in the original command but did not get a status
    # in the output. Handle as failures.
    for key, itask in sorted(bad_tasks.items()):
        # Synthesise a "timestamp|path|1" summary line for the task.
        line = (
            "|".join([ctx.timestamp, os.sep.join(key), "1"]) + "\n")
        summary_callback(suite, itask, ctx, line)
def addsect(cfig, sname, parents):
    """Create the empty section ``sname`` below ``parents`` in ``cfig``.

    If the section already exists it is left untouched and a debug message
    is logged; repeating a section is only a problem if items inside it are
    repeated too.
    """
    section = cfig
    for name in parents:
        # descend one level per parent name
        section = section[name]

    if sname not in section:
        section[sname] = OrderedDictWithDefaults()
        return

    LOG.debug(
        'Section already encountered: %s', itemstr(parents + [sname]))
def _remote_host_select_callback(self, proc_ctx, cmd_str):
    """Record the outcome of a host select command.

    On a zero return code with non-empty output, the first output line is
    stored in self.remote_host_str_map[cmd_str].  Otherwise a
    TaskRemoteMgmtError describing the failure is stored there instead.
    self.ready is set in both cases.
    """
    self.ready = True

    succeeded = proc_ctx.ret_code == 0 and proc_ctx.out
    if not succeeded:
        # Bad status
        LOG.error(proc_ctx)
        self.remote_host_str_map[cmd_str] = TaskRemoteMgmtError(
            TaskRemoteMgmtError.MSG_SELECT, (cmd_str, None), cmd_str,
            proc_ctx.ret_code, proc_ctx.out, proc_ctx.err)
        return

    # Good status
    LOG.debug(proc_ctx)
    first_line = proc_ctx.out.splitlines()[0]
    self.remote_host_str_map[cmd_str] = first_line
async def async_request(self, command, args=None, timeout=None):
    """Send an asynchronous request using asyncio.

    Has the same arguments and return values as ``serial_request``.

    Raises:
        ClientError: if the passphrase cannot be read, the response cannot
            be decrypted, or the server replied with an error.
        ClientTimeout: if no response arrives within the timeout.
    """
    # A non-zero timeout is scaled by 1000 before polling (presumably
    # seconds to milliseconds); anything else falls back to self.timeout.
    scaled = None
    if timeout:
        timeout = float(timeout)
        if timeout:
            scaled = timeout * 1000
    timeout = scaled or self.timeout

    args = args or {}

    # get secret for this request
    # assumes secret won't change during the request
    try:
        secret = self.secret()
    except cylc.flow.suite_srv_files_mgr.SuiteServiceFileError:
        raise ClientError('could not read suite passphrase')

    # send message
    msg = {'command': command, 'args': args}
    msg.update(self.header)
    LOG.debug('zmq:send %s' % msg)
    self.socket.send_string(encrypt(msg, secret))

    # receive response
    if not self.poller.poll(timeout):
        if self.timeout_handler:
            self.timeout_handler()
        raise ClientTimeout('Timeout waiting for server response.')
    res = await self.socket.recv_string()

    try:
        response = decrypt(res, secret)
        LOG.debug('zmq:recv %s' % response)
    except jose.exceptions.JWTError:
        raise ClientError(
            'Could not decrypt response. Has the passphrase changed?')

    if 'data' in response:
        return response['data']
    error = response['error']
    raise ClientError(error['message'], error.get('traceback'))
def _run_command_init(cls, ctx, callback=None, callback_args=None):
    """Prepare and launch shell command in ctx.

    The command's standard input is taken from, in order of preference:
    ctx.cmd_kwargs['stdin_files'] (several files are concatenated into a
    temporary file), ctx.cmd_kwargs['stdin_str'], or os.devnull.

    Args:
        ctx: command context; cmd and cmd_kwargs are read, and ret_code and
            err are set on failure.
        callback: passed to cls._run_command_exit() on failure.
        callback_args: passed to cls._run_command_exit() on failure.

    Returns:
        The process object returned by procopen(), or None if the command
        could not be launched (cls._run_command_exit() is called in that
        case).
    """
    try:
        stdin_files = ctx.cmd_kwargs.get('stdin_files')
        if stdin_files:
            if len(stdin_files) > 1:
                # Concatenate all inputs into one temporary file.
                stdin_file = cls.get_temporary_file()
                for file_ in stdin_files:
                    if hasattr(file_, 'read'):
                        stdin_file.write(file_.read())
                    else:
                        # Close each source file as soon as it is copied
                        # rather than leaking the handle.
                        with open(file_, 'rb') as handle:
                            stdin_file.write(handle.read())
                stdin_file.seek(0)
            elif hasattr(stdin_files[0], 'read'):
                stdin_file = stdin_files[0]
            else:
                # Handed to the child process as its stdin, so it must stay
                # open here.
                stdin_file = open(stdin_files[0], 'rb')
        elif ctx.cmd_kwargs.get('stdin_str'):
            stdin_file = cls.get_temporary_file()
            stdin_file.write(ctx.cmd_kwargs.get('stdin_str').encode())
            stdin_file.seek(0)
        else:
            stdin_file = open(os.devnull)
        proc = procopen(
            ctx.cmd, stdin=stdin_file,
            stdoutpipe=True, stderrpipe=True,
            # Execute command as a process group leader,
            # so we can use "os.killpg" to kill the whole group.
            preexec_fn=os.setpgrp,
            env=ctx.cmd_kwargs.get('env'),
            usesh=ctx.cmd_kwargs.get('shell'))
        # calls to open a shell are aggregated in cylc_subproc.procopen()
        # with logging for what is calling it and the commands given
    except (IOError, OSError) as exc:
        if exc.filename is None:
            exc.filename = ctx.cmd[0]
        LOG.exception(exc)
        ctx.ret_code = 1
        ctx.err = str(exc)
        cls._run_command_exit(ctx, callback, callback_args)
        return None
    else:
        LOG.debug(ctx.cmd)
        return proc
def _prep_submit_task_job_error(self, suite, itask, dry_run, action, exc):
    """Handle a failure raised while preparing a task job for submission.

    Logs the error and records it in the job activity log.  Unless this is
    a dry run, a task job row is also inserted via the suite database
    manager, the manual-submit flag is cleared and a submit-failed message
    is processed for the task.
    """
    LOG.debug("submit_num %s" % itask.submit_num)
    LOG.debug(traceback.format_exc())
    LOG.error(exc)

    failure_ctx = SubProcContext(
        self.JOBS_SUBMIT, action, err=exc, ret_code=1)
    log_task_job_activity(
        failure_ctx, suite, itask.point, itask.tdef.name,
        submit_num=itask.submit_num)

    if dry_run:
        return

    # Persist
    job_row = {
        'is_manual_submit': itask.is_manual_submit,
        'try_num': itask.get_try_num(),
        'time_submit': get_current_time_string(),
        'batch_sys_name': itask.summary.get('batch_sys_name'),
    }
    self.suite_db_mgr.put_insert_task_jobs(itask, job_row)
    itask.is_manual_submit = False
    self.task_events_mgr.process_message(
        itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED)
def _dump_item(path, item, value):
    """Dump "value" to a file called "item" in the directory "path".

    The value is first written to a temporary file in the same directory,
    flushed and fsync'ed, and only then renamed into place, so that an
    incomplete file is never left under the final name.

    Args:
        path: directory to write into; created if necessary.
        item: name of the file to create inside ``path``.
        value: content to write; anything with an ``encode`` method (e.g.
            str) is encoded first, anything else is written as-is.
    """
    os.makedirs(path, exist_ok=True)
    from tempfile import NamedTemporaryFile

    try:
        data = value.encode()
    except AttributeError:
        # Already bytes-like: write unchanged.
        data = value

    # The context manager closes the handle even if the write fails.
    with NamedTemporaryFile(prefix=item, dir=path, delete=False) as handle:
        handle.write(data)
        # fsync only syncs what has reached the OS, so Python's own buffer
        # must be flushed first.
        handle.flush()
        os.fsync(handle.fileno())

    fname = os.path.join(path, item)
    os.rename(handle.name, fname)
    LOG.debug('Generated %s', fname)
def load(self):
    """Load or reload configuration from files.

    With CYLC_CONF_PATH unset, the site and then the user configuration
    are looked up in their default directories (new location first, old
    location second) and loaded if readable.  With CYLC_CONF_PATH set to a
    non-empty value, every readable config file found in its
    os.pathsep-separated directories is loaded as user configuration.
    self.transform() is called at the end in all cases.

    Raises:
        ParsecError: re-raised when a user configuration file found in the
            default locations fails to load.
    """
    # Start from scratch so that a reload does not keep stale settings.
    self.sparse.clear()
    self.dense.clear()
    LOG.debug("Loading site/user global config files")
    conf_path_str = os.getenv("CYLC_CONF_PATH")
    if conf_path_str is None:
        # CYLC_CONF_PATH not defined, use default locations.
        for conf_dir_1, conf_dir_2, conf_type in [
                (self.SITE_CONF_DIR, self.SITE_CONF_DIR_OLD,
                 upgrader.SITE_CONFIG),
                (self.USER_CONF_DIR_1, self.USER_CONF_DIR_2,
                 upgrader.USER_CONFIG)]:
            fname1 = os.path.join(conf_dir_1, self.CONF_BASE)
            fname2 = os.path.join(conf_dir_2, self.CONF_BASE)
            # Prefer the first location; fall back to the second.
            if os.access(fname1, os.F_OK | os.R_OK):
                fname = fname1
            elif os.access(fname2, os.F_OK | os.R_OK):
                fname = fname2
            else:
                # Neither file is readable: nothing to load for this type.
                continue
            try:
                self.loadcfg(fname, conf_type)
            except ParsecError as exc:
                if conf_type == upgrader.SITE_CONFIG:
                    # Warn on bad site file (users can't fix it).
                    LOG.warning(
                        'ignoring bad %s %s:\n%s', conf_type, fname, exc)
                else:
                    # Abort on bad user file (users can fix it).
                    LOG.error('bad %s %s', conf_type, fname)
                    raise
                # NOTE(review): nesting of this "break" was reconstructed;
                # as placed, a bad site file stops the loop before the user
                # file is tried - confirm against the original layout.
                break
    elif conf_path_str:
        # CYLC_CONF_PATH defined with a value
        for path in conf_path_str.split(os.pathsep):
            fname = os.path.join(path, self.CONF_BASE)
            if os.access(fname, os.F_OK | os.R_OK):
                self.loadcfg(fname, upgrader.USER_CONFIG)
    # (OK if no global.rc is found, just use system defaults).
    self.transform()
def load(self):
    """Load or reload configuration from files.

    If CYLC_CONF_PATH is unset, the config file is loaded from each entry
    of self.conf_dir_hierarchy in turn.  If it is set to a non-empty
    value, only the config file in that directory is loaded, as user
    configuration.  If it is set but empty, no file is loaded.

    Raises:
        ParsecError: re-raised (after logging) when a file from the default
            hierarchy fails to load.
    """
    self.sparse.clear()
    self.dense.clear()
    LOG.debug("Loading site/user config files")

    conf_path_str = os.getenv("CYLC_CONF_PATH")
    if conf_path_str is None:
        # Use default locations.
        for conf_type, conf_dir in self.conf_dir_hierarchy:
            fname = os.path.join(conf_dir, self.CONF_BASENAME)
            try:
                self._load(fname, conf_type)
            except ParsecError:
                LOG.error(f'bad {conf_type} {fname}')
                raise
    elif conf_path_str:
        # Explicit config file override.
        override = os.path.join(conf_path_str, self.CONF_BASENAME)
        self._load(override, upgrader.USER_CONFIG)

    self._set_default_editors()
def _remote_init_callback(self, proc_ctx, host, owner, tmphandle):
    """Record the outcome of a "cylc remote-init" command.

    Closes ``tmphandle`` (ignoring OSError) and stores the resulting status
    in self.remote_init_map[(host, owner)]: the first known good status
    found in the command output when the return code is zero, otherwise
    REMOTE_INIT_FAILED.
    """
    self.ready = True
    try:
        tmphandle.close()
    except OSError:
        # E.g. ignore bad unlink, etc
        pass

    key = (host, owner)
    if proc_ctx.ret_code == 0:
        good_statuses = (REMOTE_INIT_DONE, REMOTE_INIT_NOT_REQUIRED)
        for status in good_statuses:
            if status not in proc_ctx.out:
                continue
            # Good status
            LOG.debug(proc_ctx)
            self.remote_init_map[key] = status
            return

    # Bad status
    cmd_str = ' '.join(quote(item) for item in proc_ctx.cmd)
    LOG.error(TaskRemoteMgmtError(
        TaskRemoteMgmtError.MSG_INIT, (host, owner), cmd_str,
        proc_ctx.ret_code, proc_ctx.out, proc_ctx.err))
    LOG.error(proc_ctx)
    self.remote_init_map[key] = REMOTE_INIT_FAILED
def _remote_init_callback(self, proc_ctx, host, owner, tmphandle):
    """Handle completion of the "cylc remote-init" command.

    The temporary handle is closed (an OSError from that is ignored).  A
    zero return code whose output contains REMOTE_INIT_DONE or
    REMOTE_INIT_NOT_REQUIRED stores that status for (host, owner) in
    self.remote_init_map; every other outcome is logged as an error and
    stored as REMOTE_INIT_FAILED.
    """
    self.ready = True
    try:
        tmphandle.close()
    except OSError:
        # E.g. ignore bad unlink, etc
        pass

    if proc_ctx.ret_code == 0:
        for good_status in (REMOTE_INIT_DONE, REMOTE_INIT_NOT_REQUIRED):
            if good_status in proc_ctx.out:
                # Good status
                LOG.debug(proc_ctx)
                self.remote_init_map[(host, owner)] = good_status
                return

    # Bad status
    failure = TaskRemoteMgmtError(
        TaskRemoteMgmtError.MSG_INIT,
        (host, owner),
        ' '.join(quote(item) for item in proc_ctx.cmd),
        proc_ctx.ret_code,
        proc_ctx.out,
        proc_ctx.err)
    LOG.error(failure)
    LOG.error(proc_ctx)
    self.remote_init_map[(host, owner)] = REMOTE_INIT_FAILED
def extract_resources(target_dir, resources=None):
    """Extract cylc.flow resources and write them to a target directory.

    Arguments:
        target_dir - where to put extracted resources, created if necessary
        resources - list of name resources, e.g. ['etc/foo.bar'];
            all known resources are extracted if None.

    Raises:
        ValueError: when a requested name is not a known resource.
            Resources listed before the invalid name have already been
            extracted by then.
    """
    if resources is None:
        resources = resource_names
    for resource in resources:
        if resource not in resource_names:
            raise ValueError(f"Invalid resource name {resource}")
        path = Path(target_dir, resource)
        LOG.debug(f"Extracting {resource} to {path}")
        # exist_ok avoids the race between checking for the directory and
        # creating it.
        path.parent.mkdir(parents=True, exist_ok=True)
        # In spite of the name, this returns a byte array, not a string:
        res = pr.resource_string('cylc.flow', resource)
        with open(path, 'wb') as h:
            h.write(res)
def callback(self, ctx: SubFuncContext):
    """Callback for asynchronous xtrigger functions.

    Removes the function signature from the active list, then records the
    satisfaction status and results from the JSON in ``ctx.out``.  Output
    that cannot be parsed into a (satisfied, results) pair is ignored.

    Args:
        ctx (SubFuncContext): function context
    Raises:
        ValueError: if the context given is not active
    """
    LOG.debug(ctx)
    sig = ctx.get_signature()
    self.active.remove(sig)

    try:
        satisfied, results = json.loads(ctx.out)
    except (ValueError, TypeError):
        return

    LOG.debug('%s: returned %s' % (sig, results))
    if not satisfied:
        return
    self.pflag = True
    self.sat_xtrig[sig] = results
def remote_clean(reg, platform_names, timeout):
    """Run subprocesses to clean workflows on remote install targets
    (skip localhost), given a set of platform names to look up.

    One clean command is started per install target.  If it fails, the next
    platform of that install target is tried until none are left.

    Args:
        reg (str): Workflow name.
        platform_names (list): List of platform names to look up in the
            global config, in order to determine the install targets to
            clean on.
        timeout (str): Number of seconds to wait before cancelling.

    Raises:
        PlatformLookupError: if the platforms cannot be mapped to install
            targets.
        CylcError: if cleaning failed on every platform of at least one
            install target.
    """
    try:
        install_targets_map = (
            get_install_target_to_platforms_map(platform_names))
    except PlatformLookupError as exc:
        raise PlatformLookupError(
            "Cannot clean on remote platforms as the workflow database is "
            f"out of date/inconsistent with the global config - {exc}")

    pool = []
    for target, platforms in install_targets_map.items():
        if target == get_localhost_install_target():
            continue
        # Randomise which platform of the install target is tried first.
        shuffle(platforms)
        LOG.info(
            f"Cleaning on install target: {platforms[0]['install target']}")
        # Issue ssh command:
        pool.append(
            (_remote_clean_cmd(reg, platforms[0], timeout), target, platforms)
        )

    failed_targets = []
    # Handle subproc pool results almost concurrently:
    while pool:
        # Build the next pool separately instead of adding to and removing
        # from the list being iterated, which would skip entries.
        next_pool = []
        for proc, target, platforms in pool:
            ret_code = proc.poll()
            if ret_code is None:  # proc still running
                next_pool.append((proc, target, platforms))
                continue
            out, err = (f.decode() for f in proc.communicate())
            if out:
                LOG.debug(out)
            if ret_code:
                # Try again using the next platform for this install target:
                this_platform = platforms.pop(0)
                excn = TaskRemoteMgmtError(
                    TaskRemoteMgmtError.MSG_TIDY, this_platform['name'],
                    " ".join(proc.args), ret_code, out, err)
                LOG.debug(excn)
                if platforms:
                    next_pool.append(
                        (_remote_clean_cmd(reg, platforms[0], timeout),
                         target, platforms)
                    )
                else:  # Exhausted list of platforms
                    failed_targets.append(target)
            elif err:
                LOG.debug(err)
        pool = next_pool
        if pool:
            # Only wait when there is still something to poll.
            time.sleep(0.2)

    if failed_targets:
        raise CylcError(
            f"Could not clean on install targets: {', '.join(failed_targets)}")
def make_symlink(path: Union[Path, str], target: Union[Path, str]) -> bool:
    """Create a directory symlink at ``path`` pointing to ``target``.

    The target directory and the symlink's parent directory are created if
    missing.  A dangling symlink already at ``path`` is replaced.

    Args:
        path: Absolute path of the desired symlink.
        target: Absolute path of the symlink's target directory.

    Returns:
        True if a symlink was created, False if ``path`` already exists
        (whether or not it is the wanted symlink).

    Raises:
        WorkflowFilesError: if a bad symlink cannot be removed or the new
            symlink cannot be created.
    """
    path = Path(path)
    target = Path(target)

    if path.exists():
        # note all three checks are needed here due to case where user has
        # set their own symlink which does not match the global config one.
        already_correct = (
            path.is_symlink() and target.exists() and path.samefile(target))
        if not already_correct:
            # symlink name is in use by a physical file or directory
            LOG.debug(
                f"Unable to create symlink to {target}. "
                f"The path {path} already exists.")
        return False

    if path.is_symlink():
        # exists() is False but is_symlink() is True: remove a bad symlink.
        try:
            path.unlink()
        except OSError:
            raise WorkflowFilesError(
                f"Error when symlinking. Failed to unlink bad symlink {path}.")

    target.mkdir(parents=True, exist_ok=True)

    # This is needed in case share and share/cycle have the same symlink dir:
    if path.exists():
        return False

    path.parent.mkdir(parents=True, exist_ok=True)
    try:
        path.symlink_to(target)
    except OSError as exc:
        raise WorkflowFilesError(f"Error when symlinking\n{exc}")
    return True
def _rank_good_hosts(self, all_host_stats):
    """Pick the most suitable host according to self.rank_method.

    ``all_host_stats`` maps each 'good' host to its metrics.  The value for
    the configured rank method is extracted per host and the hosts are
    sorted by it.  For "memory" and disk-space ranking the host with the
    highest value is returned; for any other method the lowest.
    """
    rank_method = self.rank_method

    # Reduce each host's full metrics structure to the one value ranked on.
    values_by_host = {
        host: metric[rank_method]
        for host, metric in all_host_stats.items()}
    LOG.debug(
        "INFO: host %s values extracted are: %s",
        rank_method,
        "\n".join(" %s: %s" % item for item in values_by_host.items()))

    # Hosts in ascending order of value.
    ascending = sorted(values_by_host, key=values_by_host.get)
    base_msg = ("good (metric-returning) hosts were ranked in the "
                "following order, from most to least suitable: %s")

    # 'Most free' wins for memory and disk space; otherwise (a load
    # average; 'random' is dealt with earlier) the lowest value wins.
    highest_is_best = rank_method in (
        "memory", "disk-space:" + self.USE_DISK_PATH)
    ranked = ascending[::-1] if highest_is_best else ascending
    LOG.debug(base_msg, ', '.join(ranked))
    return ranked[0]
def _rank_good_hosts(self, all_host_stats):
    """Rank the 'good' hosts and return the best one.

    Each host's metrics (the values of ``all_host_stats``) are reduced to
    the single value named by self.rank_method, and hosts are ordered by
    that value.  The highest value is best for "memory" and disk-space
    ranking, the lowest for every other method.
    """
    # One comparable value per host.
    rank_values = {}
    for host, metric in all_host_stats.items():
        rank_values[host] = metric[self.rank_method]

    extracted = "\n".join(
        " %s: %s" % pair for pair in rank_values.items())
    LOG.debug(
        "INFO: host %s values extracted are: %s", self.rank_method,
        extracted)

    hosts_low_to_high = sorted(rank_values, key=rank_values.get)
    base_msg = ("good (metric-returning) hosts were ranked in the "
                "following order, from most to least suitable: %s")

    if self.rank_method not in (
            "memory", "disk-space:" + self.USE_DISK_PATH):
        # A load av. is only poss. left; 'random' dealt with earlier.
        # Want lowest => ranking given by asc. list.
        LOG.debug(base_msg, ', '.join(hosts_low_to_high))
        return hosts_low_to_high[0]

    # Want 'most free' i.e. highest => reverse asc. list for ranking.
    LOG.debug(base_msg, ', '.join(hosts_low_to_high[::-1]))
    return hosts_low_to_high[-1]
def _prep_submit_task_job_error(self, suite, itask, action, exc):
    """Handle a failure raised while preparing a task job for submission.

    Records the failure in the job activity log, inserts a task job row
    via the suite database manager, clears the manual-submit flag and
    processes a submit-failed message for the task.
    """
    LOG.debug("submit_num %s" % itask.submit_num)

    failure_ctx = SubProcContext(
        self.JOBS_SUBMIT, action, err=exc, ret_code=1)
    log_task_job_activity(
        failure_ctx, suite, itask.point, itask.tdef.name,
        submit_num=itask.submit_num)

    # Persist
    job_row = {
        'is_manual_submit': itask.is_manual_submit,
        'try_num': itask.get_try_num(),
        'time_submit': get_current_time_string(),
        'batch_sys_name': itask.summary.get('batch_sys_name'),
    }
    self.suite_db_mgr.put_insert_task_jobs(itask, job_row)

    itask.is_manual_submit = False
    self.task_events_mgr.process_message(
        itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED)
def callback(self, ctx: SubFuncContext):
    """Callback for asynchronous xtrigger functions.

    Removes the function signature from the active list, then parses the
    (satisfied, results) pair from the JSON in ``ctx.out``.  When
    satisfied, the data store is updated and the results are recorded.
    Output that cannot be parsed is ignored.

    Args:
        ctx (SubFuncContext): function context
    Raises:
        ValueError: if the context given is not active
    """
    LOG.debug(ctx)
    sig = ctx.get_signature()
    self.active.remove(sig)

    try:
        satisfied, results = json.loads(ctx.out)
    except (ValueError, TypeError):
        return

    LOG.debug('%s: returned %s', sig, results)
    if not satisfied:
        return
    self.data_store_mgr.delta_task_xtrigger(sig, True)
    LOG.info('xtrigger satisfied: %s = %s', ctx.label, sig)
    self.sat_xtrig[sig] = results
def log_task_job_activity(ctx, suite, point, name, submit_num=None):
    """Append the string form of ``ctx`` to a task job's activity log.

    Nothing happens if ``ctx`` has an empty string form.  When
    ``ctx.cmd_key`` is a tuple (an event handler), its last element is used
    as the submit number.  If the activity log cannot be written, the text
    goes to the suite log instead.  Contexts with a command are also logged
    to the suite log: at ERROR level on a non-zero return code, otherwise
    at DEBUG level.
    """
    ctx_str = str(ctx)
    if not ctx_str:
        return

    if isinstance(ctx.cmd_key, tuple):  # An event handler
        submit_num = ctx.cmd_key[-1]
    job_activity_log = get_task_job_activity_log(
        suite, point, name, submit_num)

    try:
        with open(job_activity_log, "ab") as handle:
            handle.write((ctx_str + '\n').encode())
    except IOError as exc:
        # This happens when there is no job directory, e.g. if job host
        # selection command causes an submission failure, there will be no
        # job directory. In this case, just send the information to the
        # suite log.
        LOG.exception(exc)
        LOG.info(ctx_str)

    if not ctx.cmd:
        return
    if ctx.ret_code:
        LOG.error(ctx_str)
    else:
        LOG.debug(ctx_str)
def _listener(self):
    """The server main loop, listen for and serve requests.

    Runs until 'STOP' is received on self.queue.  Each message received is
    decoded, passed to self._receiver() and the encoded result is sent
    back.  A message that cannot be decoded is answered with an 'error'
    response holding the exception message and traceback, and the loop
    carries on.

    Raises:
        ValueError: if an unknown command is received on the queue.
    """
    while True:
        # process any commands passed to the listener by its parent process
        if self.queue.qsize():
            command = self.queue.get()
            if command == 'STOP':
                break
            raise ValueError('Unknown command "%s"' % command)

        try:
            # wait RECV_TIMEOUT for a message
            msg = self.socket.recv_string()
        except zmq.error.Again:
            # timeout, continue with the loop, this allows the listener
            # thread to stop
            continue

        # attempt to decode the message, authenticating the user in the
        # process
        try:
            message = self.decode(msg, self.secret())
        except Exception as exc:  # purposefully catch generic exception
            # failed to decode message, possibly resulting from failed
            # authentication.  Reply with the error rather than returning:
            # returning here would end the listener thread and leave the
            # client without any response.
            import traceback
            res = {'error': {
                'message': str(exc),
                'traceback': traceback.format_exc()}}
        else:
            # success case - serve the request
            LOG.debug('zmq:recv %s', message)
            res = self._receiver(message)

        response = self.encode(res, self.secret())
        LOG.debug('zmq:send %s', res)
        # send back the response
        self.socket.send_string(response)
        sleep(0)  # yield control to other threads
def addict(cfig, key, val, parents, index):
    """Add a new [parents...]key=value pair to a nested dict.

    A repeated item under [scheduling][graph] (or the back-compat
    [scheduling][dependencies]) is accumulated into a flat list of graph
    strings.  Any other repeated item replaces the earlier value.

    Args:
        cfig: the nested configuration dict, modified in place.
        key: item name.
        val: item value.
        parents: list of section names leading to the item.
        index: line index, used in the error message.

    Raises:
        FileParseError: if the addressed section is not a dict, i.e. an item
            of that name was already set at that level.
    """
    for p in parents:
        # drop down the parent list
        cfig = cfig[p]

    if not isinstance(cfig, dict):
        # an item of this name has already been encountered at this level
        raise FileParseError(
            'line %d: already encountered %s',
            index, itemstr(parents, key, val))

    if key not in cfig:
        cfig[key] = val
        return

    # this item already exists
    # (capture the old value's repr now: appending below would otherwise
    # mutate an existing list before it is logged)
    old_repr = repr(cfig[key])
    if (
        parents[0:2] == ['scheduling', 'graph'] or
        # BACK COMPAT: [scheduling][dependencies]
        # url:
        #     https://github.com/cylc/cylc-flow/pull/3191
        # from:
        #     Cylc<=7
        # to:
        #     Cylc8
        # remove at:
        #     Cylc9
        parents[0:2] == ['scheduling', 'dependencies']
    ):
        # append the new graph string to the existing one(s); test the
        # stored value, not the section, so a third and later occurrence
        # extends the list instead of nesting it
        if not isinstance(cfig[key], list):
            cfig[key] = [cfig[key]]
        cfig[key].append(val)
    else:
        cfig[key] = val
    LOG.debug(
        '%s: already exists in configuration:\nold: %s\nnew: %s',
        key, old_repr, repr(cfig[key]))  # repr preserves \n
def load(config, additional_plugins=None):
    """Load the main-loop plugins named in the config.

    Every plugin in ``config['plugins']`` plus ``additional_plugins`` is
    looked up among the 'cylc.main_loop' entry points and loaded.  Each
    function of the plugin module that has a ``main_loop`` attribute is
    registered under that attribute's value, keyed by
    (plugin_name, function_name).

    Returns:
        dict: with 'state', 'timings' and 'config' entries plus one entry
        per ``main_loop`` value found.

    Raises:
        UserInputError: if a plugin name has no matching entry point.
        CylcError: if a plugin fails to load.
    """
    additional_plugins = additional_plugins or []
    entry_points = {}
    for entry_point in pkg_resources.iter_entry_points('cylc.main_loop'):
        entry_points[entry_point.name] = entry_point

    plugins = {'state': {}, 'timings': {}}
    for plugin_name in config['plugins'] + additional_plugins:
        # get plugin
        entry_name = plugin_name.replace(' ', '_')
        try:
            module_name = entry_points[entry_name]
        except KeyError:
            raise UserInputError(
                f'No main-loop plugin: "{plugin_name}"\n'
                + ' Available plugins:\n'
                + indent('\n'.join(sorted(entry_points)), ' '))

        # load plugin
        try:
            module = module_name.load()
        except Exception:
            raise CylcError(f'Could not load plugin: "{plugin_name}"')

        # load coroutines
        log = []
        for coro_name, coro in getmembers(module):
            if not (isfunction(coro) and hasattr(coro, 'main_loop')):
                continue
            coro_key = (plugin_name, coro_name)
            log.append(coro_name)
            plugins.setdefault(coro.main_loop, {})[coro_key] = coro
            plugins['timings'][coro_key] = deque(maxlen=1)
        LOG.debug(
            'Loaded main loop plugin "%s": %s',
            plugin_name + '\n',
            '\n'.join((f'* {x}' for x in log)))

        # set the initial state of the plugin
        plugins['state'][plugin_name] = {}

    # make a note of the config here for ease of reference
    plugins['config'] = config
    return plugins
def _remote_clean_cmd(reg, platform, timeout):
    """Start "cylc clean --local-only" for a workflow over ssh.

    Args:
        reg (str): Workflow name.
        platform (dict): Config for the platform on which to remove the
            workflow.
        timeout (str): Number of seconds to wait before cancelling the
            command.

    Returns:
        The Popen object of the started command, with stdout and stderr
        piped.
    """
    install_target = platform["install target"]
    platform_name = platform["name"]
    LOG.debug(
        f'Cleaning on install target: {install_target} '
        f'(using platform: {platform_name})'
    )

    clean_args = ['clean', '--local-only', reg]
    cmd = construct_ssh_cmd(
        clean_args, platform, timeout=timeout, set_verbosity=True)
    LOG.debug(" ".join(cmd))
    return Popen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
def _job_logs_retrieval_callback(self, proc_ctx, schd_ctx):
    """Callback when job log retrieval completes.

    For every id key of the retrieval command, checks that the expected
    job log files now exist locally.  If they all do, the attempt is
    recorded as good and the event timer is removed.  Otherwise the
    missing files are recorded and the timer's waiting state is unset.
    The outcome is written to the job activity log.
    """
    if proc_ctx.ret_code:
        LOG.error(proc_ctx)
    else:
        LOG.debug(proc_ctx)

    for id_key in proc_ctx.cmd_kwargs["id_keys"]:
        key1, point, name, submit_num = id_key
        try:
            # All completed jobs are expected to have a "job.out".
            fnames = [JOB_LOG_OUT]
            try:
                # NOTE(review): this is a substring test against the
                # literal 'succeeded', not an equality test - confirm.
                if key1[1] not in 'succeeded':
                    fnames.append(JOB_LOG_ERR)
            except TypeError:
                pass

            fname_oks = {
                fname: os.path.exists(get_task_job_log(
                    schd_ctx.suite, point, name, submit_num, fname))
                for fname in fnames}

            # All expected paths must exist to record a good attempt
            log_ctx = SubProcContext((key1, submit_num), None)
            if all(fname_oks.values()):
                log_ctx.ret_code = 0
                del self.event_timers[id_key]
            else:
                log_ctx.ret_code = 1
                missing = sorted(
                    fname for fname, exist_ok in fname_oks.items()
                    if not exist_ok)
                log_ctx.err = "File(s) not retrieved:" + "".join(
                    " %s" % fname for fname in missing)
                self.event_timers[id_key].unset_waiting()
            log_task_job_activity(
                log_ctx, schd_ctx.suite, point, name, submit_num)
        except KeyError as exc:
            LOG.exception(exc)
def _file_install_callback(self, ctx, platform, install_target):
    """Callback when file installation exits.

    Logs any command output to the rsync log, then sets
    self.remote_init_map[install_target] to REMOTE_FILE_INSTALL_DONE on a
    zero return code and to REMOTE_FILE_INSTALL_FAILED otherwise (logging
    a PlatformError).  self.ready is set in both cases.
    """
    if ctx.out:
        RSYNC_LOG.info(
            'File installation information for '
            f'{install_target}:\n{ctx.out}')

    succeeded = ctx.ret_code == 0
    if succeeded:
        # Both file installation and remote init success
        LOG.debug(f"File installation complete for {install_target}")
        status = REMOTE_FILE_INSTALL_DONE
    else:
        status = REMOTE_FILE_INSTALL_FAILED
    self.remote_init_map[install_target] = status

    if not succeeded:
        LOG.error(
            PlatformError(
                PlatformError.MSG_INIT,
                platform['name'],
                ctx=ctx,
            ))
    self.ready = True
def create_cylc_run_tree(self, suite):
    """Create all top-level cylc-run output dirs on the suite host.

    The suite run directory is rolled if run directory housekeeping is
    enabled in the config.  The log, job log, config log, work and share
    directories are then created, followed by the temporary directory if
    one is configured.
    """
    cfg = self.get()

    run_item = 'suite run directory'
    run_dir = self.get_derived_host_item(suite, run_item)
    LOG.debug('creating %s: %s', run_item, run_dir)
    if cfg['enable run directory housekeeping']:
        self.roll_directory(
            run_dir, run_item, cfg['run directory rolling archive length'])

    sub_items = (
        'suite log directory',
        'suite job log directory',
        'suite config log directory',
        'suite work directory',
        'suite share directory',
    )
    for sub_item in sub_items:
        sub_dir = self.get_derived_host_item(suite, sub_item)
        LOG.debug('creating %s: %s', sub_item, sub_dir)
        self.create_directory(sub_dir, sub_item)

    tmp_item = 'temporary directory'
    tmp_dir = cfg[tmp_item]
    if tmp_dir:
        self.create_directory(tmp_dir, tmp_item)
def file_install(self, platform):
    """Install required files on the remote install target.

    Included by default in the file installation:
        Files:
            .service/server.key (required for ZMQ authentication)
        Directories:
            app/
            bin/
            etc/
            lib/

    Marks the install target as REMOTE_FILE_INSTALL_IN_PROGRESS in
    self.remote_init_map and queues an rsync-over-ssh command on the
    process pool, with self._file_install_callback as its callback.
    """
    install_target = platform['install target']
    self.remote_init_map[install_target] = REMOTE_FILE_INSTALL_IN_PROGRESS

    src_path = get_workflow_run_dir(self.workflow)
    dst_path = get_remote_workflow_run_dir(self.workflow)
    rsync_cmd = construct_rsync_over_ssh_cmd(
        src_path, dst_path, platform, self.rsync_includes)
    ctx = SubProcContext('file-install', rsync_cmd)

    LOG.debug(f"Begin file installation on {install_target}")
    # NOTE(review): only install_target is forwarded as a callback
    # argument - confirm this matches the callback's signature.
    self.proc_pool.put_command(
        ctx, self._file_install_callback, [install_target])
def get_vc_info(path: Union[Path, str]) -> Optional['OrderedDict[str, str]']:
    """Return the version control information for a repository, given its
    path.

    Each version control system in INFO_COMMANDS is tried in turn, and the
    information for the first one that recognises ``path`` is returned.

    Returns:
        An ordered dict of version control information, or None if no
        repository was detected.

    Raises:
        OSError: if the info command fails for a reason other than the
            path not being a repository of that type.
    """
    info = OrderedDict()
    missing_base = False
    for vcs, args in INFO_COMMANDS.items():
        try:
            out = _run_cmd(vcs, args, cwd=path)
        except VCSNotInstalledError as exc:
            LOG.debug(exc)
            continue
        except VCSMissingBaseError as exc:
            # Still a repository: carry on, minus the base-dependent info.
            missing_base = True
            LOG.debug(exc)
        except OSError as exc:
            strerror = exc.strerror.lower()
            if any(strerror.startswith(err) for err in NOT_REPO_ERRS[vcs]):
                LOG.debug(f"Source dir {path} is not a {vcs} repository")
                continue
            raise exc

        info['version control system'] = vcs
        if vcs == SVN:
            info.update(_parse_svn_info(out))
        elif vcs == GIT:
            if not missing_base:
                info['repository version'] = out.splitlines()[0]
                info['commit'] = _get_git_commit(path)
            info['working copy root path'] = str(path)
        info['status'] = get_status(vcs, path)

        LOG.debug(f"{vcs} repository detected")
        return info

    return None
def remove_dir_or_file(path: Union[Path, str]) -> None:
    """Delete a directory tree, or a file, or a symlink.

    Symlinks are removed themselves; they are not followed.

    Args:
        path: the absolute path of the directory/file/symlink to delete.

    Raises:
        ValueError: if ``path`` is not absolute.
    """
    if not os.path.isabs(path):
        raise ValueError("Path must be absolute")

    # The symlink test comes first so that a link is never followed.
    if os.path.islink(path):
        LOG.debug(f"Removing symlink: {path}")
        os.remove(path)
        return

    if os.path.isfile(path):
        LOG.debug(f"Removing file: {path}")
        os.remove(path)
        return

    LOG.debug(f"Removing directory: {path}")
    rmtree(path, onerror=handle_rmtree_err)
def upgrade_to_platforms(self):
    """Upgrade [job]batch system and [remote]host to platform.

    Adds 'user' and 'platform_name' columns to the task jobs table and
    fills them in for every row, deriving the user and host from
    'user_at_host' and the platform from the host and batch system.
    The upgrade is skipped if a 'platform_name' column already exists.

    Returns:
        bool - True if upgrade performed, False if upgrade skipped.
    """
    conn = self.connect()

    # check if upgrade required
    schema = conn.execute(rf'PRAGMA table_info({self.TABLE_TASK_JOBS})')
    if any(name == 'platform_name' for _, name, *_ in schema):
        LOG.debug('platform_name column present - skipping db upgrade')
        return False

    # Perform upgrade:
    table = self.TABLE_TASK_JOBS
    LOG.info('Upgrade to Cylc 8 platforms syntax')
    for new_column in ('user', 'platform_name'):
        conn.execute(
            rf'''
                ALTER TABLE
                    {table}
                ADD COLUMN
                    {new_column} TEXT
            '''
        )

    job_platforms = glbl_cfg(cached=False).get(['platforms'])
    rows = conn.execute(
        rf'''
            SELECT
                cycle, name, user_at_host, batch_system
            FROM
                {table}
        '''
    )
    for cycle, name, user_at_host, batch_system in rows:
        match = re.match(r"(?P<user>\S+)@(?P<host>\S+)", user_at_host)
        if match:
            user, host = match.group('user', 'host')
        else:
            # no "user@" part: the whole value is the host
            user, host = '', user_at_host
        platform = platform_from_job_info(
            job_platforms,
            {'batch system': batch_system},
            {'host': host})
        conn.execute(
            rf'''
                UPDATE
                    {table}
                SET
                    user=?,
                    platform_name=?
                WHERE
                    cycle==?
                    AND name==?
            ''',
            (user, platform, cycle, name))

    conn.commit()
    return True
def parse(fpath, output_fname=None, template_vars=None):
    """Parse file items line-by-line into a corresponding nested dict.

    The file is first read and processed by read_and_proc().  If
    ``output_fname`` is given, the processed lines are dumped there.
    Comment and blank lines are skipped; section headings and key=value
    items are added to the result.

    Returns:
        The nested configuration (an OrderedDictWithDefaults).

    Raises:
        FileParseError: on mismatched heading brackets, a heading nested
            more than one level deeper than its predecessor, or a line
            that is neither a heading nor a key=value item.
    """
    # read and process the file (jinja2, include-files, line continuation)
    flines = read_and_proc(fpath, template_vars)
    if output_fname:
        with open(output_fname, 'w') as handle:
            handle.write('\n'.join(flines) + '\n')
        LOG.debug('Processed configuration dumped: %s', output_fname)

    config = OrderedDictWithDefaults()
    parents = []
    nesting_level = 0
    maxline = len(flines) - 1

    # An explicit index is kept because multi-line values consume
    # additional lines and move it forward.
    index = -1
    while index < maxline:
        index += 1
        line = flines[index]

        # skip full-line comments and blank lines
        if re.match(_LINECOMMENT, line):
            continue
        if re.match(_BLANKLINE, line):
            continue

        heading = re.match(_HEADING, line)
        if heading:
            s_open, sect_name, s_close = heading.groups()[1:-1]
            nb = len(s_open)
            if nb != len(s_close):
                raise FileParseError('bracket mismatch', index, line)
            if nb == nesting_level:
                # sibling section
                parents = parents[:-1] + [sect_name]
            elif nb == nesting_level + 1:
                # child section
                parents = parents + [sect_name]
            elif nb < nesting_level:
                # back up one or more levels
                ndif = nesting_level - nb
                parents = parents[:-ndif - 1] + [sect_name]
            else:
                raise FileParseError(
                    'Error line ' + str(index + 1) + ': ' + line)
            nesting_level = nb
            addsect(config, sect_name, parents[:-1])
            continue

        item = re.match(_KEY_VALUE, line)
        if not item:
            # no match
            raise FileParseError(
                'Invalid line ' + str(index + 1) + ': ' + line)

        # matched a key=value item
        key, _, val = item.groups()[1:]
        if val.startswith(('"""', "'''")):
            # triple quoted - may be a multiline value
            val, index = multiline(flines, val, index, maxline)
        addict(config, key, val, parents, index)

    return config
def read_and_proc(fpath, template_vars=None, viewcfg=None, asedit=False):
    """Read a config file and run it through the pre-processing stages.

    Stages, in order: inline include files, EmPy, Jinja2, concatenation of
    continuation lines. Template processing comes before concatenation
    because templates may themselves generate continuation lines.

    When viewcfg is given, its 'inline', 'empy', 'jinja2' and 'contin'
    entries switch the corresponding stage on or off; otherwise all stages
    run. EmPy and Jinja2 only run if the first line carries the matching
    '#!' marker.

    Returns the processed lines, right-stripped.
    """
    fdir = os.path.dirname(fpath)

    # Allow Python modules in lib/python/ (e.g. for use by Jinja2 filters).
    suite_lib_python = os.path.join(fdir, "lib", "python")
    if os.path.isdir(suite_lib_python) and suite_lib_python not in sys.path:
        sys.path.append(suite_lib_python)

    LOG.debug('Reading file %s', fpath)

    # read the file into a list, stripping newlines
    with open(fpath) as handle:
        flines = [raw.rstrip('\n') for raw in handle]

    if viewcfg:
        do_empy = bool(viewcfg['empy'])
        do_jinja2 = bool(viewcfg['jinja2'])
        do_contin = bool(viewcfg['contin'])
        do_inline = bool(viewcfg['inline'])
    else:
        do_empy = do_jinja2 = do_contin = do_inline = True

    # inline any cylc include-files
    if do_inline:
        flines = inline(
            flines, fdir, fpath, False, viewcfg=viewcfg, for_edit=asedit)

    # process with EmPy
    if do_empy and flines and re.match(r'^#![Ee]m[Pp]y\s*', flines[0]):
        LOG.debug('Processing with EmPy')
        try:
            from cylc.flow.parsec.empysupport import empyprocess
        except (ImportError, ModuleNotFoundError):
            raise ParsecError('EmPy Python package must be installed '
                              'to process file: ' + fpath)
        flines = empyprocess(flines, fdir, template_vars)

    # process with Jinja2
    if do_jinja2 and flines and re.match(r'^#![jJ]inja2\s*', flines[0]):
        LOG.debug('Processing with Jinja2')
        try:
            from cylc.flow.parsec.jinja2support import jinja2process
        except (ImportError, ModuleNotFoundError):
            raise ParsecError('Jinja2 Python package must be installed '
                              'to process file: ' + fpath)
        flines = jinja2process(flines, fdir, template_vars)

    # concatenate continuation lines
    if do_contin:
        flines = _concatenate(flines)

    # return rstripped lines
    return [processed.rstrip() for processed in flines]
def _prep_submit_task_job(self, suite, itask, dry_run, check_syntax=True):
    """Prepare a task job submission.

    Selects the task host, increments the submit number, sets the retry
    timers, writes the job file, and registers the job with the job pool.

    Returns:
        * itask on a good preparation (also returned straight away when a
          job file path is already recorded and this is not a dry run).
        * False if host selection or job file preparation raised.
        * None if the host selection result is not ready yet.
    """
    # A job file was already written for this task: nothing to redo unless
    # this is a dry run.
    if itask.local_job_file_path and not dry_run:
        return itask

    # Handle broadcasts
    overrides = self.task_events_mgr.broadcast_mgr.get_broadcast(
        itask.identity)
    if overrides:
        # Work on a copy so the task definition's own config is untouched.
        rtconfig = pdeepcopy(itask.tdef.rtconfig)
        poverride(rtconfig, overrides, prepend=True)
    else:
        rtconfig = itask.tdef.rtconfig

    # Determine task host settings now, just before job submission,
    # because dynamic host selection may be used.
    try:
        task_host = self.task_remote_mgr.remote_host_select(
            rtconfig['remote']['host'])
    except TaskRemoteMgmtError as exc:
        # Submit number not yet incremented
        itask.submit_num += 1
        itask.summary['job_hosts'][itask.submit_num] = ''
        # Retry delays, needed for the try_num
        self._set_retry_timers(itask, rtconfig)
        self._prep_submit_task_job_error(suite, itask, dry_run,
                                         '(remote host select)', exc)
        return False
    else:
        if task_host is None:  # host select not ready
            itask.set_summary_message(self.REMOTE_SELECT_MSG)
            return
        itask.task_host = task_host
        # Submit number not yet incremented
        itask.submit_num += 1
        # Retry delays, needed for the try_num
        self._set_retry_timers(itask, rtconfig)

    try:
        job_conf = self._prep_submit_task_job_impl(suite, itask, rtconfig)
        local_job_file_path = get_task_job_job_log(suite, itask.point,
                                                   itask.tdef.name,
                                                   itask.submit_num)
        self.job_file_writer.write(local_job_file_path, job_conf,
                                   check_syntax=check_syntax)
    except Exception as exc:
        # Could be a bad command template, IOError, etc
        self._prep_submit_task_job_error(suite, itask, dry_run,
                                         '(prepare job file)', exc)
        return False
    itask.local_job_file_path = local_job_file_path

    # Register a copy of the job configuration with the job pool, adding
    # the task's log file list and the job log directory.
    job_config = deepcopy(job_conf)
    job_config['logfiles'] = deepcopy(itask.summary['logfiles'])
    job_config['job_log_dir'] = get_task_job_log(suite, itask.point,
                                                 itask.tdef.name,
                                                 itask.submit_num)
    itask.jobs.append(job_config['job_d'])
    self.job_pool.insert_job(job_config)

    # NOTE(review): the nesting below was reconstructed from a collapsed
    # source line; confirm the debug log belongs inside the dry-run branch.
    if dry_run:
        itask.set_summary_message(self.DRY_RUN_MSG)
        self.job_pool.add_job_msg(job_config['job_d'], self.DRY_RUN_MSG)
        LOG.debug(f'[{itask}] -{self.DRY_RUN_MSG}')

    # Return value used by "cylc submit" and "cylc jobscript":
    return itask
async def async_request(
        self,
        command: str,
        args: Optional[Dict[str, Any]] = None,
        timeout: Optional[float] = None,
        req_meta: Optional[Dict[str, Any]] = None) -> object:
    """Send an asynchronous request using asyncio.

    Has the same arguments and return values as ``serial_request``.

    Args:
        command: Name of the command to send.
        args: Command arguments; an empty dict is sent if not given.
        timeout: Multiplied by 1000 before being passed to the poller;
            ``self.timeout`` is used as-is when this is not given.
        req_meta: Extra entries merged into the message's ``meta``
            section for this request only.

    Returns:
        The ``data`` entry of the response.

    Raises:
        WorkflowStopped: On timeout, if the location reported by
            ``get_location`` no longer matches this client's host/port.
        ClientTimeout: On timeout otherwise.
        ClientError: If the response carries no ``data`` entry.
    """
    timeout = (float(timeout) * 1000 if timeout else None) or self.timeout
    if not args:
        args = {}

    # Note: we are using CurveZMQ to secure the messages (see
    # self.curve_auth, self.socket.curve_...key etc.). We have set up
    # public-key cryptography on the ZMQ messaging and sockets, so
    # there is no need to encrypt messages ourselves before sending.

    # send message
    msg: Dict[str, Any] = {'command': command, 'args': args}
    msg.update(self.header)
    # add the request metadata
    if req_meta:
        # msg.update() copied a *reference* to the header's meta dict, so
        # updating it in place would permanently add this request's
        # metadata to self.header. Merge into a fresh dict instead.
        msg['meta'] = {**msg.get('meta', {}), **req_meta}
    LOG.debug('zmq:send %s', msg)
    message = encode_(msg)
    self.socket.send_string(message)

    # receive response
    if self.poller.poll(timeout):
        res = await self.socket.recv()
    else:
        if callable(self.timeout_handler):
            self.timeout_handler()
        # A changed location means the workflow we were talking to has
        # gone, which is reported differently from a plain timeout.
        host, port, _ = get_location(self.workflow)
        if host != self.host or port != self.port:
            raise WorkflowStopped(self.workflow)
        raise ClientTimeout(
            'Timeout waiting for server response.'
            ' This could be due to network or server issues.'
            ' Check the workflow log.')

    # Commands listed in PB_METHOD_MAP return raw payloads that are passed
    # through undecoded; everything else is decoded.
    if msg['command'] in PB_METHOD_MAP:
        response = {'data': res}
    else:
        response = decode_(res.decode())
    LOG.debug('zmq:recv %s', response)

    try:
        return response['data']
    except KeyError:
        error = response.get(
            'error',
            {'message': f'Received invalid response: {response}'},
        )
        raise ClientError(
            error.get('message'),
            error.get('traceback'),
        )
def detect_old_contact_file(self, reg, check_host_port=None):
    """Detect old suite contact file.

    If an old contact file does not exist, do nothing. If one does exist
    but the suite process is definitely not alive, remove it. If one exists
    and the suite process is still alive, raise SuiteServiceFileError.

    If check_host_port is specified and does not match the (host, port)
    value in the old contact file, raise AssertionError.

    Args:
        reg (str): suite name
        check_host_port (tuple): (host, port) to check against

    Raise:
        AssertionError:
            If old contact file exists but does not have matching
            (host, port) with value of check_host_port.
        SuiteServiceFileError:
            If old contact file exists and the suite process still alive.
    """
    # An old suite of the same name may be running if a contact file exists
    # and can be loaded.
    try:
        data = self.load_contact_file(reg)
        old_host = data[self.KEY_HOST]
        old_port = data[self.KEY_PORT]
        old_proc_str = data[self.KEY_PROCESS]
    except (IOError, ValueError, SuiteServiceFileError):
        # Contact file does not exist or corrupted, should be OK to proceed
        return
    if check_host_port and check_host_port != (old_host, int(old_port)):
        raise AssertionError("%s != (%s, %s)" % (
            check_host_port, old_host, old_port))

    # Run the "ps" command to see if the process is still running or not.
    # If the old suite process is still running, it should show up with the
    # same command line as before.
    # Terminate command after 10 seconds to prevent hanging, etc.
    old_pid_str = old_proc_str.split(None, 1)[0].strip()
    cmd = ["timeout", "10", "ps", self.PS_OPTS, str(old_pid_str)]
    if is_remote_host(old_host):
        import shlex
        ssh_str = str(glbl_cfg().get_host_item("ssh command", old_host))
        cmd = shlex.split(ssh_str) + ["-n", old_host] + cmd
    from subprocess import Popen, PIPE, DEVNULL
    from time import sleep, time
    # DEVNULL rather than open(os.devnull): the latter creates a file
    # object that is never closed.
    proc = Popen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
    # Terminate command after 10 seconds to prevent hanging SSH, etc.
    timeout = time() + 10.0
    while proc.poll() is None:
        if time() > timeout:
            proc.terminate()
        sleep(0.1)
    fname = self.get_contact_file(reg)
    ret_code = proc.wait()
    out, err = (f.decode() for f in proc.communicate())
    if ret_code:
        LOG.debug("$ %s # return %d\n%s", ' '.join(cmd), ret_code, err)
    # Scan from the last line backwards: a line equal to the recorded
    # process string means the suite is alive; reaching the "PID" header
    # first means "ps" ran but found nothing.
    for line in reversed(out.splitlines()):
        if line.strip() == old_proc_str:
            # Suite definitely still running
            break
        # Guard against blank lines, for which split() returns [].
        fields = line.split(None, 1)
        if fields and fields[0].strip() == "PID":
            # Only "ps" header - "ps" has run, but no matching results.
            # Suite not running. Attempt to remove suite contact file.
            try:
                os.unlink(fname)
                return
            except OSError:
                break

    raise SuiteServiceFileError(
        (
            r"""suite contact file exists: %(fname)s Suite "%(suite)s" is already running, and listening at "%(host)s:%(port)s". To start a new run, stop the old one first with one or more of these: * cylc stop %(suite)s # wait for active tasks/event handlers * cylc stop --kill %(suite)s # kill active tasks and wait * cylc stop --now %(suite)s # don't wait for active tasks * cylc stop --now --now %(suite)s # don't wait * ssh -n "%(host)s" kill %(pid)s # final brute force! """
        ) % {
            "host": old_host,
            "port": old_port,
            "pid": old_pid_str,
            "fname": fname,
            "suite": reg,
        }
    )
def _load_remote_item(self, item, reg, owner, host):
    """Fetch a service item's content from a remote [owner@]host over SSH.

    Returns None when the target is not remote, when the SSH command
    cannot be started, when it exits non-zero, or when no content follows
    the marker line in its output. A contact file on a non-remote host is
    first tried through the local filesystem.
    """
    if not is_remote(host, owner):
        return
    host = 'localhost' if host is None else host
    owner = get_user() if owner is None else owner

    if item == self.FILE_BASE_CONTACT and not is_remote_host(host):
        # Attempt to read suite contact file via the local filesystem.
        local_run_d = get_remote_suite_run_dir('localhost', owner, reg)
        path = r'%(run_d)s/%(srv_base)s' % {
            'run_d': local_run_d,
            'srv_base': self.DIR_BASE_SRV,
        }
        local_content = self._load_local_item(item, path)
        if local_content is not None:
            return local_content
        # Else drop through and attempt via ssh to the suite account.

    # Prefix STDOUT to ensure returned content is relevant
    prefix = r'[CYLC-AUTH] %(suite)s' % {'suite': reg}
    # Attempt to cat passphrase file under suite service directory
    script_template = (
        r"""echo '%(prefix)s'; """
        r'''cat "%(run_d)s/%(srv_base)s/%(item)s"''')
    script = script_template % {
        'prefix': prefix,
        'run_d': get_remote_suite_run_dir(host, owner, reg),
        'srv_base': self.DIR_BASE_SRV,
        'item': item
    }
    import shlex
    ssh_command = glbl_cfg().get_host_item('ssh command', host, owner)
    command = shlex.split(ssh_command)
    command += ['-n', owner + '@' + host, script]
    from subprocess import Popen, PIPE, DEVNULL  # nosec
    try:
        proc = Popen(
            command, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)  # nosec
    except OSError:
        if cylc.flow.flags.debug:
            import traceback
            traceback.print_exc()
        return
    raw_out, raw_err = proc.communicate()
    out = raw_out.decode()
    err = raw_err.decode()
    ret_code = proc.wait()

    # Extract passphrase from STDOUT
    # Everything after the first line equal to the prefix is the content.
    out_lines = out.splitlines(True)
    content = ""
    for position, line in enumerate(out_lines):
        if line.strip() == prefix:
            content = "".join(out_lines[position + 1:])
            break

    if ret_code or not content:
        LOG.debug(
            '$ %(command)s # code=%(ret_code)s\n%(err)s',
            {
                'command': command,
                # STDOUT may contain passphrase, so not safe to print
                # 'out': out,
                'err': err,
                'ret_code': ret_code,
            })
        return
    return content
def detect_old_contact_file(self, reg, check_host_port=None):
    """Detect old suite contact file.

    If an old contact file does not exist, do nothing. If one does exist
    but the suite process is definitely not alive, remove it. If one exists
    and the suite process is still alive, raise SuiteServiceFileError.

    If check_host_port is specified and does not match the (host, port)
    value in the old contact file, raise AssertionError.

    Args:
        reg (str): suite name
        check_host_port (tuple): (host, port) to check against

    Raise:
        AssertionError:
            If old contact file exists but does not have matching
            (host, port) with value of check_host_port.
        SuiteServiceFileError:
            If old contact file exists and the suite process still alive.
    """
    # An old suite of the same name may be running if a contact file exists
    # and can be loaded.
    try:
        data = self.load_contact_file(reg)
        old_host = data[self.KEY_HOST]
        old_port = data[self.KEY_PORT]
        old_proc_str = data[self.KEY_PROCESS]
    except (IOError, ValueError, SuiteServiceFileError):
        # Contact file does not exist or corrupted, should be OK to proceed
        return
    if check_host_port and check_host_port != (old_host, int(old_port)):
        raise AssertionError("%s != (%s, %s)" %
                             (check_host_port, old_host, old_port))

    # Run the "ps" command to see if the process is still running or not.
    # If the old suite process is still running, it should show up with the
    # same command line as before.
    # Terminate command after 10 seconds to prevent hanging, etc.
    old_pid_str = old_proc_str.split(None, 1)[0].strip()
    cmd = ["timeout", "10", "ps", self.PS_OPTS, str(old_pid_str)]
    if is_remote_host(old_host):
        import shlex
        ssh_str = str(glbl_cfg().get_host_item("ssh command", old_host))
        cmd = shlex.split(ssh_str) + ["-n", old_host] + cmd
    from subprocess import Popen, PIPE, DEVNULL  # nosec
    from time import sleep, time
    proc = Popen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)  # nosec
    # Terminate command after 10 seconds to prevent hanging SSH, etc.
    timeout = time() + 10.0
    while proc.poll() is None:
        if time() > timeout:
            proc.terminate()
        sleep(0.1)
    fname = self.get_contact_file(reg)
    ret_code = proc.wait()
    out, err = (f.decode() for f in proc.communicate())
    if ret_code:
        LOG.debug("$ %s # return %d\n%s", ' '.join(cmd), ret_code, err)
    # Scan from the last line backwards: a line equal to the recorded
    # process string means the suite is alive; reaching the "PID" header
    # first means "ps" ran but found nothing.
    for line in reversed(out.splitlines()):
        if line.strip() == old_proc_str:
            # Suite definitely still running
            break
        # Guard against blank lines, for which split() returns [].
        fields = line.split(None, 1)
        if fields and fields[0].strip() == "PID":
            # Only "ps" header - "ps" has run, but no matching results.
            # Suite not running. Attempt to remove suite contact file.
            try:
                os.unlink(fname)
                return
            except OSError:
                break

    raise SuiteServiceFileError(
        (r"""suite contact file exists: %(fname)s Suite "%(suite)s" is already running, and listening at "%(host)s:%(port)s". To start a new run, stop the old one first with one or more of these: * cylc stop %(suite)s # wait for active tasks/event handlers * cylc stop --kill %(suite)s # kill active tasks and wait * cylc stop --now %(suite)s # don't wait for active tasks * cylc stop --now --now %(suite)s # don't wait * ssh -n "%(host)s" kill %(pid)s # final brute force! """) % {
            "host": old_host,
            "port": old_port,
            "pid": old_pid_str,
            "fname": fname,
            "suite": reg,
        })
def jinja2process(flines, dir_, template_vars=None):
    """Pass configure file through Jinja2 processor.

    Args:
        flines (list): File lines; the first one (the '#!jinja2' line) is
            excluded from the template.
        dir_ (str): Directory passed to jinja2environment(); also the base
            for the relative include-file name in error reports.
        template_vars (dict): Template variables, or None for none.

    Returns:
        list: The rendered lines with blank lines removed.

    Raises:
        Jinja2Error: Wrapping any exception raised while building or
            rendering the template, with source context lines attached
            where they can be determined.
    """
    # Load file lines into a template, excluding '#!jinja2' so that
    # '#!cylc-x.y.z' rises to the top. Callers should handle jinja2
    # TemplateSyntaxerror and TemplateError.
    if template_vars:
        LOG.debug(
            'Setting Jinja2 template variables:\n%s',
            '\n'.join(
                ['+ %s=%s' % item for item in sorted(template_vars.items())]))
    else:
        # Jinja2 render method requires a dictionary as argument (not None):
        template_vars = {}

    # CALLERS SHOULD HANDLE JINJA2 TEMPLATESYNTAXERROR AND TEMPLATEERROR
    # AND TYPEERROR (e.g. for not using "|int" filter on number inputs.
    # Convert unicode to plain str, ToDo - still needed for parsec?)
    try:
        env = jinja2environment(dir_)
        template = env.from_string('\n'.join(flines[1:]))
        lines = str(template.render(template_vars)).splitlines()
    except TemplateSyntaxError as exc:
        filename = None
        # Start from "no context": neither branch below may apply, and
        # nothing has been assigned to `lines` if from_string() raised.
        lines = None
        # extract source lines
        if exc.lineno and exc.source and not exc.filename:
            # error in suite.rc or cylc include file
            lines = exc.source.splitlines()
        elif exc.lineno and exc.filename:
            # error in jinja2 include file
            filename = os.path.relpath(exc.filename, dir_)
            # Read the whole file as lines so the same line-number slice
            # below applies; seek() takes a byte offset, not a line number.
            try:
                with open(exc.filename, 'r') as include_file:
                    lines = include_file.read().splitlines()
            except OSError:
                # Still report the template error, just without context.
                lines = None
        if lines:
            # extract context lines from source lines
            lines = lines[max(exc.lineno - CONTEXT_LINES, 0):exc.lineno]
        raise Jinja2Error(exc, lines=lines, filename=filename)
    except Exception as exc:
        lineno = get_error_location()
        lines = None
        if lineno:
            lineno += 1  # shebang line ignored by jinja2
            lines = flines[max(lineno - CONTEXT_LINES, 0):lineno]
        raise Jinja2Error(exc, lines=lines)

    # Jinja2 leaves blank lines where source lines contain
    # only Jinja2 code; this matters if line continuation
    # markers are involved, so we remove blank lines here.
    # (Newlines are not restored; that would only matter for display.)
    return [line for line in lines if line.strip()]
def construct_ssh_cmd(raw_cmd, user=None, host=None, forward_x11=False,
                      stdin=False, ssh_login_shell=None, ssh_cylc=None,
                      set_UTC=False, allow_flag_opts=False):
    """Wrap a bare command in everything needed to run it via ssh.

    Arguments:
        raw_cmd (list): primitive command to run remotely.
        user (string): user ID for the remote login.
        host (string): remote host name. Use 'localhost' if not specified.
        forward_x11 (boolean):
            If True, add '-Y' to the ssh command (X11 forwarding).
        stdin:
            If None, the `-n` option will be added to the SSH command line.
        ssh_login_shell (boolean):
            If True, launch remote command with `bash --login -c
            'exec "$0" "$@"'`. If None, the 'use login shell' host setting
            decides.
        ssh_cylc (string):
            Location of the remote cylc executable. If not given, the
            'cylc executable' host setting is used.
        set_UTC (boolean):
            If True, pass CYLC_UTC=True and TZ=UTC through when the local
            CYLC_UTC environment variable is "True" or "true".
        allow_flag_opts (boolean):
            If True, append '--verbose' and/or '--debug' to the command
            when the corresponding flag or environment variable is set.

    Return:
        A list containing a chosen command including all arguments and
        options necessary to directly execute the bare command on a given
        host via ssh.

    Raises:
        ValueError: if the configured cylc executable does not end with
            'cylc'.
    """
    truthy = ("True", "true")

    command = shlex.split(glbl_cfg().get_host_item('ssh command', host, user))
    if forward_x11:
        command.append('-Y')
    if stdin is None:
        command.append('-n')

    # [user@]host, defaulting the host part to localhost
    login_part = user + '@' if user else ''
    host_part = host if host else 'localhost'
    command.append(login_part + host_part)

    # Pass CYLC_VERSION and optionally, CYLC_CONF_PATH & CYLC_UTC through.
    command.extend(['env', quote(r'CYLC_VERSION=%s' % CYLC_VERSION)])
    conf_path = os.environ.get('CYLC_CONF_PATH')
    if conf_path is not None:
        command.append(quote(r'CYLC_CONF_PATH=%s' % conf_path))
    if set_UTC and os.getenv('CYLC_UTC') in truthy:
        command.append(quote(r'CYLC_UTC=True'))
        command.append(quote(r'TZ=UTC'))

    # Use bash -l?
    if ssh_login_shell is None:
        ssh_login_shell = glbl_cfg().get_host_item(
            'use login shell', host, user)
    if ssh_login_shell:
        # A login shell will always source /etc/profile and the user's bash
        # profile file. To avoid having to quote the entire remote command
        # it is passed as arguments to the bash script.
        command.extend(['bash', '--login', '-c', quote(r'exec "$0" "$@"')])

    # 'cylc' on the remote host
    if not ssh_cylc:
        ssh_cylc = glbl_cfg().get_host_item('cylc executable', host, user)
        if not ssh_cylc.endswith('cylc'):
            # TODO - raise appropriate exception
            raise ValueError(
                r'ERROR: bad cylc executable in global config: %s' % ssh_cylc)
    command.append(ssh_cylc)

    # Insert core raw command after ssh, but before its own, command options.
    command.extend(raw_cmd)

    if allow_flag_opts:
        want_verbose = (
            cylc.flow.flags.verbose or os.getenv('CYLC_VERBOSE') in truthy)
        if want_verbose:
            command.append(r'--verbose')
        want_debug = (
            cylc.flow.flags.debug or os.getenv('CYLC_DEBUG') in truthy)
        if want_debug:
            command.append(r'--debug')

    # Echo the final command: to the log if it has handlers, else to
    # stderr in debug mode.
    if LOG.handlers:
        LOG.debug("$ %s", ' '.join(quote(word) for word in command))
    elif cylc.flow.flags.debug:
        sys.stderr.write(
            "$ %s\n" % ' '.join(quote(word) for word in command))

    return command
def read_and_proc(fpath, template_vars=None, viewcfg=None, asedit=False):
    """Read a config file and run it through the pre-processing stages.

    Stages, in order: inline include files, EmPy, Jinja2, concatenation of
    continuation lines. Template processing comes before concatenation
    because templates may themselves generate continuation lines.

    When viewcfg is given, its 'inline', 'empy', 'jinja2' and 'contin'
    entries switch the corresponding stage on or off; otherwise all stages
    run. EmPy and Jinja2 only run if the first line carries the matching
    '#!' marker.

    Returns the processed lines, right-stripped.
    """
    fdir = os.path.dirname(fpath)

    # Allow Python modules in lib/python/ (e.g. for use by Jinja2 filters).
    suite_lib_python = os.path.join(fdir, "lib", "python")
    if os.path.isdir(suite_lib_python) and suite_lib_python not in sys.path:
        sys.path.append(suite_lib_python)

    LOG.debug('Reading file %s', fpath)

    # read the file into a list, stripping newlines
    with open(fpath) as handle:
        flines = [raw.rstrip('\n') for raw in handle]

    if viewcfg:
        do_empy = bool(viewcfg['empy'])
        do_jinja2 = bool(viewcfg['jinja2'])
        do_contin = bool(viewcfg['contin'])
        do_inline = bool(viewcfg['inline'])
    else:
        do_empy = do_jinja2 = do_contin = do_inline = True

    # inline any cylc include-files
    if do_inline:
        flines = inline(
            flines, fdir, fpath, False, viewcfg=viewcfg, for_edit=asedit)

    # process with EmPy
    if do_empy and flines and re.match(r'^#![Ee]m[Pp]y\s*', flines[0]):
        LOG.debug('Processing with EmPy')
        try:
            from cylc.flow.parsec.empysupport import empyprocess
        except (ImportError, ModuleNotFoundError):
            raise ParsecError('EmPy Python package must be installed '
                              'to process file: ' + fpath)
        flines = empyprocess(flines, fdir, template_vars)

    # process with Jinja2
    if do_jinja2 and flines and re.match(r'^#![jJ]inja2\s*', flines[0]):
        LOG.debug('Processing with Jinja2')
        try:
            from cylc.flow.parsec.jinja2support import jinja2process
        except (ImportError, ModuleNotFoundError):
            raise ParsecError('Jinja2 Python package must be installed '
                              'to process file: ' + fpath)
        flines = jinja2process(flines, fdir, template_vars)

    # concatenate continuation lines
    if do_contin:
        flines = _concatenate(flines)

    # return rstripped lines
    return [processed.rstrip() for processed in flines]
def submit_task_jobs(self, suite, itasks, is_simulation=False):
    """Prepare and submit task jobs.

    Submit tasks where possible. Ignore tasks that are waiting for host
    select command to complete, or tasks that are waiting for remote
    initialisation. Bad host select command, error writing to a job file or
    bad remote initialisation will cause a bad task - leading to submission
    failure.

    This method uses prep_submit_task_jobs() as helper. Prepared tasks are
    grouped by (host, owner); for each group a "cylc jobs-submit" command
    is built and queued on the process pool in batches.

    Return (list): list of tasks that attempted submission.
    """
    if is_simulation:
        return self._simulation_submit_task_jobs(itasks)

    # Prepare tasks for job submission
    prepared_tasks, bad_tasks = self.prep_submit_task_jobs(suite, itasks)

    # Reset consumed host selection results
    self.task_remote_mgr.remote_host_select_reset()

    if not prepared_tasks:
        return bad_tasks

    # Group task jobs by (host, owner)
    auth_itasks = {}  # {(host, owner): [itask, ...], ...}
    for itask in prepared_tasks:
        auth_itasks.setdefault((itask.task_host, itask.task_owner), [])
        auth_itasks[(itask.task_host, itask.task_owner)].append(itask)

    # Submit task jobs for each (host, owner) group
    # NOTE(review): done_tasks is the same list object as bad_tasks, so the
    # extend() below also grows bad_tasks.
    done_tasks = bad_tasks
    # From here on `itasks` is rebound to the tasks of the current group.
    for (host, owner), itasks in sorted(auth_itasks.items()):
        is_init = self.task_remote_mgr.remote_init(host, owner)
        if is_init is None:
            # Remote is waiting to be initialised
            for itask in itasks:
                itask.set_summary_message(self.REMOTE_INIT_MSG)
            continue
        # Ensure that localhost background/at jobs are recorded as running
        # on the host name of the current suite host, rather than just
        # "localhost". On suite restart on a different suite host, this
        # allows the restart logic to correctly poll the status of the
        # background/at jobs that may still be running on the previous
        # suite host.
        # NOTE(review): `itask` here is whatever an earlier loop left bound
        # (the last prepared task, or a task from a previous group), not
        # necessarily a member of this group - confirm this is intended.
        if (
                self.batch_sys_mgr.is_job_local_to_host(
                    itask.summary['batch_sys_name']) and
                not is_remote_host(host)
        ):
            owner_at_host = get_host()
        else:
            owner_at_host = host
        # Persist
        if owner:
            owner_at_host = owner + '@' + owner_at_host
        now_str = get_current_time_string()
        # Tasks count as "attempted" from this point, including those of a
        # group whose remote initialisation failed (handled below).
        done_tasks.extend(itasks)
        for itask in itasks:
            # Log and persist
            LOG.info(
                '[%s] -submit-num=%02d, owner@host=%s',
                itask, itask.submit_num, owner_at_host)
            self.suite_db_mgr.put_insert_task_jobs(itask, {
                'is_manual_submit': itask.is_manual_submit,
                'try_num': itask.get_try_num(),
                'time_submit': now_str,
                'user_at_host': owner_at_host,
                'batch_sys_name': itask.summary['batch_sys_name'],
            })
            itask.is_manual_submit = False
        if is_init == REMOTE_INIT_FAILED:
            # Remote has failed to initialise
            # Set submit-failed for all affected tasks
            for itask in itasks:
                itask.local_job_file_path = None  # reset for retry
                log_task_job_activity(
                    SubProcContext(
                        self.JOBS_SUBMIT,
                        '(init %s)' % owner_at_host,
                        err=REMOTE_INIT_FAILED,
                        ret_code=1),
                    suite, itask.point, itask.tdef.name)
                self.task_events_mgr.process_message(
                    itask, CRITICAL,
                    self.task_events_mgr.EVENT_SUBMIT_FAILED)
            continue
        # Build the "cylc jobs-submit" command
        cmd = ['cylc', self.JOBS_SUBMIT]
        if LOG.isEnabledFor(DEBUG):
            cmd.append('--debug')
        if get_utc_mode():
            cmd.append('--utc-mode')
        remote_mode = False
        kwargs = {}
        # Add --host / --user only for values that test as remote; either
        # one switches the command into remote mode.
        for key, value, test_func in [
                ('host', host, is_remote_host),
                ('user', owner, is_remote_user)]:
            if test_func(value):
                cmd.append('--%s=%s' % (key, value))
                remote_mode = True
                kwargs[key] = value
        if remote_mode:
            cmd.append('--remote-mode')
        cmd.append('--')
        cmd.append(glbl_cfg().get_derived_host_item(
            suite, 'suite job log directory', host, owner))
        # Chop itasks into a series of shorter lists if it's very big
        # to prevent overloading of stdout and stderr pipes.
        itasks = sorted(itasks, key=lambda itask: itask.identity)
        # Splits into (len // 100) + 1 roughly equal batches.
        chunk_size = len(itasks) // ((len(itasks) // 100) + 1) + 1
        itasks_batches = [
            itasks[i:i + chunk_size] for i in range(0,
                                                    len(itasks),
                                                    chunk_size)]
        LOG.debug(
            '%s ... # will invoke in batches, sizes=%s',
            cmd, [len(b) for b in itasks_batches])
        # (the enumerate index is not used in the loop body)
        for i, itasks_batch in enumerate(itasks_batches):
            stdin_files = []
            job_log_dirs = []
            for itask in itasks_batch:
                # In remote mode the job files are passed via stdin_files.
                if remote_mode:
                    stdin_files.append(
                        get_task_job_job_log(
                            suite, itask.point, itask.tdef.name,
                            itask.submit_num))
                job_log_dirs.append(get_task_job_id(
                    itask.point, itask.tdef.name, itask.submit_num))
                # The job file is now (about to be) used: reset the file
                # write flag so that subsequent manual retrigger will
                # generate a new job file.
                itask.local_job_file_path = None
                itask.state.reset_state(TASK_STATUS_READY)
                if itask.state.outputs.has_custom_triggers():
                    self.suite_db_mgr.put_update_task_outputs(itask)
            self.proc_pool.put_command(
                SubProcContext(
                    self.JOBS_SUBMIT,
                    cmd + job_log_dirs,
                    stdin_files=stdin_files,
                    job_log_dirs=job_log_dirs,
                    **kwargs
                ),
                self._submit_task_jobs_callback, [suite, itasks_batch])
    return done_tasks
def _load_remote_item(self, item, reg, owner, host):
    """Load content of service item from remote [owner@]host via SSH.

    Returns the text that follows the marker line in the remote command's
    output, or None when the target is not remote, the SSH command cannot
    be started, it exits non-zero, or no content follows the marker. A
    contact file on a non-remote host is first tried through the local
    filesystem.
    """
    if not is_remote(host, owner):
        return
    if host is None:
        host = 'localhost'
    if owner is None:
        owner = get_user()
    if item == self.FILE_BASE_CONTACT and not is_remote_host(host):
        # Attempt to read suite contact file via the local filesystem.
        path = r'%(run_d)s/%(srv_base)s' % {
            'run_d': glbl_cfg().get_derived_host_item(
                reg, 'suite run directory', 'localhost', owner,
                replace_home=False),
            'srv_base': self.DIR_BASE_SRV,
        }
        content = self._load_local_item(item, path)
        if content is not None:
            return content
        # Else drop through and attempt via ssh to the suite account.
    # Prefix STDOUT to ensure returned content is relevant
    prefix = r'[CYLC-AUTH] %(suite)s' % {'suite': reg}
    # Attempt to cat passphrase file under suite service directory
    script = (
        r"""echo '%(prefix)s'; """
        r'''cat "%(run_d)s/%(srv_base)s/%(item)s"'''
    ) % {
        'prefix': prefix,
        'run_d': glbl_cfg().get_derived_host_item(
            reg, 'suite run directory', host, owner),
        'srv_base': self.DIR_BASE_SRV,
        'item': item
    }
    import shlex
    command = shlex.split(
        glbl_cfg().get_host_item('ssh command', host, owner))
    command += ['-n', owner + '@' + host, script]
    from subprocess import Popen, PIPE, DEVNULL
    try:
        # DEVNULL rather than open(os.devnull): the latter creates a file
        # object that is never closed.
        proc = Popen(
            command, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
    except OSError:
        if cylc.flow.flags.debug:
            import traceback
            traceback.print_exc()
        return
    out, err = (f.decode() for f in proc.communicate())
    ret_code = proc.wait()
    # Extract passphrase from STDOUT
    # It should live in the line with the correct prefix: everything after
    # the first line equal to the prefix is the content.
    content = ""
    can_read = False
    for line in out.splitlines(True):
        if can_read:
            content += line
        elif line.strip() == prefix:
            can_read = True
    if not content or ret_code:
        LOG.debug(
            '$ %(command)s # code=%(ret_code)s\n%(err)s',
            {
                'command': command,
                # STDOUT may contain passphrase, so not safe to print
                # 'out': out,
                'err': err,
                'ret_code': ret_code,
            })
        return
    return content
def _prep_submit_task_job(self, suite, itask, dry_run, check_syntax=True):
    """Prepare a task job submission.

    Selects the task host, increments the submit number, sets the retry
    timers and writes the job file.

    Returns:
        * itask on a good preparation (also returned straight away when a
          job file path is already recorded and this is not a dry run).
        * False if host selection or job file preparation raised.
        * None if the host selection result is not ready yet.
    """
    # A job file was already written for this task: nothing to redo unless
    # this is a dry run.
    if itask.local_job_file_path and not dry_run:
        return itask

    # Handle broadcasts
    overrides = self.task_events_mgr.broadcast_mgr.get_broadcast(
        itask.identity)
    if overrides:
        # Work on a copy so the task definition's own config is untouched.
        rtconfig = pdeepcopy(itask.tdef.rtconfig)
        poverride(rtconfig, overrides, prepend=True)
    else:
        rtconfig = itask.tdef.rtconfig

    # Determine task host settings now, just before job submission,
    # because dynamic host selection may be used.
    try:
        task_host = self.task_remote_mgr.remote_host_select(
            rtconfig['remote']['host'])
    except TaskRemoteMgmtError as exc:
        # Submit number not yet incremented
        itask.submit_num += 1
        itask.summary['job_hosts'][itask.submit_num] = ''
        # Retry delays, needed for the try_num
        self._set_retry_timers(itask, rtconfig)
        self._prep_submit_task_job_error(
            suite, itask, dry_run, '(remote host select)', exc)
        return False
    else:
        if task_host is None:  # host select not ready
            itask.set_summary_message(self.REMOTE_SELECT_MSG)
            return
        itask.task_host = task_host
        # Submit number not yet incremented
        itask.submit_num += 1
        # Retry delays, needed for the try_num
        self._set_retry_timers(itask, rtconfig)

    try:
        job_conf = self._prep_submit_task_job_impl(suite, itask, rtconfig)
        local_job_file_path = get_task_job_job_log(
            suite, itask.point, itask.tdef.name, itask.submit_num)
        self.job_file_writer.write(local_job_file_path, job_conf,
                                   check_syntax=check_syntax)
    except Exception as exc:
        # Could be a bad command template, IOError, etc
        self._prep_submit_task_job_error(
            suite, itask, dry_run, '(prepare job file)', exc)
        return False
    itask.local_job_file_path = local_job_file_path

    # NOTE(review): the nesting below was reconstructed from a collapsed
    # source line; confirm the debug log belongs inside the dry-run branch.
    if dry_run:
        itask.set_summary_message('job file written (edit/dry-run)')
        LOG.debug('[%s] -%s', itask, itask.summary['latest_message'])

    # Return value used by "cylc submit" and "cylc jobscript":
    return itask
def jinja2process(flines, dir_, template_vars=None):
    """Pass configure file through Jinja2 processor.

    Args:
        flines (list): File lines; the first one (the '#!jinja2' line) is
            excluded from the template.
        dir_ (str): Directory passed to jinja2environment(); also the base
            for the relative include-file name in error reports.
        template_vars (dict): Template variables, or None for none.

    Returns:
        list: The rendered lines with blank lines removed.

    Raises:
        Jinja2Error: Wrapping any exception raised while building or
            rendering the template, with source context lines attached
            where they can be determined.
    """
    # Load file lines into a template, excluding '#!jinja2' so that
    # '#!cylc-x.y.z' rises to the top. Callers should handle jinja2
    # TemplateSyntaxerror and TemplateError.
    if template_vars:
        LOG.debug(
            'Setting Jinja2 template variables:\n%s',
            '\n'.join(
                ['+ %s=%s' % item for item in sorted(template_vars.items())]))
    else:
        # Jinja2 render method requires a dictionary as argument (not None):
        template_vars = {}

    # CALLERS SHOULD HANDLE JINJA2 TEMPLATESYNTAXERROR AND TEMPLATEERROR
    # AND TYPEERROR (e.g. for not using "|int" filter on number inputs.
    # Convert unicode to plain str, ToDo - still needed for parsec?)
    try:
        env = jinja2environment(dir_)
        template = env.from_string('\n'.join(flines[1:]))
        lines = str(template.render(template_vars)).splitlines()
    except TemplateSyntaxError as exc:
        filename = None
        # Start from "no context": neither branch below may apply, and
        # nothing has been assigned to `lines` if from_string() raised.
        lines = None
        # extract source lines
        if exc.lineno and exc.source and not exc.filename:
            # error in flow.cylc or cylc include file
            lines = exc.source.splitlines()
        elif exc.lineno and exc.filename:
            # error in jinja2 include file
            filename = os.path.relpath(exc.filename, dir_)
            # Read the whole file as lines so the same line-number slice
            # below applies; seek() takes a byte offset, not a line number.
            try:
                with open(exc.filename, 'r') as include_file:
                    lines = include_file.read().splitlines()
            except OSError:
                # Still report the template error, just without context.
                lines = None
        if lines:
            # extract context lines from source lines
            lines = lines[max(exc.lineno - CONTEXT_LINES, 0):exc.lineno]
        raise Jinja2Error(exc, lines=lines, filename=filename)
    except Exception as exc:
        lineno = get_error_location()
        lines = None
        if lineno:
            lineno += 1  # shebang line ignored by jinja2
            lines = flines[max(lineno - CONTEXT_LINES, 0):lineno]
        raise Jinja2Error(exc, lines=lines)

    # Jinja2 leaves blank lines where source lines contain
    # only Jinja2 code; this matters if line continuation
    # markers are involved, so we remove blank lines here.
    # (Newlines are not restored; that would only matter for display.)
    flow_config = [line for line in lines if line.strip()]
    return flow_config
def submit_task_jobs(self, suite, itasks, is_simulation=False):
    """Prepare and submit task jobs.

    Submit tasks where possible. Ignore tasks that are waiting for host
    select command to complete, or tasks that are waiting for remote
    initialisation. Bad host select command, error writing to a job file or
    bad remote initialisation will cause a bad task - leading to submission
    failure.

    This method uses prep_submit_task_jobs() as helper.

    Args:
        suite: Suite identifier, passed through to the helpers, the job log
            functions and the submit callback.
        itasks: Tasks to submit.
        is_simulation: If true, hand straight over to
            _simulation_submit_task_jobs() and return its result.

    Return (list): list of tasks that attempted submission.
    """
    if is_simulation:
        return self._simulation_submit_task_jobs(itasks)

    # Prepare tasks for job submission
    prepared_tasks, bad_tasks = self.prep_submit_task_jobs(suite, itasks)

    # Reset consumed host selection results
    self.task_remote_mgr.remote_host_select_reset()

    if not prepared_tasks:
        return bad_tasks

    # Group task jobs by (host, owner)
    auth_itasks = {}  # {(host, owner): [itask, ...], ...}
    for itask in prepared_tasks:
        auth_itasks.setdefault((itask.task_host, itask.task_owner), [])
        auth_itasks[(itask.task_host, itask.task_owner)].append(itask)

    # Submit task jobs for each (host, owner) group
    # NOTE: done_tasks is the same list object as bad_tasks, so the extend()
    # below also grows bad_tasks.
    done_tasks = bad_tasks
    # NOTE: the loop target rebinds the "itasks" parameter to each group.
    for (host, owner), itasks in sorted(auth_itasks.items()):
        is_init = self.task_remote_mgr.remote_init(host, owner)
        if is_init is None:
            # Remote is waiting to be initialised
            for itask in itasks:
                itask.set_summary_message(self.REMOTE_INIT_MSG)
                self.job_pool.add_job_msg(
                    get_task_job_id(
                        itask.point, itask.tdef.name, itask.submit_num),
                    self.REMOTE_INIT_MSG)
            # These tasks are not added to done_tasks on this pass.
            continue

        # Ensure that localhost background/at jobs are recorded as running
        # on the host name of the current suite host, rather than just
        # "localhost". On suite restart on a different suite host, this
        # allows the restart logic to correctly poll the status of the
        # background/at jobs that may still be running on the previous
        # suite host.
        # NOTE(review): "itask" here is whatever an earlier loop last bound
        # it to (the grouping loop, or a loop from a previous group), which
        # is not necessarily a member of the current group - confirm that
        # every task in a group shares the same batch system.
        if (self.batch_sys_mgr.is_job_local_to_host(
                itask.summary['batch_sys_name']) and
                not is_remote_host(host)):
            owner_at_host = get_host()
        else:
            owner_at_host = host

        # Persist
        if owner:
            owner_at_host = owner + '@' + owner_at_host
        now_str = get_current_time_string()
        done_tasks.extend(itasks)
        for itask in itasks:
            # Log and persist
            LOG.info(
                '[%s] -submit-num=%02d, owner@host=%s',
                itask, itask.submit_num, owner_at_host)
            self.suite_db_mgr.put_insert_task_jobs(itask, {
                'is_manual_submit': itask.is_manual_submit,
                'try_num': itask.get_try_num(),
                'time_submit': now_str,
                'user_at_host': owner_at_host,
                'batch_sys_name': itask.summary['batch_sys_name'],
            })
            itask.is_manual_submit = False

        if is_init == REMOTE_INIT_FAILED:
            # Remote has failed to initialise
            # Set submit-failed for all affected tasks
            for itask in itasks:
                itask.local_job_file_path = None  # reset for retry
                log_task_job_activity(
                    SubProcContext(
                        self.JOBS_SUBMIT,
                        '(init %s)' % owner_at_host,
                        err=REMOTE_INIT_FAILED,
                        ret_code=1),
                    suite, itask.point, itask.tdef.name)
                self.task_events_mgr.process_message(
                    itask, CRITICAL,
                    self.task_events_mgr.EVENT_SUBMIT_FAILED)
            continue

        # Build the "cylc jobs-submit" command
        cmd = ['cylc', self.JOBS_SUBMIT]
        if LOG.isEnabledFor(DEBUG):
            cmd.append('--debug')
        if get_utc_mode():
            cmd.append('--utc-mode')
        remote_mode = False
        # Only host/user values that test as remote are passed on, both as
        # command line options and as SubProcContext keyword arguments.
        kwargs = {}
        for key, value, test_func in [
                ('host', host, is_remote_host),
                ('user', owner, is_remote_user)]:
            if test_func(value):
                cmd.append('--%s=%s' % (key, value))
                remote_mode = True
                kwargs[key] = value
        if remote_mode:
            cmd.append('--remote-mode')
        cmd.append('--')
        cmd.append(get_remote_suite_run_job_dir(host, owner, suite))

        # Chop itasks into a series of shorter lists if it's very big
        # to prevent overloading of stdout and stderr pipes.
        itasks = sorted(itasks, key=lambda itask: itask.identity)
        # Roughly equal batches: (len // 100) + 1 is the target batch count.
        chunk_size = len(itasks) // ((len(itasks) // 100) + 1) + 1
        itasks_batches = [
            itasks[i:i + chunk_size]
            for i in range(0, len(itasks), chunk_size)
        ]
        LOG.debug(
            '%s ... # will invoke in batches, sizes=%s',
            cmd, [len(b) for b in itasks_batches])
        # NOTE: the enumerate() index "i" is not used in the loop body.
        for i, itasks_batch in enumerate(itasks_batches):
            stdin_files = []
            job_log_dirs = []
            for itask in itasks_batch:
                if remote_mode:
                    # In remote mode the job file of each task is supplied
                    # to the command as a stdin file.
                    stdin_files.append(
                        get_task_job_job_log(
                            suite, itask.point, itask.tdef.name,
                            itask.submit_num))
                job_log_dirs.append(
                    get_task_job_id(
                        itask.point, itask.tdef.name, itask.submit_num))
                # The job file is now (about to be) used: reset the file
                # write flag so that subsequent manual retrigger will
                # generate a new job file.
                itask.local_job_file_path = None
                itask.state.reset(TASK_STATUS_READY)
                if itask.state.outputs.has_custom_triggers():
                    self.suite_db_mgr.put_update_task_outputs(itask)

            # Queue one submit command per batch; the callback receives the
            # suite and the tasks of this batch.
            self.proc_pool.put_command(
                SubProcContext(
                    self.JOBS_SUBMIT,
                    cmd + job_log_dirs,
                    stdin_files=stdin_files,
                    job_log_dirs=job_log_dirs,
                    **kwargs),
                self._submit_task_jobs_callback, [suite, itasks_batch])
    return done_tasks
def construct_ssh_cmd(raw_cmd, user=None, host=None, forward_x11=False,
                      stdin=False, ssh_login_shell=None, ssh_cylc=None,
                      set_UTC=False, allow_flag_opts=False):
    """Wrap a bare command with everything needed to run it via ssh.

    Arguments:
        raw_cmd (list):
            primitive command to run remotely.
        user (string):
            user ID for the remote login.
        host (string):
            remote host name. Use 'localhost' if not specified.
        forward_x11 (boolean):
            If True, add '-Y' to the ssh command to enable X11 forwarding.
        stdin:
            If None, the `-n` option will be added to the SSH command line.
        ssh_login_shell (boolean):
            If True, launch remote command with
            `bash --login -c 'exec "$0" "$@"'`. If None, the
            'use login shell' global configuration item decides.
        ssh_cylc (string):
            Location of the remote cylc executable. If not given, the
            'cylc executable' global configuration item is used.
        set_UTC (boolean):
            If True, pass CYLC_UTC=True and TZ=UTC through when the
            CYLC_UTC environment variable is "True" or "true".
        allow_flag_opts (boolean):
            If True, append '--verbose' and/or '--debug' to the command
            when the corresponding cylc flag or CYLC_VERBOSE / CYLC_DEBUG
            environment variable is set.

    Return:
        A list containing the full ssh command line, including all
        arguments and options necessary to execute the bare command on the
        given host.

    Raises:
        ValueError: if the configured cylc executable does not end with
            'cylc'.

    """
    true_strings = ("True", "true")

    command = shlex.split(glbl_cfg().get_host_item('ssh command', host, user))

    # ssh options
    if forward_x11:
        command.append('-Y')
    if stdin is None:
        command.append('-n')

    # [user@]host, falling back to localhost
    login_prefix = user + '@' if user else ''
    command.append(login_prefix + (host or 'localhost'))

    # Pass CYLC_VERSION and optionally, CYLC_CONF_PATH & CYLC_UTC through.
    command.extend(['env', quote(r'CYLC_VERSION=%s' % CYLC_VERSION)])
    try:
        conf_path_setting = quote(
            r'CYLC_CONF_PATH=%s' % os.environ['CYLC_CONF_PATH'])
    except KeyError:
        pass
    else:
        command.append(conf_path_setting)
    if set_UTC and os.getenv('CYLC_UTC') in true_strings:
        command.extend([quote(r'CYLC_UTC=True'), quote(r'TZ=UTC')])

    # Use bash -l?
    if ssh_login_shell is None:
        ssh_login_shell = glbl_cfg().get_host_item(
            'use login shell', host, user)
    if ssh_login_shell:
        # A login shell will always source /etc/profile and the user's bash
        # profile file. To avoid having to quote the entire remote command
        # it is passed as arguments to the bash script.
        command.extend(['bash', '--login', '-c', quote(r'exec "$0" "$@"')])

    # 'cylc' on the remote host
    if not ssh_cylc:
        ssh_cylc = glbl_cfg().get_host_item('cylc executable', host, user)
        if not ssh_cylc.endswith('cylc'):
            # TODO - raise appropriate exception
            raise ValueError(
                r'ERROR: bad cylc executable in global config: %s' % ssh_cylc)
    command.append(ssh_cylc)

    # The raw command follows the remote cylc executable and precedes any
    # verbosity options added below.
    command.extend(raw_cmd)

    if allow_flag_opts:
        if (cylc.flow.flags.verbose
                or os.getenv('CYLC_VERBOSE') in true_strings):
            command.append(r'--verbose')
        if (cylc.flow.flags.debug
                or os.getenv('CYLC_DEBUG') in true_strings):
            command.append(r'--debug')

    # Report the command: via the log if it has handlers, else on stderr
    # when in debug mode.
    if LOG.handlers:
        LOG.debug("$ %s", ' '.join(map(quote, command)))
    elif cylc.flow.flags.debug:
        sys.stderr.write("$ %s\n" % ' '.join(map(quote, command)))

    return command
def _bespoke_stop(self):
    """Bespoke stop items.

    Log a debug message and set the ``stopping`` flag on this object.
    """
    LOG.debug('stopping zmq socket...')
    # NOTE(review): presumably read elsewhere (e.g. by a listener loop) as
    # the signal to shut down - confirm against the code that reads it.
    self.stopping = True
def upgrade_is_held(self):
    """Upgrade hold_swap => is_held.

    * Add a is_held column.
    * Set status and is_held as per the new schema.
    * Set the hold_swap values to None, then call
      ``self.remove_columns(table, ['hold_swap'])``.

    The upgrade is applied to TABLE_TASK_POOL and
    TABLE_TASK_POOL_CHECKPOINTS, and committed after each table.

    From:
        cylc<8
    To:
        cylc>=8
    PR:
        #3230

    Returns:
        bool - True if upgrade performed, False if upgrade skipped.

    """
    conn = self.connect()

    # check if upgrade required: PRAGMA table_info yields one row per
    # column, with the column name as the second field
    schema = conn.execute(rf'PRAGMA table_info({self.TABLE_TASK_POOL})')
    for _, name, *_ in schema:
        if name == 'is_held':
            LOG.debug('is_held column present - skipping db upgrade')
            return False

    # perform upgrade
    for table in [self.TABLE_TASK_POOL, self.TABLE_TASK_POOL_CHECKPOINTS]:
        LOG.info('Upgrade hold_swap => is_held in %s', table)
        conn.execute(
            rf'''
                ALTER TABLE
                    {table}
                ADD COLUMN
                    is_held BOOL
            '''
        )
        # Read every row up front: updating a table while a SELECT on the
        # same connection is still being stepped gives undefined results.
        rows = conn.execute(
            rf'''
                SELECT
                    cycle, name, status, hold_swap
                FROM
                    {table}
            '''
        ).fetchall()
        for cycle, name, status, hold_swap in rows:
            # A task was held if either column said so; when the status
            # itself was 'held' the real status is the one swapped out.
            is_held = 'held' in (status, hold_swap)
            new_status = hold_swap if status == 'held' else status
            # NOTE(review): rows are matched on (cycle, name) only. If a
            # table can contain several rows with the same pair, they all
            # end up with the values of the last such row - confirm against
            # the table schema.
            conn.execute(
                rf'''
                    UPDATE
                        {table}
                    SET
                        status=?,
                        is_held=?,
                        hold_swap=?
                    WHERE
                        cycle==?
                        AND name==?
                ''',
                (new_status, is_held, None, cycle, name)
            )
        self.remove_columns(table, ['hold_swap'])
        conn.commit()
    return True
def parse(fpath, output_fname=None, template_vars=None):
    """Build a nested dict from the items of a configuration file.

    The file is first read and pre-processed (jinja2, include-files, line
    continuation) and then parsed line-by-line.

    Args:
        fpath: Path of the file to parse.
        output_fname: If set, the processed lines are written to this file.
        template_vars: Passed on to read_and_proc().

    Returns:
        OrderedDictWithDefaults: The parsed sections and items.

    Raises:
        FileParseError: On mismatched heading brackets, a heading nested
            more than one level deeper than its predecessor, or a line
            that is neither comment, blank, heading nor key=value item.

    """
    flines = read_and_proc(fpath, template_vars)
    if output_fname:
        with open(output_fname, 'w') as handle:
            handle.write('\n'.join(flines) + '\n')
        LOG.debug('Processed configuration dumped: %s', output_fname)

    config = OrderedDictWithDefaults()
    parents = []
    nesting_level = 0
    maxline = len(flines) - 1

    # An explicit index is needed because multiline() may advance it.
    index = -1
    while index < maxline:
        index += 1
        line = flines[index]

        # skip full-line comments and blank lines
        if re.match(_LINECOMMENT, line) or re.match(_BLANKLINE, line):
            continue

        heading = re.match(_HEADING, line)
        if heading is None:
            item = re.match(_KEY_VALUE, line)
            if item is None:
                # no match
                raise FileParseError(
                    'Invalid line ' + str(index + 1) + ': ' + line)
            # matched a key=value item
            key, _, val = item.groups()[1:]
            if val.startswith(('"""', "'''")):
                # triple quoted - may be a multiline value
                val, index = multiline(flines, val, index, maxline)
            addict(config, key, val, parents, index)
            continue

        # matched a section heading
        s_open, sect_name, s_close = heading.groups()[1:-1]
        depth = len(s_open)
        if depth != len(s_close):
            raise FileParseError('bracket mismatch', index, line)
        if depth == nesting_level:
            # sibling section
            ancestors = parents[:-1]
        elif depth == nesting_level + 1:
            # child section
            ancestors = parents
        elif depth < nesting_level:
            # back up one or more levels
            ancestors = parents[:depth - nesting_level - 1]
        else:
            raise FileParseError(
                'Error line ' + str(index + 1) + ': ' + line)
        parents = ancestors + [sect_name]
        nesting_level = depth
        addsect(config, sect_name, parents[:-1])

    return config
def remote_init(self, host, owner):
    """Initialise a remote [owner@]host if necessary.

    Create UUID file on suite host ".service/uuid" for remotes to identify
    shared file system with suite host.

    Call "cylc remote-init" to install suite items to remote:
        ".service/contact": For TCP task communication
        ".service/passphrase": For TCP task communication
        "python/": if source exists

    Args:
        host: Name of the remote host.
        owner: User name on the remote host.

    Return:
        REMOTE_INIT_NOT_REQUIRED:
            If remote init is not required, e.g. not remote
        REMOTE_INIT_DONE:
            If remote init done.
        REMOTE_INIT_FAILED:
            If init of the remote failed.
            Note: the recorded status is removed when it is returned, so
            that the next call retries the initialisation.
        None:
            If waiting for remote init command to complete

    """
    if self.single_task_mode or not is_remote(host, owner):
        return REMOTE_INIT_NOT_REQUIRED
    try:
        status = self.remote_init_map[(host, owner)]
    except KeyError:
        pass  # Not yet initialised
    else:
        if status == REMOTE_INIT_FAILED:
            del self.remote_init_map[(host, owner)]  # reset to allow retry
        return status

    # Determine what items to install
    comm_meth = glbl_cfg().get_host_item(
        'task communication method', host, owner)
    owner_at_host = 'localhost'
    if host:
        owner_at_host = host
    if owner:
        owner_at_host = owner + '@' + owner_at_host
    LOG.debug('comm_meth[%s]=%s', owner_at_host, comm_meth)
    items = self._remote_init_items(comm_meth)
    # No item to install
    if not items:
        self.remote_init_map[(host, owner)] = REMOTE_INIT_NOT_REQUIRED
        return self.remote_init_map[(host, owner)]

    # Create a TAR archive with the service files,
    # so they can be sent later via SSH's STDIN to the task remote.
    tmphandle = self.proc_pool.get_temporary_file()
    # The context manager finalises the archive on success and releases the
    # TarFile if add() raises; tmphandle itself stays open for later use.
    with tarfile.open(fileobj=tmphandle, mode='w') as tarhandle:
        for path, arcname in items:
            tarhandle.add(path, arcname=arcname)
    tmphandle.seek(0)

    # UUID file - for remote to identify shared file system with suite host
    uuid_fname = os.path.join(
        self.suite_srv_files_mgr.get_suite_srv_dir(self.suite),
        FILE_BASE_UUID)
    if not os.path.exists(uuid_fname):
        # Close (and so flush) the file straight away rather than leaving
        # that to garbage collection.
        with open(uuid_fname, 'wb') as uuid_file:
            uuid_file.write(str(self.uuid_str).encode())

    # Build the command
    cmd = ['cylc', 'remote-init']
    if is_remote_host(host):
        cmd.append('--host=%s' % host)
    if is_remote_user(owner):
        cmd.append('--user=%s' % owner)
    if cylc.flow.flags.debug:
        cmd.append('--debug')
    if comm_meth in ['ssh']:
        cmd.append('--indirect-comm=%s' % comm_meth)
    cmd.append(str(self.uuid_str))
    cmd.append(glbl_cfg().get_derived_host_item(
        self.suite, 'suite run directory', host, owner))
    # The archive is fed to the command on STDIN; the callback also
    # receives the temporary file handle.
    self.proc_pool.put_command(
        SubProcContext('remote-init', cmd, stdin_files=[tmphandle]),
        self._remote_init_callback,
        [host, owner, tmphandle])
    # None status: Waiting for command to finish
    self.remote_init_map[(host, owner)] = None
    return self.remote_init_map[(host, owner)]