def test_get_platform_warn_mode_fail_if_backticks():
    """Backtick command substitution in a platform name must be rejected.

    Even with ``warn_only=True`` the lookup is expected to raise
    ``PlatformLookupError`` with a message pointing at ``$()`` syntax.
    """
    backtick_conf = {'platform': '`echo ${chamber}`'}
    expected_msg = (
        r'platform = `echo \$\{chamber\}`: '
        r'backticks are not supported; please use \$\(\)'
    )
    with pytest.raises(PlatformLookupError, match=expected_msg):
        get_platform(backtick_conf, warn_only=True)
def test_get_platform_subshell(
    task_conf: Dict[str, str],
    expected_err_msg: Optional[str]
):
    """Check how get_platform() handles platforms defined as subshells.

    When ``expected_err_msg`` is set, the lookup must raise
    ``PlatformLookupError`` containing that text; otherwise the lookup
    must return ``None``.
    """
    if not expected_err_msg:
        assert get_platform(task_conf) is None
        return
    with pytest.raises(PlatformLookupError) as exc_info:
        get_platform(task_conf)
    assert expected_err_msg in str(exc_info.value)
def test_get_platform_cylc7_8_syntax_mix_fails(mock_glbl_cfg):
    """A task config mixing Cylc 7 and Cylc 8 settings must be rejected.

    Setting both ``platform`` (Cylc 8) and ``[remote]host`` (Cylc 7) is
    expected to make ``get_platform`` raise ``PlatformLookupError``.
    """
    mixed_conf = {
        'platform': 'localhost',
        'remote': {'host': 'localhost'},
    }
    expected = r'A mixture of Cylc 7 \(host\) and Cylc 8 \(platform\).*'
    with pytest.raises(PlatformLookupError) as exc_info:
        get_platform(mixed_conf)
    assert exc_info.match(expected)
def get_platforms_from_task_jobs(flow: str, cyclepoint: str) -> Dict[str, Any]:
    """Return the platform of every submitted task at a fixed cycle point.

    Reads the ``task_jobs`` table of the workflow database, so it only
    covers tasks whose jobs have already been submitted. Where a task has
    been submitted more than once, the platform of the submission with the
    highest submit number is reported.

    Args:
        flow: The name of the Cylc flow to be queried.
        cyclepoint: The cycle point at which to query the jobs.

    Returns:
        Dictionary mapping each task name to its platform dictionary.
    """
    _, _, flow_file = parse_id(flow, constraint='workflows', src=True)
    dbfilepath = Path(flow_file).parent / '.service/db'
    stmt = (
        'SELECT "name", "platform_name", "submit_num" '
        'FROM task_jobs WHERE cycle=?'
    )

    # task name -> (submit number, platform name) of the latest submission
    latest: Dict[str, Any] = {}
    dao = CylcWorkflowDAO(dbfilepath)
    try:
        for task, platform_n, submit_num in dao.connect().execute(
            stmt, [cyclepoint]
        ):
            if task not in latest or latest[task][0] < submit_num:
                latest[task] = (submit_num, platform_n)
    finally:
        # Release the database connection even if the query fails.
        dao.close()

    # Only resolve the platform for the submission that is reported; the
    # submit number itself is not part of the result.
    return {
        task: get_platform(platform_n)
        for task, (_, platform_n) in latest.items()
    }
def get_platform_from_task_def(flow: str, task: str) -> Dict[str, Any]:
    """Look up the platform for a task from the flow definition.

    Intended for tasks whose jobs have not been submitted yet. A platform
    (or Cylc 7 style remote host) written as a subshell is evaluated
    before the lookup.

    Args:
        flow: The name of the Cylc flow to be queried.
        task: The name of the task to be queried.

    Returns:
        Platform Dictionary.
    """
    _, _, flow_file = parse_id(flow, constraint='workflows', src=True)
    config = WorkflowConfig(flow, flow_file, Values())
    # The whole task section is passed on so that a platform can still be
    # guessed from Cylc 7 host settings.
    task_spec = config.pcfg.get(['runtime', task])

    platform_setting = task_spec.get('platform')
    if platform_setting and is_platform_definition_subshell(platform_setting):
        task_spec['platform'] = eval_subshell(platform_setting)
    else:
        remote_host = task_spec.get('remote', {}).get('host')
        if remote_host and HOST_REC_COMMAND.match(remote_host):
            task_spec['remote']['host'] = eval_subshell(remote_host)

    return get_platform(task_spec)
def _process_job_logs_retrieval(self, schd_ctx, ctx, id_keys):
    """Queue the command that retrieves job logs from a remote platform.

    Builds the platform's "retrieve job logs command" with include
    filters for the jobs named in ``id_keys`` and hands it to the process
    pool; ``_job_logs_retrieval_callback`` is registered as the callback.
    """
    platform = get_platform(ctx.platform_n)
    remote_shell = str(platform["ssh command"])
    retrieve_cmd = str(platform["retrieve job logs command"])
    cmd = [*shlex.split(retrieve_cmd), "--rsh=" + remote_shell]
    if LOG.isEnabledFor(DEBUG):
        cmd.append("-v")
    if ctx.max_size:
        cmd.append("--max-size=%s" % (ctx.max_size, ))

    # Include every directory level leading to each job, then the job's
    # whole contents; everything else is excluded below.
    include_paths = set()
    for _, point, name, submit_num in id_keys:
        point_dir = "/%s" % (point)
        task_dir = "%s/%s" % (point_dir, name)
        job_dir = "%s/%02d" % (task_dir, submit_num)
        include_paths.update((point_dir, task_dir, job_dir, job_dir + "/**"))
    cmd.extend("--include=" + path for path in sorted(include_paths))
    cmd.append("--exclude=/**")

    # Remote source, then local target.
    remote_host = get_host_from_platform(platform)
    remote_dir = get_remote_suite_run_job_dir(platform, schd_ctx.suite)
    cmd.append("%s:%s/" % (remote_host, remote_dir))
    cmd.append(get_suite_run_job_dir(schd_ctx.suite) + "/")

    proc_ctx = SubProcContext(ctx, cmd, env=dict(os.environ), id_keys=id_keys)
    self.proc_pool.put_command(
        proc_ctx, self._job_logs_retrieval_callback, [schd_ctx])
def main(parser, options):
    """Print the local run directory, or dump global configuration items.

    With ``options.run_dir`` set, only the "run directory" of the default
    platform is printed; otherwise the requested item of the global
    configuration is dumped.
    """
    if not options.run_dir:
        glbl_cfg().idump(
            options.item,
            sparse=options.sparse,
            pnative=options.pnative,
        )
        return
    print(get_platform()['run directory'])
def __init__(
    self,
    tdef: 'TaskDef',
    start_point: 'PointBase',
    flow_label: Optional[str],
    status: str = TASK_STATUS_WAITING,
    is_held: bool = False,
    submit_num: int = 0,
    is_late: bool = False,
    reflow: bool = True
) -> None:
    """Set up a task proxy for ``tdef`` at cycle point ``start_point``.

    A ``submit_num`` of ``None`` is treated as 0. The platform starts out
    as whatever ``get_platform()`` returns when called without arguments.
    """
    # Identity and flow membership.
    self.tdef = tdef
    self.point = start_point
    self.identity: str = TaskID.get(self.tdef.name, self.point)
    self.flow_label = flow_label
    self.reflow = reflow
    self.reload_successor: Optional['TaskProxy'] = None
    self.point_as_seconds: Optional[int] = None

    # Job submission bookkeeping.
    self.submit_num = 0 if submit_num is None else submit_num
    self.jobs: List[str] = []
    self.is_manual_submit = False
    self.local_job_file_path: Optional[str] = None
    self.platform = get_platform()
    self.job_vacated = False
    self.waiting_on_job_prep = True

    self.summary: Dict[str, Any] = {
        'submitted_time': None,
        'submitted_time_string': None,
        'started_time': None,
        'started_time_string': None,
        'finished_time': None,
        'finished_time_string': None,
        'logfiles': [],
        'platforms_used': {},
        'execution_time_limit': None,
        'job_runner_name': None,
        'submit_method_id': None,
        'flow_label': None
    }

    # Timers and time-based triggers.
    self.poll_timer: Optional['TaskActionTimer'] = None
    self.timeout: Optional[float] = None
    self.try_timers: Dict[str, 'TaskActionTimer'] = {}
    self.clock_trigger_time: Optional[float] = None
    self.expire_time: Optional[float] = None
    self.late_time: Optional[float] = None
    self.is_late = is_late

    self.non_unique_events = Counter()  # type: ignore  # TODO: figure out

    self.state = TaskState(tdef, self.point, status, is_held)

    # Determine graph children of this task (for spawning).
    self.graph_children = generate_graph_children(tdef, self.point)
def main(_, options: 'Values', *ids) -> None:
    """Report the Cylc version found on each task platform of a workflow.

    Runs ``cylc version`` over SSH on every non-local platform used by the
    workflow's tasks and prints a table of the results. Exits with status
    1 only if a reported version differs from ``CYLC_VERSION`` and
    ``options.error`` is set; otherwise exits with 0.
    """
    workflow_id, _, flow_file = parse_id(
        *ids,
        src=True,
        constraint='workflows',
    )

    # extract task host platforms from the workflow_id
    template_vars = load_template_vars(
        options.templatevars, options.templatevars_file)
    config = WorkflowConfig(workflow_id, flow_file, options, template_vars)
    platforms = {
        config.get_config(['runtime', name, 'platform'])
        for name in config.get_namespace_list('all tasks')
    } - {None, 'localhost'}
    # When "workflow run hosts" are formalised as "flow platforms"
    # we can substitute `localhost` for this, in the mean time
    # we will have to assume that flow hosts are configured correctly.

    if not platforms:
        sys.exit(0)

    verbose = cylc.flow.flags.verbosity > 0

    # get the cylc version on each platform
    versions = {}
    for platform_name in sorted(platforms):
        platform = get_platform(platform_name)
        host = get_host_from_platform(platform, bad_hosts=None)
        cmd = construct_ssh_cmd(['version'], platform, host)
        if verbose:
            print(cmd)
        proc = procopen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
        out, err = (stream.decode() for stream in proc.communicate())
        if proc.wait() != 0:
            versions[platform_name] = f'ERROR: {err.strip()}'
            continue
        if verbose:
            print("   %s" % out)
        versions[platform_name] = out.strip()

    # report results
    width = max(map(len, platforms))
    print(f'{"platform".rjust(width)}: cylc version')
    print('-' * (width + 14))
    for platform_name, result in versions.items():
        print(f'{platform_name.rjust(width)}: {result}')

    mismatch = any(
        version != CYLC_VERSION for version in versions.values())
    sys.exit(1 if mismatch and options.error else 0)
def insert_db_job(self, row_idx, row):
    """Rebuild one job element from a database row after a restart.

    Rows whose status is not in ``JOB_STATUS_SET`` are skipped. Any
    failure while building the job is logged and the row is ignored; on
    success the job is recorded in ``self.added`` / ``self.task_jobs`` and
    ``self.updates_pending`` is set.
    """
    if row_idx == 0:
        LOG.info("LOADING job data")
    (
        point_string, name, status, submit_num,
        time_submit, time_run, time_run_exit,
        batch_sys_name, batch_sys_job_id, platform_name,
    ) = row
    if status not in JOB_STATUS_SET:
        return

    t_id = f'{self.workflow_id}{ID_DELIM}{point_string}{ID_DELIM}{name}'
    j_id = f'{t_id}{ID_DELIM}{submit_num}'
    try:
        tdef = self.schd.config.get_taskdef(name)
        j_owner = self.schd.owner
        j_host = (
            get_host_from_platform(get_platform(platform_name))
            if platform_name
            else self.schd.host
        )
        j_buf = PbJob(
            stamp=f'{j_id}@{time()}',
            id=j_id,
            submit_num=submit_num,
            state=status,
            task_proxy=t_id,
            submitted_time=time_submit,
            started_time=time_run,
            finished_time=time_run_exit,
            batch_sys_name=batch_sys_name,
            batch_sys_job_id=batch_sys_job_id,
            host=j_host,
            owner=j_owner,
            name=name,
            cycle_point=point_string,
        )
        # Add in log files.
        j_buf.job_log_dir = get_task_job_log(
            self.schd.suite, point_string, name, submit_num)
        overrides = self.schd.task_events_mgr.broadcast_mgr.get_broadcast(
            TaskID.get(name, point_string))
        rtconfig = tdef.rtconfig
        if overrides:
            # Apply broadcast overrides to a copy so that the task
            # definition itself is left untouched.
            rtconfig = pdeepcopy(tdef.rtconfig)
            poverride(rtconfig, overrides, prepend=True)
        j_buf.extra_logs.extend(
            os.path.expanduser(os.path.expandvars(log_file))
            for log_file in rtconfig['extra log files']
        )
    except SuiteConfigError:
        LOG.exception(
            ('ignoring job %s from the suite run database\n'
             '(its task definition has probably been deleted).') % j_id)
    except Exception:
        LOG.exception('could not load job %s' % j_id)
    else:
        self.added[j_id] = j_buf
        self.task_jobs.setdefault(t_id, set()).add(j_id)
        self.updates_pending = True
def main(_, options, *args):
    """Report the Cylc version found on each task platform of a suite.

    Runs ``cylc version`` over SSH on every non-local platform used by the
    suite's tasks and prints a table of the results.

    Args:
        _: Unused (option parser).
        options: Parsed command line options; ``templatevars``,
            ``templatevars_file`` and ``error`` are read here.
        *args: Positional arguments; the first is the suite name or the
            path to its flow file.

    Exits with status 1 only if a reported version differs from
    ``CYLC_VERSION`` and ``options.error`` is set; otherwise exits with 0.
    """
    # suite name or file path
    suite, flow_file = parse_suite_arg(options, args[0])

    # extract task host platforms from the suite
    config = SuiteConfig(
        suite, flow_file, options,
        load_template_vars(options.templatevars, options.templatevars_file))

    platforms = {
        config.get_config(['runtime', name, 'platform'])
        for name in config.get_namespace_list('all tasks')
    } - {None, 'localhost'}
    # When "suite run hosts" are formalised as "flow platforms"
    # we can substitute `localhost` for this, in the mean time
    # we will have to assume that flow hosts are configured correctly.

    if not platforms:
        sys.exit(0)

    verbose = cylc.flow.flags.verbose

    # get the cylc version on each platform
    versions = {}
    for platform_name in sorted(platforms):
        platform = get_platform(platform_name)
        cmd = construct_platform_ssh_cmd(['version'], platform)
        if verbose:
            print(cmd)
        proc = procopen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
        out, err = proc.communicate()
        out = out.decode()
        err = err.decode()
        if proc.wait() == 0:
            if verbose:
                print("   %s" % out)
            versions[platform_name] = out.strip()
        else:
            versions[platform_name] = f'ERROR: {err.strip()}'

    # report results
    max_len = max(len(platform_name) for platform_name in platforms)
    print(f'{"platform".rjust(max_len)}: cylc version')
    print('-' * (max_len + 14))
    for platform_name, result in versions.items():
        print(f'{platform_name.rjust(max_len)}: {result}')

    # A version mismatch is only fatal when --error was requested. The
    # result is kept in `ret_code` so the builtin `exit` is not shadowed.
    all_match = all(
        version == CYLC_VERSION for version in versions.values())
    ret_code = 1 if (not all_match and options.error) else 0
    sys.exit(ret_code)
def get_cylc_run_abs_path(path):
    """Resolve ``path`` against the local cylc-run directory.

    An absolute ``path`` is returned unchanged. A relative one is joined
    onto the "run directory" of the default platform, with environment
    variables in that setting expanded. The path need not exist.
    """
    if not os.path.isabs(path):
        run_dir_setting = get_platform()['run directory']
        return os.path.join(os.path.expandvars(run_dir_setting), path)
    return path
def test_get_platform_groups_basic(mock_glbl_cfg):
    """A platform group name resolves to a platform from that group."""
    mock_glbl_cfg(
        'cylc.flow.platforms.glbl_cfg',
        '''
        [platforms]
            [[aleph]]
                hosts = aleph
            [[bet]]
                hosts = bet
        [platform groups]
            [[hebrew_letters]]
                platforms = aleph, bet
        '''
    )
    assert get_platform('hebrew_letters')['group'] == 'hebrew_letters'

    # Seed the random number generator before each lookup so that the
    # platform picked from the group is reproducible.
    for seed, expected_name in ((42, 'aleph'), (44, 'bet')):
        random.seed(seed)
        assert get_platform('hebrew_letters')['name'] == expected_name
def test_get_platform_from_platform_name_str(mock_glbl_cfg):
    """Looking up a platform by its plain name returns its settings."""
    global_config = '''
        [platforms]
            [[saffron]]
                hosts = saff01
                batch system = slurm
        '''
    mock_glbl_cfg('cylc.flow.platforms.glbl_cfg', global_config)

    saffron = get_platform('saffron')

    assert saffron['hosts'] == ['saff01']
    assert saffron['batch system'] == 'slurm'
def remote_tidy(self):
    """Remove suite contact files and keys from initialised remotes.

    Call "cylc remote-tidy".
    This method is called on suite shutdown, so we want nothing to hang.
    Timeout any incomplete commands after 10 seconds.

    A command that exits non-zero (including one that had to be
    terminated) is reported as a warning; nothing is raised.
    """
    def _warn_if_failed(host, owner, cmd, proc, out, err):
        """Log a tidy failure if the finished process exited non-zero."""
        if proc.wait():
            LOG.warning(TaskRemoteMgmtError(
                TaskRemoteMgmtError.MSG_TIDY,
                (host, owner), ' '.join(quote(item) for item in cmd),
                proc.returncode, out, err))

    # Issue all SSH commands in parallel
    procs = {}
    for platform_name, init_with_contact in self.remote_init_map.items():
        platform = get_platform(platform_name)
        host = get_host_from_platform(platform)
        owner = platform['owner']
        # NOTE: the install target is recorded on the instance for every
        # platform visited, including those skipped just below.
        self.install_target = get_install_target_from_platform(platform)
        if init_with_contact != REMOTE_INIT_DONE:
            continue
        cmd = ['remote-tidy']
        if cylc.flow.flags.debug:
            cmd.append('--debug')
        cmd.append(str(f'{self.install_target}'))
        cmd.append(get_remote_suite_run_dir(platform, self.suite))
        if is_remote_platform(platform):
            cmd = construct_platform_ssh_cmd(cmd, platform, timeout='10s')
        else:
            cmd = ['cylc'] + cmd
        procs[(host, owner)] = (
            cmd,
            Popen(cmd, stdout=PIPE, stderr=PIPE, stdin=DEVNULL))

    # Wait for commands to complete for a max of 10 seconds
    timeout = time() + 10.0
    while procs and time() < timeout:
        # Iterate over a copy: finished entries are removed as we go.
        for (host, owner), (cmd, proc) in procs.copy().items():
            if proc.poll() is None:
                continue
            del procs[(host, owner)]
            out, err = (f.decode() for f in proc.communicate())
            _warn_if_failed(host, owner, cmd, proc, out, err)

    # Terminate any remaining commands
    for (host, owner), (cmd, proc) in procs.items():
        try:
            proc.terminate()
        except OSError:
            # Best effort: the process may already have gone away.
            pass
        # Decode the output so it is logged as text, exactly as for the
        # commands that finished in time.
        out, err = (f.decode() for f in proc.communicate())
        _warn_if_failed(host, owner, cmd, proc, out, err)
def test_get_platform_from_platform_name_str(mock_glbl_cfg, platform_re):
    """The name 'saffron' resolves via the ``platform_re`` section heading."""
    global_config = f'''
        [platforms]
            [[{platform_re}]]
                hosts = saff01
                job runner = slurm
        '''
    mock_glbl_cfg('cylc.flow.platforms.glbl_cfg', global_config)

    saffron = get_platform('saffron')

    assert saffron['hosts'] == ['saff01']
    assert saffron['job runner'] == 'slurm'
def test_get_platform_from_config_with_platform_name(mock_glbl_cfg):
    """A task config that only names a platform resolves to that platform.

    No Cylc 7 style settings are present, so nothing clashes with the
    platform name.
    """
    global_config = '''
        [platforms]
            [[mace]]
                hosts = mace001, mace002
                batch system = slurm
        '''
    mock_glbl_cfg('cylc.flow.platforms.glbl_cfg', global_config)

    mace = get_platform({'platform': 'mace'})

    assert mace['hosts'] == ['mace001', 'mace002']
    assert mace['batch system'] == 'slurm'
def test_get_platform_warn_mode(caplog):
    """In warn-only mode the output names each Cylc 7 setting found."""
    cylc7_conf = {
        'remote': {'host': 'cylcdevbox'},
        'job': {
            'batch system': 'pbs',
            'batch submit command template': 'some template',
        },
    }
    expected_mentions = (
        'batch submit command template = some template',
        'host = cylcdevbox',
        'batch system = pbs',
    )

    output = get_platform(cylc7_conf, warn_only=True)

    for forbidden_item in expected_mentions:
        assert forbidden_item in output
def test_get_localhost_platform(mock_glbl_cfg, platform_re):
    """Check which section supplies the settings for 'localhost'.

    When ``platform_re`` is set, its section (timeout 24) is expected to
    be used; otherwise the explicit ``localhost`` section (timeout 42).
    """
    global_config = f'''
        [platforms]
            [[localhost]]
                hosts = localhost
                ssh command = ssh -oConnectTimeout=42
            [[{platform_re}]]
                hosts = localhost
                ssh command = ssh -oConnectTimeout=24
        '''
    mock_glbl_cfg('cylc.flow.platforms.glbl_cfg', global_config)

    ssh_command = get_platform('localhost')['ssh command']

    if not platform_re:
        assert ssh_command == 'ssh -oConnectTimeout=42'
    else:
        assert ssh_command == 'ssh -oConnectTimeout=24'
def test_get_platform_groups_basic(mock_glbl_cfg):
    """With "definition order" selection the first platform is chosen."""
    global_config = '''
        [platforms]
            [[aleph]]
                hosts = aleph
            [[bet]]
                hosts = bet
        [platform groups]
            [[hebrew_letters]]
                platforms = aleph, bet
                [[[selection]]]
                    method = definition order
        '''
    mock_glbl_cfg('cylc.flow.platforms.glbl_cfg', global_config)

    chosen = get_platform('hebrew_letters')

    assert chosen['name'] == 'aleph'
def _run_job_cmd(self, cmd_key, suite, itasks, callback):
    """Run a job command (e.g. poll, kill) for a collection of tasks.

    Tasks are grouped by the name of the platform they ran on, and one
    command per platform is put on the process pool. Does nothing if
    ``itasks`` is empty.
    """
    if not itasks:
        return

    # Group the task proxies by the platform they were run on.
    tasks_by_platform = {}
    for itask in itasks:
        tasks_by_platform.setdefault(itask.platform['name'], []).append(itask)

    # One command per platform, processed in platform name order.
    for platform_n, platform_itasks in sorted(tasks_by_platform.items()):
        platform = get_platform(platform_n)
        remote_mode = is_remote_platform(platform)
        # The SSH wrapper (applied below) supplies the "cylc" executable
        # for remote platforms.
        cmd = [cmd_key] if remote_mode else ["cylc", cmd_key]
        if LOG.isEnabledFor(DEBUG):
            cmd.append("--debug")
        cmd += ["--", get_remote_suite_run_job_dir(platform, suite)]
        if remote_mode:
            cmd = construct_ssh_cmd(cmd, platform)
        cmd += [
            get_task_job_id(itask.point, itask.tdef.name, itask.submit_num)
            for itask in sorted(platform_itasks, key=lambda t: t.identity)
        ]
        self.proc_pool.put_command(
            SubProcContext(cmd_key, cmd), callback, [suite, platform_itasks])
def test_get_platform_using_platform_from_job_info(
    mock_glbl_cfg, task_conf, expected_platform_name
):
    """Work out the platform from Cylc 7 style task settings.

    n.b. No warning is expected if this fails, because that could lead to
    many thousands of warnings.

    This is not meant to be a comprehensive set of use-cases - those
    should be covered by the unit tests for `platform_from_host_items`.
    """
    global_config = '''
        [platforms]
            [[ras_el_hanout]]
                hosts = rose, chilli, cumin, paprika
                batch system = slurm
            [[spice_bg]]
                hosts = rose, chilli, cumin, paprika
            [[local_batch_system]]
                hosts = localhost
                batch system = batchyMcBatchFace
            [[cylcdevbox]]
                hosts = cylcdevbox
        '''
    mock_glbl_cfg('cylc.flow.platforms.glbl_cfg', global_config)

    platform = get_platform(task_conf)

    assert platform['name'] == expected_platform_name
def get_platform_from_task_def(flow: str, task: str) -> Dict[str, Any]:
    """Look up the platform for a task from the flow definition.

    Intended for tasks whose jobs have not been submitted yet.

    Args:
        flow: The name of the Cylc flow to be queried.
        task: The name of the task to be queried.

    Returns:
        Platform Dictionary.

    Raises:
        PlatformLookupError: If the platform lookup returns None, which is
            reported as the platform being a subshell still to be
            evaluated.
    """
    _, _, flow_file = parse_id(flow, constraint='workflows', src=True)
    config = WorkflowConfig(flow, flow_file, Values())
    # The whole task section is passed on so that a platform can still be
    # guessed from Cylc 7 host settings.
    task_spec = config.pcfg.get(['runtime', task])
    platform = get_platform(task_spec)
    if platform is not None:
        return platform
    raise PlatformLookupError(
        'Platform lookup failed; platform is a subshell to be evaluated: '
        f' Task: {task}, platform: {task_spec["platform"]}.'
    )
def __init__(self, tdef, start_point, flow_label,
             status=TASK_STATUS_WAITING, is_held=False,
             submit_num=0, is_late=False, reflow=True):
    """Set up a task proxy for ``tdef`` at cycle point ``start_point``.

    Besides initialising the bookkeeping attributes, this works out the
    graph children of the task (used for spawning) from
    ``tdef.graph_children`` and records whether a "failed" output has any
    children.

    A ``submit_num`` of ``None`` is treated as 0.
    """
    self.tdef = tdef
    if submit_num is None:
        submit_num = 0
    self.submit_num = submit_num
    self.jobs = []
    self.flow_label = flow_label
    self.reflow = reflow
    self.point = start_point
    self.identity = TaskID.get(self.tdef.name, self.point)

    self.reload_successor = None
    self.point_as_seconds = None

    self.manual_trigger = False
    self.is_manual_submit = False
    self.summary = {
        'latest_message': '',
        'submitted_time': None,
        'submitted_time_string': None,
        'started_time': None,
        'started_time_string': None,
        'finished_time': None,
        'finished_time_string': None,
        'logfiles': [],
        'platforms_used': {},
        'execution_time_limit': None,
        'batch_sys_name': None,
        'submit_method_id': None,
        'flow_label': None
    }

    self.local_job_file_path = None

    # Called without arguments; presumably this yields the default
    # (localhost) platform until a job is prepared - TODO confirm.
    self.platform = get_platform()
    self.task_owner = None

    self.job_vacated = False
    self.poll_timer = None
    self.timeout = None
    self.try_timers = {}
    # Use dict here for Python 2.6 compat.
    # Should use collections.Counter in Python 2.7+
    self.non_unique_events = {}

    self.clock_trigger_time = None
    self.expire_time = None
    self.late_time = None
    self.is_late = is_late

    self.state = TaskState(tdef, self.point, status, is_held)

    # Determine graph children of this task (for spawning).
    # Maps each output to a list of (name, child_point, is_abs) tuples.
    self.graph_children = {}
    for seq, dout in tdef.graph_children.items():
        for output, downs in dout.items():
            # Every output gets an entry, even if no child qualifies.
            if output not in self.graph_children:
                self.graph_children[output] = []
            for name, trigger in downs:
                child_point = trigger.get_child_point(self.point, seq)
                is_abs = (trigger.offset_is_absolute or
                          trigger.offset_is_from_icp)
                if is_abs:
                    if trigger.get_parent_point(self.point) != self.point:
                        # If 'foo[^] => bar' only spawn off of '^'.
                        continue
                if seq.is_on_sequence(child_point):
                    # E.g.: foo should trigger only on T06:
                    #   PT6H = "waz"
                    #   T06 = "waz[-PT6H] => foo"
                    self.graph_children[output].append(
                        (name, child_point, is_abs))

    if tdef.sequential:
        # Add next-instance child.
        nexts = []
        for seq in tdef.sequences:
            nxt = seq.get_next_point(self.point)
            if nxt is not None:
                # Within sequence bounds.
                nexts.append(nxt)
        if nexts:
            # The earliest next point across all sequences becomes a
            # child of this task's "succeeded" output.
            if TASK_OUTPUT_SUCCEEDED not in self.graph_children:
                self.graph_children[TASK_OUTPUT_SUCCEEDED] = []
            self.state.outputs.add(TASK_OUTPUT_SUCCEEDED)
            self.graph_children[TASK_OUTPUT_SUCCEEDED].append(
                (tdef.name, min(nexts), False))

    # Failure counts as handled when the "failed" output has an entry in
    # the graph children.
    if TASK_OUTPUT_FAILED in self.graph_children:
        self.failure_handled = True
    else:
        self.failure_handled = False
def get_suite_run_work_dir(suite, *args):
    """Return the local ``work`` directory of a suite.

    The path is built from the "work directory" of the default platform,
    the suite name and ``work``; any extra ``args`` are joined on as
    further path components. Environment variables are expanded.
    """
    work_root = get_platform()['work directory']
    return expandvars(os.path.join(work_root, suite, 'work', *args))
def main(parser, options, suite):
    """Query or poll the run database of a suite for task states.

    After validating the option combination, this either polls for a task
    status, polls for a custom task output message, or simply displays
    the results of a database query.

    Raises:
        UserInputError: For an invalid combination of options or an
            unrecognised status.
        CylcError: If the poller cannot connect to the suite database.

    Calls ``sys.exit(1)`` if polling does not succeed.
    """
    if options.use_task_point and options.cycle:
        raise UserInputError(
            "cannot specify a cycle point and use environment variable")

    # Take the cycle point from the task job environment if requested.
    if options.use_task_point:
        if "CYLC_TASK_CYCLE_POINT" in os.environ:
            options.cycle = os.environ["CYLC_TASK_CYCLE_POINT"]
        else:
            raise UserInputError("CYLC_TASK_CYCLE_POINT is not defined")

    if options.offset and not options.cycle:
        raise UserInputError(
            "You must target a cycle point to use an offset")

    if options.template:
        print("WARNING: ignoring --template (no longer needed)",
              file=sys.stderr)

    # Attempt to apply specified offset to the targeted cycle
    if options.offset:
        options.cycle = str(add_offset(options.cycle, options.offset))

    # Exit if both task state and message are to be polled
    if options.status and options.msg:
        raise UserInputError("cannot poll both status and custom output")

    # NOTE(review): this only fires when BOTH task and cycle are missing,
    # although the message asks for both - confirm whether
    # `not (task and cycle)` was intended.
    if options.msg and not options.task and not options.cycle:
        raise UserInputError("need a taskname and cyclepoint")

    # Exit if an invalid status is requested
    if (options.status and
            options.status not in TASK_STATUSES_ORDERED and
            options.status not in CylcSuiteDBChecker.STATE_ALIASES):
        raise UserInputError("invalid status '" + options.status + "'")

    # this only runs locally
    run_dir = os.path.expandvars(
        os.path.expanduser(
            options.run_dir or get_platform()['run directory']
        )
    )

    pollargs = {'suite': suite,
                'run_dir': run_dir,
                'task': options.task,
                'cycle': options.cycle,
                'status': options.status,
                'message': options.msg,
                }

    spoller = SuitePoller("requested state",
                          options.interval,
                          options.max_polls,
                          args=pollargs)

    connected, formatted_pt = spoller.connect()

    if not connected:
        raise CylcError("cannot connect to the suite DB")

    if options.status and options.task and options.cycle:
        # check a task status
        spoller.condition = options.status
        if not spoller.poll():
            sys.exit(1)
    elif options.msg:
        # Check for a custom task output
        spoller.condition = "output: %s" % options.msg
        if not spoller.poll():
            sys.exit(1)
    else:
        # just display query results
        spoller.checker.display_maps(
            spoller.checker.suite_state_query(
                task=options.task,
                cycle=formatted_pt,
                status=options.status))
def test_get_platform_no_args():
    """Without a task config, the localhost platform settings are returned."""
    default_platform = get_platform()
    assert default_platform['hosts'] == ['localhost']
def _run_command_exit(
    cls,
    ctx,
    bad_hosts=None,
    callback: Optional[Callable] = None,
    callback_args: Optional[List[Any]] = None,
    callback_255: Optional[Callable] = None,
    callback_255_args: Optional[List[Any]] = None
) -> None:
    """Handle the completion of a command.

    If the command failed with a 255 error (as detected by
    ``ssh_255_fail`` / ``rsync_255_fail``), log a warning, record the host
    as bad and run the alternative callback if one is provided; otherwise
    run the normal callback.

    Args:
        ctx: SubProcContext object for this task.
        bad_hosts: Collection to which ``ctx.host`` is added on a 255
            failure; nothing is recorded if None.
        callback: Function to run on command exit.
        callback_args: Arguments to provide to callback.
        callback_255: Function to run if command exits with a 255 error -
            usually associated with ssh being unable to contact a remote
            host.
        callback_255_args: Arguments for the 255 callback function.
    """
    def _invoke(func, func_args=None):
        """Call func(ctx, *func_args); return False if func is not callable."""
        if not callable(func):
            return False
        func(ctx, *(func_args or []))
        return None

    ctx.timestamp = get_current_time_string()

    # If cmd is fileinstall, which uses rsync, get a platform so
    # that you can use that platform's ssh command.
    is_logs_retrieval = isinstance(ctx.cmd_key, TaskJobLogsRetrieveContext)
    platform = None
    if is_logs_retrieval:
        platform = get_platform(ctx.cmd_key.platform_name)
    elif callback_args:
        candidate = callback_args[0]
        looks_like_platform = (
            isinstance(candidate, dict)
            and 'ssh command' in candidate
            and 'name' in candidate
        )
        # Discard the first argument if it is not a platform.
        platform = candidate if looks_like_platform else None

    connection_failed = (
        cls.ssh_255_fail(ctx)
        or cls.rsync_255_fail(ctx, platform) is True
    )
    if not connection_failed:
        # For every other return code run default callback.
        _invoke(callback, callback_args)
        return

    # Job log retrieval passes a special object as a command key
    # Extra logic to provide sensible strings for logging.
    cmd_key = ctx.cmd_key.key if is_logs_retrieval else ctx.cmd_key
    log_platform_event(
        # NOTE: the failure of the command should be logged elsewhere
        (
            f'Could not connect to {ctx.host}.'
            f'\n* {ctx.host} has been added to the list of'
            ' unreachable hosts'
            f'\n* {cmd_key} will retry if another host is available.'
        ),
        platform or {'name': None},
        level='warning',
    )

    # If callback_255 takes the same args as callback, we don't
    # want to spec those args:
    if callable(callback_255) and callback_255_args is None:
        callback_255_args = callback_args

    if bad_hosts is not None:
        bad_hosts.add(ctx.host)

    # Fall back to the default callback when no usable 255 callback
    # was provided.
    if _invoke(callback_255, callback_255_args) is False:
        _invoke(callback, callback_args)
def _construct_ssh_cmd(
    raw_cmd,
    host=None,
    forward_x11=False,
    stdin=False,
    ssh_cmd=None,
    ssh_login_shell=None,
    remote_cylc_path=None,
    set_UTC=False,
    set_verbosity=False,
    timeout=None
):
    """Build an SSH command line that runs a cylc command on a host.

    Arguments:
        raw_cmd (list):
            primitive command to run remotely.
        host (string):
            remote host name. Use 'localhost' if not specified.
        forward_x11 (boolean):
            If True, add '-Y' to the ssh command to enable X11 forwarding.
        stdin:
            If None, the `-n` option will be added to the SSH command line.
        ssh_cmd (string):
            ssh command to use: If unset defaults to localhost ssh cmd.
        ssh_login_shell (boolean):
            If True, launch remote command with
            `bash --login -c 'exec "$0" "$@"'`. If None, the localhost
            "use login shell" setting decides.
        remote_cylc_path (string):
            Path containing the `cylc` executable.
            This is required if the remote executable is not in $PATH.
            If unset, the localhost "cylc path" setting is used.
        set_UTC (boolean):
            If True, pass CYLC_UTC=True and TZ=UTC through when the
            CYLC_UTC environment variable is "True" or "true".
        set_verbosity (boolean):
            If True apply -q, -v opts to match cylc.flow.flags.verbosity.
        timeout (str):
            String for bash timeout command.

    Return:
        list - A list containing a chosen command including all arguments and
        options necessary to directly execute the bare command on a given host
        via ssh.
    """
    # If ssh cmd isn't given use the default from localhost settings.
    ssh_base = get_platform()['ssh command'] if ssh_cmd is None else ssh_cmd
    command = shlex.split(ssh_base)

    if forward_x11:
        command.append('-Y')
    if stdin is None:
        command.append('-n')
    command.append(host or 'localhost')

    # Pass CYLC_VERSION and optionally, CYLC_CONF_PATH & CYLC_UTC through.
    command.extend(('env', quote(r'CYLC_VERSION=%s' % CYLC_VERSION)))
    forwarded_envvars = (
        'CYLC_CONF_PATH',
        'CYLC_COVERAGE',
        'CLIENT_COMMS_METH',
        'CYLC_ENV_NAME',
    )
    command.extend(
        quote(f'{envvar}={os.environ[envvar]}')
        for envvar in forwarded_envvars
        if envvar in os.environ
    )
    if set_UTC and os.getenv('CYLC_UTC') in ["True", "true"]:
        command += [quote(r'CYLC_UTC=True'), quote(r'TZ=UTC')]

    # Use bash -l?
    if ssh_login_shell is None:
        ssh_login_shell = get_platform()['use login shell']
    if ssh_login_shell:
        # A login shell will always source /etc/profile and the user's bash
        # profile file. To avoid having to quote the entire remote command
        # it is passed as arguments to the bash script.
        command += ['bash', '--login', '-c', quote(r'exec "$0" "$@"')]

    if timeout:
        command += ['timeout', timeout]

    # 'cylc' on the remote host
    cylc_dir = remote_cylc_path or get_platform()['cylc path']
    command.append(str(Path(cylc_dir) / 'cylc') if cylc_dir else 'cylc')

    # Insert core raw command after ssh, but before its own, command options.
    command += raw_cmd

    if set_verbosity:
        command.extend(verbosity_to_opts(cylc.flow.flags.verbosity))

    return command
def construct_ssh_cmd(
    raw_cmd,
    host=None,
    forward_x11=False,
    stdin=False,
    ssh_cmd=None,
    ssh_login_shell=None,
    ssh_cylc=None,
    set_UTC=False,
    allow_flag_opts=False,
    timeout=None
):
    """Wrap a bare cylc command with what is needed to run it via ssh.

    Arguments:
        raw_cmd (list):
            primitive command to run remotely.
        host (string):
            remote host name. Use 'localhost' if not specified.
        forward_x11 (boolean):
            If True, add '-Y' to the ssh command to enable X11 forwarding.
        stdin:
            If None, the `-n` option will be added to the SSH command line.
        ssh_cmd (string):
            ssh command to use: If unset defaults to localhost ssh cmd.
        ssh_login_shell (boolean):
            If True, launch remote command with
            `bash --login -c 'exec "$0" "$@"'`. If None, the localhost
            "use login shell" setting decides.
        ssh_cylc (string):
            Location of the remote cylc executable. If unset, the
            localhost "cylc executable" setting is used.
        set_UTC (boolean):
            If True, pass CYLC_UTC=True and TZ=UTC through when the
            CYLC_UTC environment variable is "True" or "true".
        allow_flag_opts (boolean):
            If True, check CYLC_DEBUG and CYLC_VERBOSE and if non-default,
            specify debug and/or verbosity as options to the 'raw cmd'.
        timeout (str):
            String for bash timeout command.

    Return:
        A list containing a chosen command including all arguments and options
        necessary to directly execute the bare command on a given host via
        ssh.

    Raises:
        ValueError: If the "cylc executable" taken from the global
            configuration does not end in 'cylc'.
    """
    # If ssh cmd isn't given use the default from localhost settings.
    ssh_base = get_platform()['ssh command'] if ssh_cmd is None else ssh_cmd
    command = shlex.split(ssh_base)

    if forward_x11:
        command.append('-Y')
    if stdin is None:
        command.append('-n')
    command.append(host or 'localhost')

    # Pass CYLC_VERSION and optionally, CYLC_CONF_PATH & CYLC_UTC through.
    command.extend(('env', quote(r'CYLC_VERSION=%s' % CYLC_VERSION)))
    conf_path = os.environ.get('CYLC_CONF_PATH')
    if conf_path is not None:
        command.append(quote(r'CYLC_CONF_PATH=%s' % conf_path))
    if set_UTC and os.getenv('CYLC_UTC') in ["True", "true"]:
        command += [quote(r'CYLC_UTC=True'), quote(r'TZ=UTC')]

    # Use bash -l?
    if ssh_login_shell is None:
        ssh_login_shell = get_platform()['use login shell']
    if ssh_login_shell:
        # A login shell will always source /etc/profile and the user's bash
        # profile file. To avoid having to quote the entire remote command
        # it is passed as arguments to the bash script.
        command += ['bash', '--login', '-c', quote(r'exec "$0" "$@"')]

    if timeout:
        command += ['timeout', timeout]

    # 'cylc' on the remote host; only a value taken from the global
    # configuration is validated.
    if not ssh_cylc:
        ssh_cylc = get_platform()['cylc executable']
        if not ssh_cylc.endswith('cylc'):
            # TODO - raise appropriate exception
            raise ValueError(
                r'ERROR: bad cylc executable in global config: %s' % ssh_cylc)
    command.append(ssh_cylc)

    # Insert core raw command after ssh, but before its own, command options.
    command += raw_cmd

    if allow_flag_opts:
        verbose_requested = (
            cylc.flow.flags.verbose
            or os.getenv('CYLC_VERBOSE') in ["True", "true"]
        )
        if verbose_requested:
            command.append(r'--verbose')
        debug_requested = (
            cylc.flow.flags.debug
            or os.getenv('CYLC_DEBUG') in ["True", "true"]
        )
        if debug_requested:
            command.append(r'--debug')

    return command