def test_sprocess_communicate_with_process(self):
    """procopen + communicate() should capture the child's stdout."""
    suffix_one = ' foo'
    suffix_two = ' bar'
    message = "this is a command" + suffix_one + suffix_two
    child = procopen(["echo", message], stdoutpipe=True)
    captured, _ = child.communicate()
    compare(captured, b"this is a command foo bar\n")
def _run_command_init(cls, ctx, bad_hosts=None, callback=None,
                      callback_args=None, callback_255=None,
                      callback_255_args=None):
    """Prepare and launch shell command in ctx.

    Builds a stdin source from ctx.cmd_kwargs ('stdin_files' or
    'stdin_str', else DEVNULL), then launches ctx.cmd via procopen.
    On OSError the error is recorded on ctx and the exit callback chain
    is invoked; returns the Popen-like proc on success, None on failure.
    """
    try:
        if ctx.cmd_kwargs.get('stdin_files'):
            if len(ctx.cmd_kwargs['stdin_files']) > 1:
                # Concatenate several sources into one temporary file.
                stdin_file = cls.get_temporary_file()
                for file_ in ctx.cmd_kwargs['stdin_files']:
                    if hasattr(file_, 'read'):
                        # Already a file-like object.
                        stdin_file.write(file_.read())
                    else:
                        with open(file_, 'rb') as openfile:
                            stdin_file.write(openfile.read())
                stdin_file.seek(0)
            elif hasattr(ctx.cmd_kwargs['stdin_files'][0], 'read'):
                # Single file-like object: pass it through directly.
                stdin_file = ctx.cmd_kwargs['stdin_files'][0]
            else:
                # Single path: open it for the subprocess.
                stdin_file = open(  # noqa: SIM115
                    # (nasty use of file handles, should avoid in future)
                    ctx.cmd_kwargs['stdin_files'][0], 'rb')
        elif ctx.cmd_kwargs.get('stdin_str'):
            # Literal stdin text: stage it in a temporary file.
            stdin_file = cls.get_temporary_file()
            stdin_file.write(ctx.cmd_kwargs.get('stdin_str').encode())
            stdin_file.seek(0)
        else:
            # No stdin requested; prevent leakage from our own stdin.
            stdin_file = DEVNULL
        proc = procopen(
            ctx.cmd, stdin=stdin_file, stdoutpipe=True, stderrpipe=True,
            # Execute command as a process group leader,
            # so we can use "os.killpg" to kill the whole group.
            preexec_fn=os.setpgrp,
            env=ctx.cmd_kwargs.get('env'),
            usesh=ctx.cmd_kwargs.get('shell'))
        # calls to open a shell are aggregated in cylc_subproc.procopen()
        # with logging for what is calling it and the commands given
    except OSError as exc:
        if exc.filename is None:
            # Popen may raise OSError without setting the executable name.
            exc.filename = ctx.cmd[0]
        LOG.exception(exc)
        ctx.ret_code = 1
        ctx.err = str(exc)
        # Report the failure through the normal exit-callback path.
        cls._run_command_exit(
            ctx, bad_hosts=bad_hosts, callback=callback,
            callback_args=callback_args, callback_255=callback_255,
            callback_255_args=callback_255_args)
        return None
    else:
        LOG.debug(ctx.cmd)
        return proc
def main(_, options: 'Values', *ids) -> None: workflow_id, _, flow_file = parse_id( *ids, src=True, constraint='workflows', ) # extract task host platforms from the workflow_id config = WorkflowConfig( workflow_id, flow_file, options, load_template_vars(options.templatevars, options.templatevars_file)) platforms = { config.get_config(['runtime', name, 'platform']) for name in config.get_namespace_list('all tasks') } - {None, 'localhost'} # When "workflow run hosts" are formalised as "flow platforms" # we can substitute `localhost` for this, in the mean time # we will have to assume that flow hosts are configured correctly. if not platforms: sys.exit(0) verbose = cylc.flow.flags.verbosity > 0 # get the cylc version on each platform versions = {} for platform_name in sorted(platforms): platform = get_platform(platform_name) host = get_host_from_platform(platform, bad_hosts=None) cmd = construct_ssh_cmd(['version'], platform, host) if verbose: print(cmd) proc = procopen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE) out, err = proc.communicate() out = out.decode() err = err.decode() if proc.wait() == 0: if verbose: print(" %s" % out) versions[platform_name] = out.strip() else: versions[platform_name] = f'ERROR: {err.strip()}' # report results max_len = max((len(platform_name) for platform_name in platforms)) print(f'{"platform".rjust(max_len)}: cylc version') print('-' * (max_len + 14)) for platform_name, result in versions.items(): print(f'{platform_name.rjust(max_len)}: {result}') if all((version == CYLC_VERSION for version in versions.values())): ret_code = 0 elif options.error: ret_code = 1 else: ret_code = 0 sys.exit(ret_code)
def job_kill(self, st_file_path): """Ask job runner to terminate the job specified in "st_file_path". Return 0 on success, non-zero integer on failure. """ # WORKFLOW_RUN_DIR/log/job/CYCLE/TASK/SUBMIT/job.status self.configure_workflow_run_dir(st_file_path.rsplit(os.sep, 6)[0]) try: with open(st_file_path) as st_file: for line in st_file: if line.startswith(f"{self.CYLC_JOB_RUNNER_NAME}="): job_runner = self._get_sys(line.strip().split("=", 1)[1]) break else: return (1, "Cannot determine job runner from " f"{JOB_LOG_STATUS} file") st_file.seek(0, 0) # rewind if getattr(job_runner, "SHOULD_KILL_PROC_GROUP", False): for line in st_file: if line.startswith(CYLC_JOB_PID + "="): pid = line.strip().split("=", 1)[1] try: os.killpg(os.getpgid(int(pid)), SIGKILL) except (OSError, ValueError) as exc: traceback.print_exc() return (1, str(exc)) else: return (0, "") st_file.seek(0, 0) # rewind if hasattr(job_runner, "KILL_CMD_TMPL"): for line in st_file: if not line.startswith(f"{self.CYLC_JOB_ID}="): continue job_id = line.strip().split("=", 1)[1] command = shlex.split(job_runner.KILL_CMD_TMPL % {"job_id": job_id}) try: proc = procopen(command, stdindevnull=True, stderrpipe=True) except OSError as exc: # subprocess.Popen has a bad habit of not setting # the filename of the executable when it raises an # OSError. if not exc.filename: exc.filename = command[0] traceback.print_exc() return (1, str(exc)) else: return (proc.wait(), proc.communicate()[1].decode()) return (1, f"Cannot determine job ID from {JOB_LOG_STATUS} file") except IOError as exc: return (1, str(exc))
def cmd_find_ver(module, min_ver, cmd_base, ver_opt, ver_extr, outfile=1,
                 write=True):
    """Print outcome & return Boolean (True for pass) of local module
    version requirement test using relevant custom command base keyword(s),
    version-checking option(s) & version-extraction regex.

    Fixes over the previous revision:
    * /dev/null is opened via a context manager instead of being leaked
      (two unclosed ``open(os.devnull)`` handles per command tried).
    * ``res`` is given a default so an empty ``cmd_base`` no longer raises
      UnboundLocalError.
    """
    msg = '%s (%s)' % (module,
                       string_ver(min_ver) + '+' if min_ver is not None
                       else 'any')
    # Default result: reported if cmd_base is empty or nothing is found.
    res = [NOTFOUND_MSG, False]
    for cmd in cmd_base:
        try_next_cmd = True
        with open(os.devnull) as devnull:
            # 'which' exit status is non-zero when the command is absent.
            if procopen(['which', cmd], stdin=devnull, stdoutpipe=True,
                        stderrpipe=True).wait():
                res = [NOTFOUND_MSG, False]
            else:
                try:
                    # outfile selects stdout (1) or stderr (2) for the
                    # version text.
                    output = procopen(
                        [cmd, ver_opt], stdoutpipe=True, stdin=devnull,
                        stderrpipe=True
                    ).communicate()[outfile - 1].decode().strip()
                    # AttributeError from .groups() on a failed search is
                    # caught below.
                    version = re.search(ver_extr, output).groups()[0]
                    try_next_cmd = False
                    if min_ver is None:
                        res = ['%s (%s)' % (FOUND_NOVER_MSG, version),
                               True]
                    elif parse_version(version) >= min_ver:
                        res = ['%s (%s)' % (MINVER_MET_MSG, version),
                               True]
                    else:
                        res = ['%s (%s)' % (MINVER_NOTMET_MSG, version),
                               False]
                except AttributeError:
                    # Version string did not match the extraction regex.
                    res = [FOUND_UNKNOWNVER_MSG, False]
        if not try_next_cmd:
            break
    if write:
        shell_align_write('.', msg, res[0])
    return res[1]
def main(_, options, *args):
    """Check the cylc version on each platform used by a suite.

    Exits 0 normally; exits 1 if versions differ and --error was given.
    (Fix: the local previously named ``exit`` shadowed the builtin; it is
    now ``ret_code``, matching the later revision of this script.)
    """
    # suite name or file path
    suite, flow_file = parse_suite_arg(options, args[0])

    # extract task host platforms from the suite
    config = SuiteConfig(
        suite, flow_file, options,
        load_template_vars(options.templatevars, options.templatevars_file))

    platforms = {
        config.get_config(['runtime', name, 'platform'])
        for name in config.get_namespace_list('all tasks')
    } - {None, 'localhost'}
    # When "suite run hosts" are formalised as "flow platforms"
    # we can substitute `localhost` for this, in the mean time
    # we will have to assume that flow hosts are configured correctly.

    if not platforms:
        # Nothing remote to check.
        sys.exit(0)

    verbose = cylc.flow.flags.verbose

    # get the cylc version on each platform
    versions = {}
    for platform_name in sorted(platforms):
        platform = get_platform(platform_name)
        cmd = construct_platform_ssh_cmd(['version'], platform)
        if verbose:
            print(cmd)
        proc = procopen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
        out, err = proc.communicate()
        out = out.decode()
        err = err.decode()
        if proc.wait() == 0:
            if verbose:
                print("    %s" % out)
            versions[platform_name] = out.strip()
        else:
            versions[platform_name] = f'ERROR: {err.strip()}'

    # report results
    max_len = max((len(platform_name) for platform_name in platforms))
    print(f'{"platform".rjust(max_len)}: cylc version')
    print('-' * (max_len + 14))
    for platform_name, result in versions.items():
        print(f'{platform_name.rjust(max_len)}: {result}')
    # Non-zero exit only when requested via --error and versions mismatch.
    if all((version == CYLC_VERSION for version in versions.values())):
        ret_code = 0
    elif options.error:
        ret_code = 1
    else:
        ret_code = 0
    sys.exit(ret_code)
def job_kill(self, st_file_path):
    """Ask batch system to terminate the job specified in "st_file_path".

    Return 0 on success, non-zero integer on failure.

    (Fix: the status file is now opened via a context manager so the
    handle is closed on every return path — previously it was leaked,
    as the later revision of this method also corrected.)
    """
    # SUITE_RUN_DIR/log/job/CYCLE/TASK/SUBMIT/job.status
    self.configure_suite_run_dir(st_file_path.rsplit(os.sep, 6)[0])
    try:
        with open(st_file_path) as st_file:
            # Pass 1: identify the batch system from the status file.
            for line in st_file:
                if line.startswith(self.CYLC_BATCH_SYS_NAME + "="):
                    batch_sys = self._get_sys(
                        line.strip().split("=", 1)[1])
                    break
            else:
                return (1, "Cannot determine batch system from %s file"
                        % (JOB_LOG_STATUS))
            st_file.seek(0, 0)  # rewind
            # Pass 2: kill by process group, if supported.
            if getattr(batch_sys, "SHOULD_KILL_PROC_GROUP", False):
                for line in st_file:
                    if line.startswith(CYLC_JOB_PID + "="):
                        pid = line.strip().split("=", 1)[1]
                        try:
                            os.killpg(os.getpgid(int(pid)), SIGKILL)
                        except (OSError, ValueError) as exc:
                            traceback.print_exc()
                            return (1, str(exc))
                        else:
                            return (0, "")
            st_file.seek(0, 0)  # rewind
            # Pass 3: otherwise use the batch system's kill command.
            if hasattr(batch_sys, "KILL_CMD_TMPL"):
                for line in st_file:
                    if not line.startswith(
                            self.CYLC_BATCH_SYS_JOB_ID + "="):
                        continue
                    job_id = line.strip().split("=", 1)[1]
                    command = shlex.split(
                        batch_sys.KILL_CMD_TMPL % {"job_id": job_id})
                    try:
                        proc = procopen(command, stdindevnull=True,
                                        stderrpipe=True)
                    except OSError as exc:
                        # subprocess.Popen has a bad habit of not setting
                        # the filename of the executable when it raises an
                        # OSError.
                        if not exc.filename:
                            exc.filename = command[0]
                        traceback.print_exc()
                        return (1, str(exc))
                    else:
                        return (proc.wait(),
                                proc.communicate()[1].decode())
            return (1, "Cannot determine batch job ID from %s file"
                    % (JOB_LOG_STATUS))
    except IOError as exc:
        return (1, str(exc))
def job_kill(self, st_file_path): """Ask batch system to terminate the job specified in "st_file_path". Return 0 on success, non-zero integer on failure. """ # SUITE_RUN_DIR/log/job/CYCLE/TASK/SUBMIT/job.status self.configure_suite_run_dir(st_file_path.rsplit(os.sep, 6)[0]) try: st_file = open(st_file_path) for line in st_file: if line.startswith(self.CYLC_BATCH_SYS_NAME + "="): batch_sys = self._get_sys(line.strip().split("=", 1)[1]) break else: return (1, "Cannot determine batch system from %s file" % ( JOB_LOG_STATUS)) st_file.seek(0, 0) # rewind if getattr(batch_sys, "SHOULD_KILL_PROC_GROUP", False): for line in st_file: if line.startswith(CYLC_JOB_PID + "="): pid = line.strip().split("=", 1)[1] try: os.killpg(os.getpgid(int(pid)), SIGKILL) except (OSError, ValueError) as exc: traceback.print_exc() return (1, str(exc)) else: return (0, "") st_file.seek(0, 0) # rewind if hasattr(batch_sys, "KILL_CMD_TMPL"): for line in st_file: if not line.startswith(self.CYLC_BATCH_SYS_JOB_ID + "="): continue job_id = line.strip().split("=", 1)[1] command = shlex.split( batch_sys.KILL_CMD_TMPL % {"job_id": job_id}) try: proc = procopen(command, stdin=open(os.devnull), stderrpipe=True) except OSError as exc: # subprocess.Popen has a bad habit of not setting the # filename of the executable when it raises an OSError. if not exc.filename: exc.filename = command[0] traceback.print_exc() return (1, str(exc)) else: return (proc.wait(), proc.communicate()[1].decode()) return (1, "Cannot determine batch job ID from %s file" % ( JOB_LOG_STATUS)) except IOError as exc: return (1, str(exc))
def output_width(min_width=65, max_width=90):
    """Return a suitable output alignment width given user terminal width."""
    fallback = int((min_width + max_width) / 2)
    # 'stty size' prints "<rows> <cols>"; non-zero exit means no terminal.
    proc = procopen(['stty', 'size'], stdoutpipe=True)
    if proc.wait():
        return fallback
    try:
        columns = int(proc.communicate()[0].split()[1])
    except IndexError:
        # Output did not contain a column count.
        return fallback
    # Clamp the detected width into [min_width, max_width].
    return max(min_width, min(max_width, columns))
def _run_command_init(cls, ctx, callback=None, callback_args=None):
    """Prepare and launch shell command in ctx.

    Builds a stdin source from ctx.cmd_kwargs ('stdin_files' or
    'stdin_str', else /dev/null), then launches ctx.cmd via procopen.
    Returns the proc on success, None on failure (error recorded on ctx).

    Fix: source files for multi-file stdin are now read inside a `with`
    block instead of the leaked ``open(file_, 'rb').read()``.
    """
    try:
        if ctx.cmd_kwargs.get('stdin_files'):
            if len(ctx.cmd_kwargs['stdin_files']) > 1:
                # Concatenate several sources into one temporary file.
                stdin_file = cls.get_temporary_file()
                for file_ in ctx.cmd_kwargs['stdin_files']:
                    if hasattr(file_, 'read'):
                        # Already a file-like object.
                        stdin_file.write(file_.read())
                    else:
                        # Close the source promptly (was a leaked
                        # open(...).read()).
                        with open(file_, 'rb') as openfile:
                            stdin_file.write(openfile.read())
                stdin_file.seek(0)
            elif hasattr(ctx.cmd_kwargs['stdin_files'][0], 'read'):
                # Single file-like object: pass it through directly.
                stdin_file = ctx.cmd_kwargs['stdin_files'][0]
            else:
                # Single path: open it for the subprocess.
                # (Handle ownership passes to the subprocess machinery.)
                stdin_file = open(
                    ctx.cmd_kwargs['stdin_files'][0], 'rb')
        elif ctx.cmd_kwargs.get('stdin_str'):
            # Literal stdin text: stage it in a temporary file.
            stdin_file = cls.get_temporary_file()
            stdin_file.write(ctx.cmd_kwargs.get('stdin_str').encode())
            stdin_file.seek(0)
        else:
            stdin_file = open(os.devnull)
        proc = procopen(
            ctx.cmd, stdin=stdin_file, stdoutpipe=True, stderrpipe=True,
            # Execute command as a process group leader,
            # so we can use "os.killpg" to kill the whole group.
            preexec_fn=os.setpgrp,
            env=ctx.cmd_kwargs.get('env'),
            usesh=ctx.cmd_kwargs.get('shell'))
        # calls to open a shell are aggregated in cylc_subproc.procopen()
        # with logging for what is calling it and the commands given
    except (IOError, OSError) as exc:
        if exc.filename is None:
            # Popen may raise OSError without setting the executable name.
            exc.filename = ctx.cmd[0]
        LOG.exception(exc)
        ctx.ret_code = 1
        ctx.err = str(exc)
        cls._run_command_exit(ctx, callback, callback_args)
        return None
    else:
        LOG.debug(ctx.cmd)
        return proc
def run_get_stdout(command, timeout=None, poll_delay=None):
    """Get standard output from a shell command.

    If "timeout" is specified, it should be the number of seconds before
    timeout. On timeout, the command will be killed. The argument
    "poll_delay" is only relevant if "timeout" is specified. It specifies
    the intervals in number of seconds between polling for the completion
    of the command.

    Return (True, [stdoutline1, ...]) on success.
    Return (False, [err_msg, command]) on failure.

    """
    try:
        # NOTE(review): open(devnull) is never closed — handle leak.
        proc = procopen(command, usesh=True, preexec_fn=setpgrp,
                        stdin=open(devnull), stderrpipe=True,
                        stdoutpipe=True)
        # calls to open a shell are aggregated in cylc_subproc.procopen()
        is_killed_after_timeout = False
        if timeout:
            if poll_delay is None:
                poll_delay = POLL_DELAY
            timeout_time = time() + timeout
            # Poll until completion or deadline; SIGTERM the whole
            # process group on timeout (proc is a group leader via
            # preexec_fn=setpgrp).
            while proc.poll() is None:
                if time() > timeout_time:
                    killpg(proc.pid, SIGTERM)
                    is_killed_after_timeout = True
                    break
                sleep(poll_delay)
        out, err = (f.decode() for f in proc.communicate())
        res = proc.wait()
        # Negative return code means the child died from a signal.
        if res < 0 and is_killed_after_timeout:
            return (False, [ERR_TIMEOUT % (timeout, -res, err), command])
        elif res < 0:
            return (False, [ERR_SIGNAL % (-res, err), command])
        elif res > 0:
            return (False, [ERR_RETCODE % (res, err), command])
    except OSError:
        # should never do this with shell=True
        return (False, [ERR_OS, command])
    else:
        # res == 0: success.
        return (True, out.strip().splitlines())
def _job_submit_impl(
        self, job_file_path, batch_sys_name, submit_opts):
    """Helper for self.jobs_submit() and self.job_submit().

    Submit one job and return (ret_code, out, err, job_id).
    """
    # Create NN symbolic link, if necessary
    self._create_nn(job_file_path)
    # Remove stale out/err logs from any previous submission.
    for name in JOB_LOG_ERR, JOB_LOG_OUT:
        try:
            os.unlink(os.path.join(job_file_path, name))
        except OSError:
            pass

    # Start new status file
    job_status_file = open(job_file_path + ".status", "w")
    job_status_file.write(
        "%s=%s\n" % (self.CYLC_BATCH_SYS_NAME, batch_sys_name))
    job_status_file.close()

    # Submit job
    batch_sys = self._get_sys(batch_sys_name)
    proc_stdin_arg = None
    # NOTE(review): open(os.devnull) is never closed here — handle leak;
    # later revisions use subprocess DEVNULL instead.
    proc_stdin_value = open(os.devnull)
    if hasattr(batch_sys, "get_submit_stdin"):
        proc_stdin_arg, proc_stdin_value = batch_sys.get_submit_stdin(
            job_file_path, submit_opts)
    # communicate() requires bytes for its input argument.
    if isinstance(proc_stdin_arg, str):
        proc_stdin_arg = proc_stdin_arg.encode()
    if isinstance(proc_stdin_value, str):
        proc_stdin_value = proc_stdin_value.encode()
    if hasattr(batch_sys, "submit"):
        # batch_sys.submit should handle OSError, if relevant.
        ret_code, out, err = batch_sys.submit(job_file_path, submit_opts)
    else:
        env = None
        if hasattr(batch_sys, "SUBMIT_CMD_ENV"):
            env = dict(os.environ)
            env.update(batch_sys.SUBMIT_CMD_ENV)
        batch_submit_cmd_tmpl = submit_opts.get("batch_submit_cmd_tmpl")
        if batch_submit_cmd_tmpl:
            # No need to catch OSError when using shell. It is unlikely
            # that we do not have a shell, and still manage to get as far
            # as here.
            batch_sys_cmd = batch_submit_cmd_tmpl % {"job": job_file_path}
            proc = procopen(batch_sys_cmd, stdin=proc_stdin_arg,
                            stdoutpipe=True, stderrpipe=True, usesh=True,
                            env=env)
            # calls to open a shell are aggregated in
            # cylc_subproc.procopen()
        else:
            command = shlex.split(
                batch_sys.SUBMIT_CMD_TMPL % {"job": job_file_path})
            try:
                proc = procopen(command, stdin=proc_stdin_arg,
                                stdoutpipe=True, stderrpipe=True,
                                env=env)
            except OSError as exc:
                # subprocess.Popen has a bad habit of not setting the
                # filename of the executable when it raises an OSError.
                if not exc.filename:
                    exc.filename = command[0]
                return 1, "", str(exc), ""
        out, err = (f.decode()
                    for f in proc.communicate(proc_stdin_value))
        ret_code = proc.wait()

    # Filter submit command output, if relevant
    # Get job ID, if possible
    job_id = None
    if out or err:
        try:
            out, err, job_id = self._filter_submit_output(
                job_file_path + ".status", batch_sys, out, err)
        except OSError:
            # Could not update the status file: kill the submission.
            ret_code = 1
            self.job_kill(job_file_path + ".status")

    return ret_code, out, err, job_id
# Shutdown event handler script body.
# NOTE(review): `event` and `suite` are assumed to be defined earlier in
# this script (outside this excerpt), presumably from command arguments.
if event != 'shutdown':
    raise SystemExit("ERROR: run this as a shutdown event handler")
try:
    # log_dir is computed here but not used below in this excerpt.
    log_dir = os.path.expandvars(os.environ['CYLC_SUITE_LOG_DIR'])
    suite_dir = os.path.expandvars(os.environ['CYLC_SUITE_DEF_PATH'])
except KeyError as exc:
    raise SystemExit(exc)
ref = os.path.join(suite_dir, 'broadcast.ref')
log = os.path.join(suite_dir, 'broadcast.log')
# NOTE(review): fref/flog are never closed — acceptable for a short
# script, but a `with` block would be tidier.
fref = open(ref, 'r')
flog = open(log, 'r')
reflines = fref.readlines()
loglines = flog.readlines()
# Compare the two logs ignoring line order.
reflines.sort()
loglines.sort()
if reflines != loglines:
    sys.exit("ERROR: broadcast logs do not compare")
else:
    print("broadcast logs compare OK")
# Delegate the final triggering check to the cylc CLI (shell command).
res = procopen(["cylc check-triggering " + event + " " + suite],
               usesh=True)
status = res.wait()
if status != 0:
    sys.exit(1)
def _job_submit_impl(self, job_file_path, batch_sys_name, submit_opts):
    """Helper for self.jobs_submit() and self.job_submit().

    Submit one job and return (ret_code, out, err, job_id).
    """
    # Create NN symbolic link, if necessary
    self._create_nn(job_file_path)
    # Remove stale out/err logs from any previous submission.
    for name in JOB_LOG_ERR, JOB_LOG_OUT:
        try:
            os.unlink(os.path.join(job_file_path, name))
        except OSError:
            pass

    # Start new status file
    job_status_file = open(job_file_path + ".status", "w")
    job_status_file.write("%s=%s\n" % (self.CYLC_BATCH_SYS_NAME,
                                       batch_sys_name))
    job_status_file.close()

    # Submit job
    batch_sys = self._get_sys(batch_sys_name)
    if hasattr(batch_sys, "submit"):
        # batch_sys.submit should handle OSError, if relevant.
        ret_code, out, err = batch_sys.submit(job_file_path, submit_opts)
    else:
        proc_stdin_arg = None
        # Set command STDIN to DEVNULL by default to prevent leakage of
        # STDIN from current environment.
        proc_stdin_value = DEVNULL  # nosec
        if hasattr(batch_sys, "get_submit_stdin"):
            proc_stdin_arg, proc_stdin_value = batch_sys.get_submit_stdin(
                job_file_path, submit_opts)
        # communicate() requires bytes for its input argument.
        if isinstance(proc_stdin_value, str):
            proc_stdin_value = proc_stdin_value.encode()
        env = None
        if hasattr(batch_sys, "SUBMIT_CMD_ENV"):
            env = dict(os.environ)
            env.update(batch_sys.SUBMIT_CMD_ENV)
        batch_submit_cmd_tmpl = submit_opts.get("batch_submit_cmd_tmpl")
        if batch_submit_cmd_tmpl:
            # No need to catch OSError when using shell. It is unlikely
            # that we do not have a shell, and still manage to get as far
            # as here.
            batch_sys_cmd = batch_submit_cmd_tmpl % {"job": job_file_path}
            proc = procopen(batch_sys_cmd, stdin=proc_stdin_arg,
                            stdoutpipe=True, stderrpipe=True, usesh=True,
                            env=env)
            # calls to open a shell are aggregated in
            # cylc_subproc.procopen()
        else:
            command = shlex.split(batch_sys.SUBMIT_CMD_TMPL %
                                  {"job": job_file_path})
            try:
                proc = procopen(command, stdin=proc_stdin_arg,
                                stdoutpipe=True, stderrpipe=True,
                                env=env)
            except OSError as exc:
                # subprocess.Popen has a bad habit of not setting the
                # filename of the executable when it raises an OSError.
                if not exc.filename:
                    exc.filename = command[0]
                return 1, "", str(exc), ""
        out, err = (f.decode()
                    for f in proc.communicate(proc_stdin_value))
        ret_code = proc.wait()
        # Close the stdin source if it is a file object (AttributeError
        # when it is None or bytes).
        try:
            proc_stdin_arg.close()
        except (AttributeError, IOError):
            pass

    # Filter submit command output, if relevant
    # Get job ID, if possible
    job_id = None
    if out or err:
        try:
            out, err, job_id = self._filter_submit_output(
                job_file_path + ".status", batch_sys, out, err)
        except OSError:
            # Could not update the status file: kill the submission.
            ret_code = 1
            self.job_kill(job_file_path + ".status")

    return ret_code, out, err, job_id
def _jobs_poll_batch_sys(self, job_log_root, batch_sys_name, my_ctx_list):
    """Helper 2 for self.jobs_poll(job_log_root, job_log_dirs).

    Poll the batch system (and, where relevant, raw PIDs) for the jobs
    in my_ctx_list; any ID left in a "bad" list was not seen by the
    poll, i.e. the job has exited.
    """
    exp_job_ids = [ctx.batch_sys_job_id for ctx in my_ctx_list]
    # Start by assuming all jobs are gone; polling removes live ones.
    bad_job_ids = list(exp_job_ids)
    exp_pids = []
    bad_pids = []
    items = [[self._get_sys(batch_sys_name), exp_job_ids, bad_job_ids]]
    if getattr(items[0][0], "SHOULD_POLL_PROC_GROUP", False):
        # Also poll the raw process IDs via the "background" runner.
        exp_pids = [ctx.pid for ctx in my_ctx_list if ctx.pid is not None]
        bad_pids.extend(exp_pids)
        items.append([self._get_sys("background"), exp_pids, bad_pids])
    debug_messages = []
    for batch_sys, exp_ids, bad_ids in items:
        if hasattr(batch_sys, "get_poll_many_cmd"):
            # Some poll commands may not be as simple
            cmd = batch_sys.get_poll_many_cmd(exp_ids)
        else:  # if hasattr(batch_sys, "POLL_CMD"):
            # Simple poll command that takes a list of job IDs
            cmd = [batch_sys.POLL_CMD] + exp_ids
        try:
            # NOTE(review): open(os.devnull) is never closed — handle
            # leak; later revisions use stdindevnull=True.
            proc = procopen(cmd, stdin=open(os.devnull),
                            stderrpipe=True, stdoutpipe=True)
        except OSError as exc:
            # subprocess.Popen has a bad habit of not setting the
            # filename of the executable when it raises an OSError.
            if not exc.filename:
                exc.filename = cmd[0]
            sys.stderr.write(str(exc) + "\n")
            return
        ret_code = proc.wait()
        out, err = (f.decode() for f in proc.communicate())
        debug_messages.append('%s - %s' % (
            batch_sys, len(out.split('\n'))))
        sys.stderr.write(err)
        if (ret_code and hasattr(batch_sys, "POLL_CANT_CONNECT_ERR")
                and batch_sys.POLL_CANT_CONNECT_ERR in err):
            # Poll command failed because it cannot connect to batch system
            # Assume jobs are still healthy until the batch system is back.
            bad_ids[:] = []
        elif hasattr(batch_sys, "filter_poll_many_output"):
            # Allow custom filter
            for id_ in batch_sys.filter_poll_many_output(out):
                try:
                    bad_ids.remove(id_)
                except ValueError:
                    pass
        else:
            # Just about all poll commands return a table, with column 1
            # being the job ID. The logic here should be sufficient to
            # ensure that any table header is ignored.
            for line in out.splitlines():
                try:
                    head = line.split(None, 1)[0]
                except IndexError:
                    continue
                if head in exp_ids:
                    try:
                        bad_ids.remove(head)
                    except ValueError:
                        pass

    debug_flag = False
    for ctx in my_ctx_list:
        ctx.batch_sys_exit_polled = int(
            ctx.batch_sys_job_id in bad_job_ids)
        # Exited batch system, but process still running
        # This can happen to jobs in some "at" implementation
        if ctx.batch_sys_exit_polled and ctx.pid in exp_pids:
            if ctx.pid not in bad_pids:
                ctx.batch_sys_exit_polled = 0
            else:
                debug_flag = True
        # Add information to "job.status"
        if ctx.batch_sys_exit_polled:
            try:
                handle = open(os.path.join(
                    job_log_root, ctx.job_log_dir, JOB_LOG_STATUS), "a")
                handle.write("%s=%s\n" % (
                    self.CYLC_BATCH_SYS_EXIT_POLLED,
                    get_current_time_string()))
                handle.close()
            except IOError as exc:
                sys.stderr.write(str(exc) + "\n")
    # Record poll output sizes on the last ctx for debugging.
    if debug_flag:
        ctx.batch_sys_call_no_lines = ', '.join(debug_messages)
def _jobs_poll_runner(self, job_log_root, job_runner_name, my_ctx_list):
    """Helper 2 for self.jobs_poll(job_log_root, job_log_dirs).

    Poll the job runner (and, where relevant, raw PIDs) for the jobs in
    my_ctx_list; any ID left in a "bad" list was not seen by the poll,
    i.e. the job has exited.
    """
    exp_job_ids = [ctx.job_id for ctx in my_ctx_list]
    # Start by assuming all jobs are gone; polling removes live ones.
    bad_job_ids = list(exp_job_ids)
    exp_pids = []
    bad_pids = []
    items = [[self._get_sys(job_runner_name), exp_job_ids, bad_job_ids]]
    if getattr(items[0][0], "SHOULD_POLL_PROC_GROUP", False):
        # Also poll the raw process IDs via the "background" runner.
        exp_pids = [ctx.pid for ctx in my_ctx_list if ctx.pid is not None]
        bad_pids.extend(exp_pids)
        items.append([self._get_sys("background"), exp_pids, bad_pids])
    debug_messages = []
    for job_runner, exp_ids, bad_ids in items:
        if hasattr(job_runner, "get_poll_many_cmd"):
            # Some poll commands may not be as simple
            cmd = job_runner.get_poll_many_cmd(exp_ids)
        else:  # if hasattr(job_runner, "POLL_CMD"):
            # Simple poll command that takes a list of job IDs
            cmd = [job_runner.POLL_CMD, *exp_ids]
        try:
            proc = procopen(cmd, stdindevnull=True,
                            stderrpipe=True, stdoutpipe=True)
        except OSError as exc:
            # subprocess.Popen has a bad habit of not setting the
            # filename of the executable when it raises an OSError.
            if not exc.filename:
                exc.filename = cmd[0]
            sys.stderr.write(f"{exc}\n")
            return
        ret_code = proc.wait()
        out, err = (f.decode() for f in proc.communicate())
        debug_messages.append('{0} - {1}'.format(
            job_runner, len(out.split('\n'))))
        sys.stderr.write(err)
        if (ret_code and hasattr(job_runner, "POLL_CANT_CONNECT_ERR")
                and job_runner.POLL_CANT_CONNECT_ERR in err):
            # Poll command failed because it cannot connect to job runner
            # Assume jobs are still healthy until the job runner is back.
            bad_ids[:] = []
        elif hasattr(job_runner, "filter_poll_many_output"):
            # Allow custom filter
            for id_ in job_runner.filter_poll_many_output(out):
                try:
                    bad_ids.remove(id_)
                except ValueError:
                    pass
        else:
            # Just about all poll commands return a table, with column 1
            # being the job ID. The logic here should be sufficient to
            # ensure that any table header is ignored.
            for line in out.splitlines():
                try:
                    head = line.split(None, 1)[0]
                except IndexError:
                    continue
                if head in exp_ids:
                    try:
                        bad_ids.remove(head)
                    except ValueError:
                        pass

    debug_flag = False
    for ctx in my_ctx_list:
        ctx.job_runner_exit_polled = int(ctx.job_id in bad_job_ids)
        # Exited job runner, but process still running
        # This can happen to jobs in some "at" implementation
        if ctx.job_runner_exit_polled and ctx.pid in exp_pids:
            if ctx.pid not in bad_pids:
                ctx.job_runner_exit_polled = 0
            else:
                debug_flag = True
        # Add information to "job.status"
        if ctx.job_runner_exit_polled:
            try:
                handle = open(
                    os.path.join(job_log_root, ctx.job_log_dir,
                                 JOB_LOG_STATUS), "a")
                handle.write("{0}={1}\n".format(
                    self.CYLC_JOB_RUNNER_EXIT_POLLED,
                    get_current_time_string()))
                handle.close()
            except IOError as exc:
                sys.stderr.write(f"{exc}\n")
    # Record poll output sizes on the last ctx for debugging.
    if debug_flag:
        ctx.job_runner_call_no_lines = ', '.join(debug_messages)
def _remote_init_callback(
        self, proc_ctx, platform, tmphandle, curve_auth,
        client_pub_key_dir):
    """Callback when "cylc remote-init" exits.

    On success: rsync the run directory to the remote install target,
    install the client public key returned in proc_ctx.out (between
    KEYSTART/KEYEND markers), and record the remote-init status.
    On any failure the install target is marked REMOTE_INIT_FAILED.
    """
    self.ready = True
    try:
        tmphandle.close()
    except OSError:  # E.g. ignore bad unlink, etc
        pass
    self.install_target = platform['install target']
    if proc_ctx.ret_code == 0:
        if REMOTE_INIT_DONE in proc_ctx.out:
            # Remote init succeeded: install files via rsync over ssh.
            src_path = get_suite_run_dir(self.suite)
            dst_path = get_remote_suite_run_dir(platform, self.suite)
            try:
                process = procopen(construct_rsync_over_ssh_cmd(
                    src_path, dst_path, platform, self.rsync_includes),
                    stdoutpipe=True, stderrpipe=True,
                    universal_newlines=True)
                # 600 s ceiling on the file installation.
                out, err = process.communicate(timeout=600)
                install_target = platform['install target']
                if out:
                    RSYNC_LOG.info(
                        'File installation information for '
                        f'{install_target}:\n {out}')
                if err:
                    LOG.error(
                        'File installation error on '
                        f'{install_target}:\n {err}')
            except Exception as ex:
                # Broad catch: any rsync problem fails the remote init.
                LOG.error(f"Problem during rsync: {ex}")
                self.remote_init_map[self.install_target] = (
                    REMOTE_INIT_FAILED)
                return
        if "KEYSTART" in proc_ctx.out:
            # Extract the client public key embedded in the output.
            regex_result = re.search(
                'KEYSTART((.|\n|\r)*)KEYEND', proc_ctx.out)
            key = regex_result.group(1)
            suite_srv_dir = get_suite_srv_dir(self.suite)
            public_key = KeyInfo(
                KeyType.PUBLIC,
                KeyOwner.CLIENT,
                suite_srv_dir=suite_srv_dir,
                install_target=self.install_target
            )
            # Restrict key file permissions to owner read/write.
            old_umask = os.umask(0o177)
            with open(
                    public_key.full_key_path,
                    'w', encoding='utf8') as text_file:
                text_file.write(key)
            os.umask(old_umask)
            # configure_curve must be called every time certificates are
            # added or removed, in order to update the Authenticator's
            # state.
            curve_auth.configure_curve(
                domain='*', location=(client_pub_key_dir))
        for status in (REMOTE_INIT_DONE, REMOTE_INIT_NOT_REQUIRED):
            if status in proc_ctx.out:
                # Good status
                LOG.debug(proc_ctx)
                self.remote_init_map[self.install_target] = status
                return
    # Bad status
    LOG.error(TaskRemoteMgmtError(
        TaskRemoteMgmtError.MSG_INIT,
        platform['install target'], ' '.join(
            quote(item) for item in proc_ctx.cmd),
        proc_ctx.ret_code, proc_ctx.out, proc_ctx.err))
    LOG.error(proc_ctx)
    self.remote_init_map[platform['install target']] = REMOTE_INIT_FAILED
def _job_submit_impl(self, job_file_path, job_runner_name, submit_opts):
    """Helper for self.jobs_submit() and self.job_submit().

    Submit one job and return (ret_code, out, err, job_id).
    """
    # Create NN symbolic link, if necessary
    self._create_nn(job_file_path)
    # Remove stale out/err logs from any previous submission.
    for name in JOB_LOG_ERR, JOB_LOG_OUT:
        try:
            os.unlink(os.path.join(job_file_path, name))
        except OSError:
            pass

    # Start new status file
    job_status_file = open(f"{job_file_path}.status", "w")
    job_status_file.write("{0}={1}\n".format(self.CYLC_JOB_RUNNER_NAME,
                                             job_runner_name))
    job_status_file.close()

    # Submit job
    job_runner = self._get_sys(job_runner_name)
    if not self.clean_env:
        # Pass the whole environment to the job submit subprocess.
        # (Note this runs on the job host).
        env = os.environ
    else:
        # $HOME is required by job.sh on the job host.
        env = {'HOME': os.environ.get('HOME', '')}
    # Pass selected extra variables to the job submit subprocess.
    for var in self.env:
        env[var] = os.environ.get(var, '')
    if self.path is not None:
        # Append to avoid overriding an inherited PATH (e.g. in a venv)
        env['PATH'] = env.get('PATH', '') + ':' + ':'.join(self.path)
    if hasattr(job_runner, "submit"):
        submit_opts['env'] = env
        # job_runner.submit should handle OSError, if relevant.
        ret_code, out, err = job_runner.submit(job_file_path, submit_opts)
    else:
        proc_stdin_arg = None
        # Set command STDIN to DEVNULL by default to prevent leakage of
        # STDIN from current environment.
        proc_stdin_value = DEVNULL  # nosec
        if hasattr(job_runner, "get_submit_stdin"):
            proc_stdin_arg, proc_stdin_value = job_runner.get_submit_stdin(
                job_file_path, submit_opts)
        # communicate() requires bytes for its input argument.
        if isinstance(proc_stdin_value, str):
            proc_stdin_value = proc_stdin_value.encode()
        if hasattr(job_runner, "SUBMIT_CMD_ENV"):
            env.update(job_runner.SUBMIT_CMD_ENV)
        job_runner_cmd_tmpl = submit_opts.get("job_runner_cmd_tmpl")
        if job_runner_cmd_tmpl:
            # No need to catch OSError when using shell. It is unlikely
            # that we do not have a shell, and still manage to get as far
            # as here.
            job_runner_cmd = job_runner_cmd_tmpl % {"job": job_file_path}
            proc = procopen(job_runner_cmd, stdin=proc_stdin_arg,
                            stdoutpipe=True, stderrpipe=True, usesh=True,
                            env=env)
            # calls to open a shell are aggregated in
            # cylc_subproc.procopen()
        else:
            command = shlex.split(job_runner.SUBMIT_CMD_TMPL %
                                  {"job": job_file_path})
            try:
                proc = procopen(command, stdin=proc_stdin_arg,
                                stdoutpipe=True, stderrpipe=True,
                                env=env)
            except OSError as exc:
                # subprocess.Popen has a bad habit of not setting the
                # filename of the executable when it raises an OSError.
                if not exc.filename:
                    exc.filename = command[0]
                return 1, "", str(exc), ""
        out, err = (f.decode()
                    for f in proc.communicate(proc_stdin_value))
        ret_code = proc.wait()
        # Close the stdin source if it is a file object (AttributeError
        # when it is None or bytes).
        try:
            proc_stdin_arg.close()
        except (AttributeError, IOError):
            pass

    # Filter submit command output, if relevant
    # Get job ID, if possible
    job_id = None
    if out or err:
        try:
            out, err, job_id = self._filter_submit_output(
                f"{job_file_path}.status", job_runner, out, err)
        except OSError:
            # Could not update the status file: kill the submission.
            ret_code = 1
            self.job_kill(f"{job_file_path}.status")

    return ret_code, out, err, job_id
def main(_, options, *args):
    """Check the cylc version on each remote account used by a suite.

    Warns about accounts whose cylc version differs from ours; exits 1
    if there were warnings and --error was given.
    """
    # suite name or file path
    suite, suiterc = parse_suite_arg(options, args[0])

    # extract task host accounts from the suite
    config = SuiteConfig(
        suite, suiterc, options,
        load_template_vars(options.templatevars, options.templatevars_file))
    account_set = set()
    for name in config.get_namespace_list('all tasks'):
        account_set.add(
            (config.get_config(['runtime', name, 'remote', 'owner']),
             config.get_config(['runtime', name, 'remote', 'host'])))
    task_remote_mgr = TaskRemoteMgr(suite, SubProcPool())
    # Kick off asynchronous host selection for every account.
    for _, host_str in account_set:
        task_remote_mgr.remote_host_select(host_str)
    accounts = []
    # Poll until every account's host selection has resolved.
    while account_set:
        for user, host_str in account_set.copy():
            res = task_remote_mgr.remote_host_select(host_str)
            if res:
                account_set.remove((user, host_str))
                accounts.append((user, res))
        if account_set:
            task_remote_mgr.proc_pool.process()
            sleep(1.0)

    # Interrogate the each remote account with CYLC_VERSION set to our
    # version.
    # Post backward compatibility concerns to do this we can just run:
    #   cylc version --host=HOST --user=USER
    # but this command only exists for version > 6.3.0.
    # So for the moment generate an actual remote invocation command
    # string for "cylc --version".

    # (save verbose flag as gets reset in remrun)
    verbose = cylc.flow.flags.verbose

    warn = {}
    contacted = 0
    for user, host in sorted(accounts):
        argv = ["cylc", "version"]
        if user and host:
            argv += ["--user=%s" % user, "--host=%s" % host]
            user_at_host = "%s@%s" % (user, host)
        elif user:
            argv += ["--user=%s" % user]
            user_at_host = "%s@localhost" % user
        elif host:
            argv += ["--host=%s" % host]
            user_at_host = host
        if verbose:
            print("%s: %s" % (user_at_host, ' '.join(argv)))
        # NOTE(review): open(os.devnull) is never closed — handle leak.
        proc = procopen(argv, stdin=open(os.devnull),
                        stdoutpipe=True, stderrpipe=True)
        out, err = proc.communicate()
        out = out.decode()
        err = err.decode()
        if proc.wait() == 0:
            if verbose:
                print("    %s" % out)
            contacted += 1
            out = out.strip()
            if out != CYLC_VERSION:
                warn[user_at_host] = out
        else:
            print('ERROR ' + user_at_host + ':', file=sys.stderr)
            print(err, file=sys.stderr)

    # report results
    if not warn:
        if contacted:
            print("All", contacted, "accounts have cylc-" + CYLC_VERSION)
    else:
        print("WARNING: failed to invoke cylc-%s on %d accounts:" % (
            CYLC_VERSION, len(warn)))
        m = max(len(ac) for ac in warn)
        for ac, warning in warn.items():
            print(' ', ac.ljust(m), warning)
        if options.error:
            sys.exit(1)