Example #1
0
 def test_sprocess_communicate_with_process(self):
     foo = ' foo'
     bar = ' bar'
     cmd = ["echo", "this is a command" + foo + bar]
     p = procopen(cmd, stdoutpipe=True)
     stdout, _ = p.communicate()
     compare(stdout, b"this is a command foo bar\n")
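
A note for readers unfamiliar with cylc's subprocess wrapper: in these examples `procopen` appears to be a thin wrapper around subprocess.Popen (later examples pass Popen keywords such as stdout=PIPE, preexec_fn and env straight through). A stand-alone sketch of the same test using only the standard library, assuming that stdoutpipe=True maps to stdout=subprocess.PIPE:

import subprocess

def test_echo_communicate():
    # echo writes its argument followed by a newline to stdout
    cmd = ["echo", "this is a command foo bar"]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    stdout, _ = proc.communicate()
    assert stdout == b"this is a command foo bar\n"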
Example #2
0
 def _run_command_init(cls,
                       ctx,
                       bad_hosts=None,
                       callback=None,
                       callback_args=None,
                       callback_255=None,
                       callback_255_args=None):
     """Prepare and launch shell command in ctx."""
     try:
         if ctx.cmd_kwargs.get('stdin_files'):
             if len(ctx.cmd_kwargs['stdin_files']) > 1:
                 stdin_file = cls.get_temporary_file()
                 for file_ in ctx.cmd_kwargs['stdin_files']:
                     if hasattr(file_, 'read'):
                         stdin_file.write(file_.read())
                     else:
                         with open(file_, 'rb') as openfile:
                             stdin_file.write(openfile.read())
                 stdin_file.seek(0)
             elif hasattr(ctx.cmd_kwargs['stdin_files'][0], 'read'):
                 stdin_file = ctx.cmd_kwargs['stdin_files'][0]
             else:
                 stdin_file = open(  # noqa: SIM115
                     # (nasty use of file handles, should avoid in future)
                     ctx.cmd_kwargs['stdin_files'][0],
                     'rb')
         elif ctx.cmd_kwargs.get('stdin_str'):
             stdin_file = cls.get_temporary_file()
             stdin_file.write(ctx.cmd_kwargs.get('stdin_str').encode())
             stdin_file.seek(0)
         else:
             stdin_file = DEVNULL
         proc = procopen(
             ctx.cmd,
             stdin=stdin_file,
             stdoutpipe=True,
             stderrpipe=True,
             # Execute command as a process group leader,
             # so we can use "os.killpg" to kill the whole group.
             preexec_fn=os.setpgrp,
             env=ctx.cmd_kwargs.get('env'),
             usesh=ctx.cmd_kwargs.get('shell'))
         # calls to open a shell are aggregated in cylc_subproc.procopen()
         # with logging for what is calling it and the commands given
     except OSError as exc:
         if exc.filename is None:
             exc.filename = ctx.cmd[0]
         LOG.exception(exc)
         ctx.ret_code = 1
         ctx.err = str(exc)
         cls._run_command_exit(ctx,
                               bad_hosts=bad_hosts,
                               callback=callback,
                               callback_args=callback_args,
                               callback_255=callback_255,
                               callback_255_args=callback_255_args)
         return None
     else:
         LOG.debug(ctx.cmd)
         return proc
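
The comment above on preexec_fn=os.setpgrp and os.killpg describes a standard POSIX pattern. A minimal stand-alone sketch of that pattern using only the standard library (POSIX-only, not cylc's API):

import os
import signal
import subprocess

# Start the child as its own process-group leader...
proc = subprocess.Popen(
    ["sleep", "60"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    preexec_fn=os.setpgrp)
# ...so the whole group can be signalled in one call.
os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
proc.wait()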
Example #3
0
 def test_sprocess_communicate_with_process(self):
     foo = ' foo'
     bar = ' bar'
     cmd = ["echo", "this is a command" + foo + bar]
     p = procopen(cmd, stdoutpipe=True)
     stdout, _ = p.communicate()
     compare(stdout, b"this is a command foo bar\n")
Example #4
0
def main(_, options: 'Values', *ids) -> None:
    workflow_id, _, flow_file = parse_id(
        *ids,
        src=True,
        constraint='workflows',
    )

    # extract task host platforms from the workflow_id
    config = WorkflowConfig(
        workflow_id, flow_file, options,
        load_template_vars(options.templatevars, options.templatevars_file))

    platforms = {
        config.get_config(['runtime', name, 'platform'])
        for name in config.get_namespace_list('all tasks')
    } - {None, 'localhost'}

    # When "workflow run hosts" are formalised as "flow platforms"
    # we can substitute `localhost` for this, in the mean time
    # we will have to assume that flow hosts are configured correctly.

    if not platforms:
        sys.exit(0)

    verbose = cylc.flow.flags.verbosity > 0

    # get the cylc version on each platform
    versions = {}
    for platform_name in sorted(platforms):
        platform = get_platform(platform_name)
        host = get_host_from_platform(platform, bad_hosts=None)
        cmd = construct_ssh_cmd(['version'], platform, host)
        if verbose:
            print(cmd)
        proc = procopen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
        out, err = proc.communicate()
        out = out.decode()
        err = err.decode()
        if proc.wait() == 0:
            if verbose:
                print("   %s" % out)
            versions[platform_name] = out.strip()
        else:
            versions[platform_name] = f'ERROR: {err.strip()}'

    # report results
    max_len = max((len(platform_name) for platform_name in platforms))
    print(f'{"platform".rjust(max_len)}: cylc version')
    print('-' * (max_len + 14))
    for platform_name, result in versions.items():
        print(f'{platform_name.rjust(max_len)}: {result}')
    if all((version == CYLC_VERSION for version in versions.values())):
        ret_code = 0
    elif options.error:
        ret_code = 1
    else:
        ret_code = 0
    sys.exit(ret_code)
Example #5
0
    def job_kill(self, st_file_path):
        """Ask job runner to terminate the job specified in "st_file_path".

        Return a 2-tuple (return_code, message): return_code is 0 on success
        and non-zero on failure; message holds any error text.

        """
        # WORKFLOW_RUN_DIR/log/job/CYCLE/TASK/SUBMIT/job.status
        self.configure_workflow_run_dir(st_file_path.rsplit(os.sep, 6)[0])
        try:
            with open(st_file_path) as st_file:
                for line in st_file:
                    if line.startswith(f"{self.CYLC_JOB_RUNNER_NAME}="):
                        job_runner = self._get_sys(line.strip().split("=",
                                                                      1)[1])
                        break
                else:
                    return (1, "Cannot determine job runner from "
                            f"{JOB_LOG_STATUS} file")
                st_file.seek(0, 0)  # rewind
                if getattr(job_runner, "SHOULD_KILL_PROC_GROUP", False):
                    for line in st_file:
                        if line.startswith(CYLC_JOB_PID + "="):
                            pid = line.strip().split("=", 1)[1]
                            try:
                                os.killpg(os.getpgid(int(pid)), SIGKILL)
                            except (OSError, ValueError) as exc:
                                traceback.print_exc()
                                return (1, str(exc))
                            else:
                                return (0, "")
                st_file.seek(0, 0)  # rewind
                if hasattr(job_runner, "KILL_CMD_TMPL"):
                    for line in st_file:
                        if not line.startswith(f"{self.CYLC_JOB_ID}="):
                            continue
                        job_id = line.strip().split("=", 1)[1]
                        command = shlex.split(job_runner.KILL_CMD_TMPL %
                                              {"job_id": job_id})
                        try:
                            proc = procopen(command,
                                            stdindevnull=True,
                                            stderrpipe=True)
                        except OSError as exc:
                            # subprocess.Popen has a bad habit of not setting
                            # the filename of the executable when it raises an
                            # OSError.
                            if not exc.filename:
                                exc.filename = command[0]
                            traceback.print_exc()
                            return (1, str(exc))
                        else:
                            return (proc.wait(),
                                    proc.communicate()[1].decode())
            return (1, f"Cannot determine job ID from {JOB_LOG_STATUS} file")
        except IOError as exc:
            return (1, str(exc))
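
The job.status file read above is a plain KEY=value file. A minimal illustrative parser, not part of cylc; the path argument is a placeholder, and the real key names are the constants referenced in the example (e.g. self.CYLC_JOB_ID, CYLC_JOB_PID):

def read_status(path):
    # Collect KEY=value lines into a dict; later duplicates overwrite earlier.
    entries = {}
    with open(path) as handle:
        for line in handle:
            line = line.strip()
            if "=" in line:
                key, value = line.split("=", 1)
                entries[key] = value
    return entries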
Example #6
0
def cmd_find_ver(module,
                 min_ver,
                 cmd_base,
                 ver_opt,
                 ver_extr,
                 outfile=1,
                 write=True):
    """Print outcome & return Boolean (True for pass) of local module version
    requirement test using relevant custom command base keyword(s),
    version-checking option(s) & version-extraction regex.
    """
    msg = '%s (%s)' % (module, string_ver(min_ver) +
                       '+' if min_ver is not None else 'any')
    for cmd in cmd_base:
        try_next_cmd = True
        if procopen(['which', cmd],
                    stdin=open(os.devnull),
                    stdoutpipe=True,
                    stderrpipe=True).wait():
            res = [NOTFOUND_MSG, False]
        else:
            try:
                output = procopen(
                    [cmd, ver_opt], stdoutpipe=True,
                    stdin=open(os.devnull),
                    stderrpipe=True).communicate()[outfile - 1].decode()\
                    .strip()
                version = re.search(ver_extr, output).groups()[0]
                try_next_cmd = False
                if min_ver is None:
                    res = ['%s (%s)' % (FOUND_NOVER_MSG, version), True]
                elif parse_version(version) >= min_ver:
                    res = ['%s (%s)' % (MINVER_MET_MSG, version), True]
                else:
                    res = ['%s (%s)' % (MINVER_NOTMET_MSG, version), False]
            except AttributeError:
                res = [FOUND_UNKNOWNVER_MSG, False]
        if not try_next_cmd:
            break
    if write:
        shell_align_write('.', msg, res[0])
    return res[1]
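
A compact stand-alone variant of the same idea (locate a command, run its version option, extract the version with a regex), for illustration only: shutil.which replaces the `which` subprocess, the default regex is just an example, and only stdout is examined, unlike the original's "outfile" switch.

import re
import shutil
import subprocess

def find_version(cmd, ver_opt="--version", ver_extr=r"(\d+\.\d+(?:\.\d+)?)"):
    # Bail out early if the executable is not on PATH.
    if shutil.which(cmd) is None:
        return None
    result = subprocess.run([cmd, ver_opt], capture_output=True, text=True)
    match = re.search(ver_extr, result.stdout.strip())
    return match.group(1) if match else None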
Example #7
0
def main(_, options, *args):
    # suite name or file path
    suite, flow_file = parse_suite_arg(options, args[0])

    # extract task host platforms from the suite
    config = SuiteConfig(
        suite, flow_file, options,
        load_template_vars(options.templatevars, options.templatevars_file))

    platforms = {
        config.get_config(['runtime', name, 'platform'])
        for name in config.get_namespace_list('all tasks')
    } - {None, 'localhost'}

    # When "suite run hosts" are formalised as "flow platforms"
    # we can substitute `localhost` for this, in the mean time
    # we will have to assume that flow hosts are configured correctly.

    if not platforms:
        sys.exit(0)

    verbose = cylc.flow.flags.verbose

    # get the cylc version on each platform
    versions = {}
    for platform_name in sorted(platforms):
        platform = get_platform(platform_name)
        cmd = construct_platform_ssh_cmd(['version'], platform)
        if verbose:
            print(cmd)
        proc = procopen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
        out, err = proc.communicate()
        out = out.decode()
        err = err.decode()
        if proc.wait() == 0:
            if verbose:
                print("   %s" % out)
            versions[platform_name] = out.strip()
        else:
            versions[platform_name] = f'ERROR: {err.strip()}'

    # report results
    max_len = max((len(platform_name) for platform_name in platforms))
    print(f'{"platform".rjust(max_len)}: cylc version')
    print('-' * (max_len + 14))
    for platform_name, result in versions.items():
        print(f'{platform_name.rjust(max_len)}: {result}')
    if all((version == CYLC_VERSION for version in versions.values())):
        exit = 0
    elif options.error:
        exit = 1
    else:
        exit = 0
    sys.exit(exit)
Example #8
0
    def job_kill(self, st_file_path):
        """Ask batch system to terminate the job specified in "st_file_path".

        Return a 2-tuple (return_code, message): return_code is 0 on success
        and non-zero on failure; message holds any error text.

        """
        # SUITE_RUN_DIR/log/job/CYCLE/TASK/SUBMIT/job.status
        self.configure_suite_run_dir(st_file_path.rsplit(os.sep, 6)[0])
        try:
            st_file = open(st_file_path)
            for line in st_file:
                if line.startswith(self.CYLC_BATCH_SYS_NAME + "="):
                    batch_sys = self._get_sys(line.strip().split("=", 1)[1])
                    break
            else:
                return (1, "Cannot determine batch system from %s file" %
                        (JOB_LOG_STATUS))
            st_file.seek(0, 0)  # rewind
            if getattr(batch_sys, "SHOULD_KILL_PROC_GROUP", False):
                for line in st_file:
                    if line.startswith(CYLC_JOB_PID + "="):
                        pid = line.strip().split("=", 1)[1]
                        try:
                            os.killpg(os.getpgid(int(pid)), SIGKILL)
                        except (OSError, ValueError) as exc:
                            traceback.print_exc()
                            return (1, str(exc))
                        else:
                            return (0, "")
            st_file.seek(0, 0)  # rewind
            if hasattr(batch_sys, "KILL_CMD_TMPL"):
                for line in st_file:
                    if not line.startswith(self.CYLC_BATCH_SYS_JOB_ID + "="):
                        continue
                    job_id = line.strip().split("=", 1)[1]
                    command = shlex.split(batch_sys.KILL_CMD_TMPL %
                                          {"job_id": job_id})
                    try:
                        proc = procopen(command,
                                        stdindevnull=True,
                                        stderrpipe=True)
                    except OSError as exc:
                        # subprocess.Popen has a bad habit of not setting the
                        # filename of the executable when it raises an OSError.
                        if not exc.filename:
                            exc.filename = command[0]
                        traceback.print_exc()
                        return (1, str(exc))
                    else:
                        return (proc.wait(), proc.communicate()[1].decode())
            return (1, "Cannot determine batch job ID from %s file" %
                    (JOB_LOG_STATUS))
        except IOError as exc:
            return (1, str(exc))
Example #9
0
    def job_kill(self, st_file_path):
        """Ask batch system to terminate the job specified in "st_file_path".

        Return a 2-tuple (return_code, message): return_code is 0 on success
        and non-zero on failure; message holds any error text.

        """
        # SUITE_RUN_DIR/log/job/CYCLE/TASK/SUBMIT/job.status
        self.configure_suite_run_dir(st_file_path.rsplit(os.sep, 6)[0])
        try:
            st_file = open(st_file_path)
            for line in st_file:
                if line.startswith(self.CYLC_BATCH_SYS_NAME + "="):
                    batch_sys = self._get_sys(line.strip().split("=", 1)[1])
                    break
            else:
                return (1,
                        "Cannot determine batch system from %s file" % (
                            JOB_LOG_STATUS))
            st_file.seek(0, 0)  # rewind
            if getattr(batch_sys, "SHOULD_KILL_PROC_GROUP", False):
                for line in st_file:
                    if line.startswith(CYLC_JOB_PID + "="):
                        pid = line.strip().split("=", 1)[1]
                        try:
                            os.killpg(os.getpgid(int(pid)), SIGKILL)
                        except (OSError, ValueError) as exc:
                            traceback.print_exc()
                            return (1, str(exc))
                        else:
                            return (0, "")
            st_file.seek(0, 0)  # rewind
            if hasattr(batch_sys, "KILL_CMD_TMPL"):
                for line in st_file:
                    if not line.startswith(self.CYLC_BATCH_SYS_JOB_ID + "="):
                        continue
                    job_id = line.strip().split("=", 1)[1]
                    command = shlex.split(
                        batch_sys.KILL_CMD_TMPL % {"job_id": job_id})
                    try:
                        proc = procopen(command, stdin=open(os.devnull),
                                        stderrpipe=True)
                    except OSError as exc:
                        # subprocess.Popen has a bad habit of not setting the
                        # filename of the executable when it raises an OSError.
                        if not exc.filename:
                            exc.filename = command[0]
                        traceback.print_exc()
                        return (1, str(exc))
                    else:
                        return (proc.wait(), proc.communicate()[1].decode())
            return (1, "Cannot determine batch job ID from %s file" % (
                       JOB_LOG_STATUS))
        except IOError as exc:
            return (1, str(exc))
Example #10
0
def output_width(min_width=65, max_width=90):
    """Return a suitable output alignment width given user terminal width."""
    proc = procopen(['stty', 'size'], stdoutpipe=True)
    if proc.wait():
        return int((min_width + max_width) / 2)
    else:
        try:
            return max(min_width,
                       min(max_width, int(proc.communicate()[0].split()[1])))
        except IndexError:
            return int((min_width + max_width) / 2)
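
For comparison only: the standard library can report the terminal width directly, avoiding the `stty size` subprocess; a sketch that keeps the same clamping and fallback behaviour:

import shutil

def output_width_stdlib(min_width=65, max_width=90):
    # get_terminal_size returns the fallback when the size cannot be queried.
    width = shutil.get_terminal_size(fallback=(0, 0)).columns
    if not width:
        return (min_width + max_width) // 2
    return max(min_width, min(max_width, width))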
Example #11
0
 def _run_command_init(cls, ctx, callback=None, callback_args=None):
     """Prepare and launch shell command in ctx."""
     try:
         if ctx.cmd_kwargs.get('stdin_files'):
             if len(ctx.cmd_kwargs['stdin_files']) > 1:
                 stdin_file = cls.get_temporary_file()
                 for file_ in ctx.cmd_kwargs['stdin_files']:
                     if hasattr(file_, 'read'):
                         stdin_file.write(file_.read())
                     else:
                         stdin_file.write(open(file_, 'rb').read())
                 stdin_file.seek(0)
             elif hasattr(ctx.cmd_kwargs['stdin_files'][0], 'read'):
                 stdin_file = ctx.cmd_kwargs['stdin_files'][0]
             else:
                 stdin_file = open(
                     ctx.cmd_kwargs['stdin_files'][0], 'rb')
         elif ctx.cmd_kwargs.get('stdin_str'):
             stdin_file = cls.get_temporary_file()
             stdin_file.write(ctx.cmd_kwargs.get('stdin_str').encode())
             stdin_file.seek(0)
         else:
             stdin_file = open(os.devnull)
         proc = procopen(
             ctx.cmd, stdin=stdin_file, stdoutpipe=True, stderrpipe=True,
             # Execute command as a process group leader,
             # so we can use "os.killpg" to kill the whole group.
             preexec_fn=os.setpgrp,
             env=ctx.cmd_kwargs.get('env'),
             usesh=ctx.cmd_kwargs.get('shell'))
         # calls to open a shell are aggregated in cylc_subproc.procopen()
         # with logging for what is calling it and the commands given
     except (IOError, OSError) as exc:
         if exc.filename is None:
             exc.filename = ctx.cmd[0]
         LOG.exception(exc)
         ctx.ret_code = 1
         ctx.err = str(exc)
         cls._run_command_exit(ctx, callback, callback_args)
         return None
     else:
         LOG.debug(ctx.cmd)
         return proc
Example #12
0
def run_get_stdout(command, timeout=None, poll_delay=None):
    """Get standard output from a shell command.

    If "timeout" is specified, it should be the number of seconds before
    timeout.  On timeout, the command will be killed. The argument "poll_delay"
    is only relevant if "timeout" is specified. It specifies the intervals in
    number of seconds between polling for the completion of the command.

    Return (True, [stdoutline1, ...]) on success.
    Return (False, [err_msg, command]) on failure.

    """
    try:
        proc = procopen(command,
                        usesh=True,
                        preexec_fn=setpgrp,
                        stdin=open(devnull),
                        stderrpipe=True,
                        stdoutpipe=True)
        # calls to open a shell are aggregated in cylc_subproc.procopen()
        is_killed_after_timeout = False
        if timeout:
            if poll_delay is None:
                poll_delay = POLL_DELAY
            timeout_time = time() + timeout
            while proc.poll() is None:
                if time() > timeout_time:
                    killpg(proc.pid, SIGTERM)
                    is_killed_after_timeout = True
                    break
                sleep(poll_delay)
        out, err = (f.decode() for f in proc.communicate())
        res = proc.wait()
        if res < 0 and is_killed_after_timeout:
            return (False, [ERR_TIMEOUT % (timeout, -res, err), command])
        elif res < 0:
            return (False, [ERR_SIGNAL % (-res, err), command])
        elif res > 0:
            return (False, [ERR_RETCODE % (res, err), command])
    except OSError:  # should never do this with shell=True
        return (False, [ERR_OS, command])
    else:
        return (True, out.strip().splitlines())
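
The manual poll/kill loop above predates the built-in timeout support in subprocess. A rough sketch of just the timeout handling with the standard library, not a drop-in replacement (the error-message formatting is omitted):

import os
import signal
import subprocess

def run_get_stdout_simple(command, timeout=None):
    # Run "command" through the shell as a process-group leader, as above.
    proc = subprocess.Popen(
        command, shell=True, preexec_fn=os.setpgrp,
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    try:
        out, err = proc.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        # Kill the whole process group, then collect whatever was produced.
        os.killpg(proc.pid, signal.SIGTERM)
        out, err = proc.communicate()
    if proc.returncode != 0:
        return (False, [err.decode(), command])
    return (True, out.decode().strip().splitlines())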
Example #13
0
    def _job_submit_impl(
            self, job_file_path, batch_sys_name, submit_opts):
        """Helper for self.jobs_submit() and self.job_submit()."""

        # Create NN symbolic link, if necessary
        self._create_nn(job_file_path)
        for name in JOB_LOG_ERR, JOB_LOG_OUT:
            try:
                os.unlink(os.path.join(job_file_path, name))
            except OSError:
                pass

        # Start new status file
        job_status_file = open(job_file_path + ".status", "w")
        job_status_file.write(
            "%s=%s\n" % (self.CYLC_BATCH_SYS_NAME, batch_sys_name))
        job_status_file.close()

        # Submit job
        batch_sys = self._get_sys(batch_sys_name)
        proc_stdin_arg = None
        proc_stdin_value = open(os.devnull)
        if hasattr(batch_sys, "get_submit_stdin"):
            proc_stdin_arg, proc_stdin_value = batch_sys.get_submit_stdin(
                job_file_path, submit_opts)
            if isinstance(proc_stdin_arg, str):
                proc_stdin_arg = proc_stdin_arg.encode()
            if isinstance(proc_stdin_value, str):
                proc_stdin_value = proc_stdin_value.encode()
        if hasattr(batch_sys, "submit"):
            # batch_sys.submit should handle OSError, if relevant.
            ret_code, out, err = batch_sys.submit(job_file_path, submit_opts)
        else:
            env = None
            if hasattr(batch_sys, "SUBMIT_CMD_ENV"):
                env = dict(os.environ)
                env.update(batch_sys.SUBMIT_CMD_ENV)
            batch_submit_cmd_tmpl = submit_opts.get("batch_submit_cmd_tmpl")
            if batch_submit_cmd_tmpl:
                # No need to catch OSError when using shell. It is unlikely
                # that we do not have a shell, and still manage to get as far
                # as here.
                batch_sys_cmd = batch_submit_cmd_tmpl % {"job": job_file_path}
                proc = procopen(batch_sys_cmd, stdin=proc_stdin_arg,
                                stdoutpipe=True, stderrpipe=True, usesh=True,
                                env=env)
                # calls to open a shell are aggregated in
                # cylc_subproc.procopen()
            else:
                command = shlex.split(
                    batch_sys.SUBMIT_CMD_TMPL % {"job": job_file_path})
                try:
                    proc = procopen(command, stdin=proc_stdin_arg,
                                    stdoutpipe=True, stderrpipe=True, env=env)
                except OSError as exc:
                    # subprocess.Popen has a bad habit of not setting the
                    # filename of the executable when it raises an OSError.
                    if not exc.filename:
                        exc.filename = command[0]
                    return 1, "", str(exc), ""
            out, err = (f.decode() for f in proc.communicate(proc_stdin_value))
            ret_code = proc.wait()

        # Filter submit command output, if relevant
        # Get job ID, if possible
        job_id = None
        if out or err:
            try:
                out, err, job_id = self._filter_submit_output(
                    job_file_path + ".status", batch_sys, out, err)
            except OSError:
                ret_code = 1
                self.job_kill(job_file_path + ".status")

        return ret_code, out, err, job_id
Example #14
0
if event != 'shutdown':
    raise SystemExit("ERROR: run this as a shutdown event handler")

try:
    log_dir = os.path.expandvars(os.environ['CYLC_SUITE_LOG_DIR'])
    suite_dir = os.path.expandvars(os.environ['CYLC_SUITE_DEF_PATH'])
except KeyError as exc:
    raise SystemExit(exc)

ref = os.path.join(suite_dir, 'broadcast.ref')
log = os.path.join(suite_dir, 'broadcast.log')

fref = open(ref, 'r')
flog = open(log, 'r')

reflines = fref.readlines()
loglines = flog.readlines()

reflines.sort()
loglines.sort()

if reflines != loglines:
    sys.exit("ERROR: broadcast logs do not compare")
else:
    print("broadcast logs compare OK")

res = procopen(["cylc check-triggering " + event + " " + suite], usesh=True)
status = res.wait()
if status != 0:
    sys.exit(1)
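
The log comparison above can be written more compactly with context managers so the file handles are closed; same logic, illustration only:

def logs_match(ref_path, log_path):
    # Compare the two files line by line, ignoring line order.
    with open(ref_path) as fref, open(log_path) as flog:
        return sorted(fref.readlines()) == sorted(flog.readlines())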
Example #15
0
    def _job_submit_impl(self, job_file_path, batch_sys_name, submit_opts):
        """Helper for self.jobs_submit() and self.job_submit()."""

        # Create NN symbolic link, if necessary
        self._create_nn(job_file_path)
        for name in JOB_LOG_ERR, JOB_LOG_OUT:
            try:
                os.unlink(os.path.join(job_file_path, name))
            except OSError:
                pass

        # Start new status file
        job_status_file = open(job_file_path + ".status", "w")
        job_status_file.write("%s=%s\n" %
                              (self.CYLC_BATCH_SYS_NAME, batch_sys_name))
        job_status_file.close()

        # Submit job
        batch_sys = self._get_sys(batch_sys_name)
        if hasattr(batch_sys, "submit"):
            # batch_sys.submit should handle OSError, if relevant.
            ret_code, out, err = batch_sys.submit(job_file_path, submit_opts)
        else:
            proc_stdin_arg = None
            # Set command STDIN to DEVNULL by default to prevent leakage of
            # STDIN from current environment.
            proc_stdin_value = DEVNULL  # nosec
            if hasattr(batch_sys, "get_submit_stdin"):
                proc_stdin_arg, proc_stdin_value = batch_sys.get_submit_stdin(
                    job_file_path, submit_opts)
                if isinstance(proc_stdin_value, str):
                    proc_stdin_value = proc_stdin_value.encode()
            env = None
            if hasattr(batch_sys, "SUBMIT_CMD_ENV"):
                env = dict(os.environ)
                env.update(batch_sys.SUBMIT_CMD_ENV)
            batch_submit_cmd_tmpl = submit_opts.get("batch_submit_cmd_tmpl")
            if batch_submit_cmd_tmpl:
                # No need to catch OSError when using shell. It is unlikely
                # that we do not have a shell, and still manage to get as far
                # as here.
                batch_sys_cmd = batch_submit_cmd_tmpl % {"job": job_file_path}
                proc = procopen(batch_sys_cmd,
                                stdin=proc_stdin_arg,
                                stdoutpipe=True,
                                stderrpipe=True,
                                usesh=True,
                                env=env)
                # calls to open a shell are aggregated in
                # cylc_subproc.procopen()
            else:
                command = shlex.split(batch_sys.SUBMIT_CMD_TMPL %
                                      {"job": job_file_path})
                try:
                    proc = procopen(command,
                                    stdin=proc_stdin_arg,
                                    stdoutpipe=True,
                                    stderrpipe=True,
                                    env=env)
                except OSError as exc:
                    # subprocess.Popen has a bad habit of not setting the
                    # filename of the executable when it raises an OSError.
                    if not exc.filename:
                        exc.filename = command[0]
                    return 1, "", str(exc), ""
            out, err = (f.decode() for f in proc.communicate(proc_stdin_value))
            ret_code = proc.wait()
            try:
                proc_stdin_arg.close()
            except (AttributeError, IOError):
                pass

        # Filter submit command output, if relevant
        # Get job ID, if possible
        job_id = None
        if out or err:
            try:
                out, err, job_id = self._filter_submit_output(
                    job_file_path + ".status", batch_sys, out, err)
            except OSError:
                ret_code = 1
                self.job_kill(job_file_path + ".status")

        return ret_code, out, err, job_id
Example #16
0
    def _jobs_poll_batch_sys(self, job_log_root, batch_sys_name, my_ctx_list):
        """Helper 2 for self.jobs_poll(job_log_root, job_log_dirs)."""
        exp_job_ids = [ctx.batch_sys_job_id for ctx in my_ctx_list]
        bad_job_ids = list(exp_job_ids)
        exp_pids = []
        bad_pids = []
        items = [[self._get_sys(batch_sys_name), exp_job_ids, bad_job_ids]]
        if getattr(items[0][0], "SHOULD_POLL_PROC_GROUP", False):
            exp_pids = [ctx.pid for ctx in my_ctx_list if ctx.pid is not None]
            bad_pids.extend(exp_pids)
            items.append([self._get_sys("background"), exp_pids, bad_pids])
        debug_messages = []
        for batch_sys, exp_ids, bad_ids in items:
            if hasattr(batch_sys, "get_poll_many_cmd"):
                # Some poll commands may not be as simple
                cmd = batch_sys.get_poll_many_cmd(exp_ids)
            else:  # if hasattr(batch_sys, "POLL_CMD"):
                # Simple poll command that takes a list of job IDs
                cmd = [batch_sys.POLL_CMD] + exp_ids
            try:
                proc = procopen(cmd, stdin=open(os.devnull),
                                stderrpipe=True, stdoutpipe=True)
            except OSError as exc:
                # subprocess.Popen has a bad habit of not setting the
                # filename of the executable when it raises an OSError.
                if not exc.filename:
                    exc.filename = cmd[0]
                sys.stderr.write(str(exc) + "\n")
                return
            ret_code = proc.wait()
            out, err = (f.decode() for f in proc.communicate())
            debug_messages.append('%s - %s' % (
                batch_sys, len(out.split('\n'))))
            sys.stderr.write(err)
            if (ret_code and hasattr(batch_sys, "POLL_CANT_CONNECT_ERR") and
                    batch_sys.POLL_CANT_CONNECT_ERR in err):
                # Poll command failed because it cannot connect to batch system
                # Assume jobs are still healthy until the batch system is back.
                bad_ids[:] = []
            elif hasattr(batch_sys, "filter_poll_many_output"):
                # Allow custom filter
                for id_ in batch_sys.filter_poll_many_output(out):
                    try:
                        bad_ids.remove(id_)
                    except ValueError:
                        pass
            else:
                # Just about all poll commands return a table, with column 1
                # being the job ID. The logic here should be sufficient to
                # ensure that any table header is ignored.
                for line in out.splitlines():
                    try:
                        head = line.split(None, 1)[0]
                    except IndexError:
                        continue
                    if head in exp_ids:
                        try:
                            bad_ids.remove(head)
                        except ValueError:
                            pass

        debug_flag = False
        for ctx in my_ctx_list:
            ctx.batch_sys_exit_polled = int(
                ctx.batch_sys_job_id in bad_job_ids)
            # Exited the batch system, but the process is still running.
            # This can happen to jobs under some "at" implementations.
            if ctx.batch_sys_exit_polled and ctx.pid in exp_pids:
                if ctx.pid not in bad_pids:
                    ctx.batch_sys_exit_polled = 0
                else:
                    debug_flag = True
            # Add information to "job.status"
            if ctx.batch_sys_exit_polled:
                try:
                    handle = open(os.path.join(
                        job_log_root, ctx.job_log_dir, JOB_LOG_STATUS), "a")
                    handle.write("%s=%s\n" % (
                        self.CYLC_BATCH_SYS_EXIT_POLLED,
                        get_current_time_string()))
                    handle.close()
                except IOError as exc:
                    sys.stderr.write(str(exc) + "\n")

        if debug_flag:
            ctx.batch_sys_call_no_lines = ', '.join(debug_messages)
Example #17
0
    def _jobs_poll_runner(self, job_log_root, job_runner_name, my_ctx_list):
        """Helper 2 for self.jobs_poll(job_log_root, job_log_dirs)."""
        exp_job_ids = [ctx.job_id for ctx in my_ctx_list]
        bad_job_ids = list(exp_job_ids)
        exp_pids = []
        bad_pids = []
        items = [[self._get_sys(job_runner_name), exp_job_ids, bad_job_ids]]
        if getattr(items[0][0], "SHOULD_POLL_PROC_GROUP", False):
            exp_pids = [ctx.pid for ctx in my_ctx_list if ctx.pid is not None]
            bad_pids.extend(exp_pids)
            items.append([self._get_sys("background"), exp_pids, bad_pids])
        debug_messages = []
        for job_runner, exp_ids, bad_ids in items:
            if hasattr(job_runner, "get_poll_many_cmd"):
                # Some poll commands may not be as simple
                cmd = job_runner.get_poll_many_cmd(exp_ids)
            else:  # if hasattr(job_runner, "POLL_CMD"):
                # Simple poll command that takes a list of job IDs
                cmd = [job_runner.POLL_CMD, *exp_ids]
            try:
                proc = procopen(cmd,
                                stdindevnull=True,
                                stderrpipe=True,
                                stdoutpipe=True)
            except OSError as exc:
                # subprocess.Popen has a bad habit of not setting the
                # filename of the executable when it raises an OSError.
                if not exc.filename:
                    exc.filename = cmd[0]
                sys.stderr.write(f"{exc}\n")
                return
            ret_code = proc.wait()
            out, err = (f.decode() for f in proc.communicate())
            debug_messages.append('{0} - {1}'.format(job_runner,
                                                     len(out.split('\n'))))
            sys.stderr.write(err)
            if (ret_code and hasattr(job_runner, "POLL_CANT_CONNECT_ERR")
                    and job_runner.POLL_CANT_CONNECT_ERR in err):
                # Poll command failed because it cannot connect to job runner
                # Assume jobs are still healthy until the job runner is back.
                bad_ids[:] = []
            elif hasattr(job_runner, "filter_poll_many_output"):
                # Allow custom filter
                for id_ in job_runner.filter_poll_many_output(out):
                    try:
                        bad_ids.remove(id_)
                    except ValueError:
                        pass
            else:
                # Just about all poll commands return a table, with column 1
                # being the job ID. The logic here should be sufficient to
                # ensure that any table header is ignored.
                for line in out.splitlines():
                    try:
                        head = line.split(None, 1)[0]
                    except IndexError:
                        continue
                    if head in exp_ids:
                        try:
                            bad_ids.remove(head)
                        except ValueError:
                            pass

        debug_flag = False
        for ctx in my_ctx_list:
            ctx.job_runner_exit_polled = int(ctx.job_id in bad_job_ids)
            # Exited the job runner, but the process is still running.
            # This can happen to jobs under some "at" implementations.
            if ctx.job_runner_exit_polled and ctx.pid in exp_pids:
                if ctx.pid not in bad_pids:
                    ctx.job_runner_exit_polled = 0
                else:
                    debug_flag = True
            # Add information to "job.status"
            if ctx.job_runner_exit_polled:
                try:
                    handle = open(
                        os.path.join(job_log_root, ctx.job_log_dir,
                                     JOB_LOG_STATUS), "a")
                    handle.write("{0}={1}\n".format(
                        self.CYLC_JOB_RUNNER_EXIT_POLLED,
                        get_current_time_string()))
                    handle.close()
                except IOError as exc:
                    sys.stderr.write(f"{exc}\n")

        if debug_flag:
            ctx.job_runner_call_no_lines = ', '.join(debug_messages)
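
The table-filtering logic described in the comments above (column 1 of each line is a job ID; header lines simply fail the membership test) can be shown in isolation. This helper is not part of cylc:

def live_job_ids(poll_output, expected_ids):
    # Keep the first field of each line if it is one of the expected IDs;
    # header lines are never in "expected_ids", so they are skipped.
    found = set()
    for line in poll_output.splitlines():
        fields = line.split(None, 1)
        if fields and fields[0] in expected_ids:
            found.add(fields[0])
    return found

# e.g. live_job_ids("JOBID NAME\n123 a\n456 b\n", {"123", "789"}) == {"123"}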
Example #18
0
    def _remote_init_callback(
            self, proc_ctx, platform, tmphandle,
            curve_auth, client_pub_key_dir):
        """Callback when "cylc remote-init" exits"""
        self.ready = True
        try:
            tmphandle.close()
        except OSError:  # E.g. ignore bad unlink, etc
            pass
        self.install_target = platform['install target']
        if proc_ctx.ret_code == 0:
            if REMOTE_INIT_DONE in proc_ctx.out:
                src_path = get_suite_run_dir(self.suite)
                dst_path = get_remote_suite_run_dir(platform, self.suite)
                try:
                    process = procopen(construct_rsync_over_ssh_cmd(
                        src_path,
                        dst_path,
                        platform,
                        self.rsync_includes),
                        stdoutpipe=True,
                        stderrpipe=True,
                        universal_newlines=True)

                    out, err = process.communicate(timeout=600)
                    install_target = platform['install target']
                    if out:
                        RSYNC_LOG.info(
                            'File installation information for '
                            f'{install_target}:\n {out}')
                    if err:
                        LOG.error(
                            'File installation error on '
                            f'{install_target}:\n {err}')
                except Exception as ex:
                    LOG.error(f"Problem during rsync: {ex}")
                    self.remote_init_map[self.install_target] = (
                        REMOTE_INIT_FAILED)
                    return
            if "KEYSTART" in proc_ctx.out:
                regex_result = re.search(
                    'KEYSTART((.|\n|\r)*)KEYEND', proc_ctx.out)
                key = regex_result.group(1)
                suite_srv_dir = get_suite_srv_dir(self.suite)
                public_key = KeyInfo(
                    KeyType.PUBLIC,
                    KeyOwner.CLIENT,
                    suite_srv_dir=suite_srv_dir,
                    install_target=self.install_target
                )
                old_umask = os.umask(0o177)
                with open(
                        public_key.full_key_path,
                        'w', encoding='utf8') as text_file:
                    text_file.write(key)
                os.umask(old_umask)
                # configure_curve must be called every time certificates are
                # added or removed, in order to update the Authenticator's
                # state.
                curve_auth.configure_curve(
                    domain='*', location=(client_pub_key_dir))
            for status in (REMOTE_INIT_DONE, REMOTE_INIT_NOT_REQUIRED):
                if status in proc_ctx.out:
                    # Good status
                    LOG.debug(proc_ctx)
                    self.remote_init_map[self.install_target] = status
                    return
        # Bad status
        LOG.error(TaskRemoteMgmtError(
            TaskRemoteMgmtError.MSG_INIT,
            platform['install target'], ' '.join(
                quote(item) for item in proc_ctx.cmd),
            proc_ctx.ret_code, proc_ctx.out, proc_ctx.err))
        LOG.error(proc_ctx)
        self.remote_init_map[platform['install target']] = REMOTE_INIT_FAILED
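
The KEYSTART/KEYEND extraction above relies on a multi-line regex; a stand-alone illustration, where the marker names come from the example and the sample text is made up:

import re

sample = "noise\nKEYSTART\n-----hypothetical key material-----\nKEYEND\n"
match = re.search(r"KEYSTART((.|\n|\r)*)KEYEND", sample)
key = match.group(1) if match else None  # the text between the two markers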
Example #19
0
    def _job_submit_impl(self, job_file_path, job_runner_name, submit_opts):
        """Helper for self.jobs_submit() and self.job_submit()."""

        # Create NN symbolic link, if necessary
        self._create_nn(job_file_path)
        for name in JOB_LOG_ERR, JOB_LOG_OUT:
            try:
                os.unlink(os.path.join(job_file_path, name))
            except OSError:
                pass

        # Start new status file
        job_status_file = open(f"{job_file_path}.status", "w")
        job_status_file.write("{0}={1}\n".format(self.CYLC_JOB_RUNNER_NAME,
                                                 job_runner_name))
        job_status_file.close()

        # Submit job
        job_runner = self._get_sys(job_runner_name)
        if not self.clean_env:
            # Pass the whole environment to the job submit subprocess.
            # (Note this runs on the job host).
            env = os.environ
        else:
            # $HOME is required by job.sh on the job host.
            env = {'HOME': os.environ.get('HOME', '')}
        # Pass selected extra variables to the job submit subprocess.
        for var in self.env:
            env[var] = os.environ.get(var, '')
        if self.path is not None:
            # Append to avoid overriding an inherited PATH (e.g. in a venv)
            env['PATH'] = env.get('PATH', '') + ':' + ':'.join(self.path)
        if hasattr(job_runner, "submit"):
            submit_opts['env'] = env
            # job_runner.submit should handle OSError, if relevant.
            ret_code, out, err = job_runner.submit(job_file_path, submit_opts)
        else:
            proc_stdin_arg = None
            # Set command STDIN to DEVNULL by default to prevent leakage of
            # STDIN from current environment.
            proc_stdin_value = DEVNULL  # nosec
            if hasattr(job_runner, "get_submit_stdin"):
                proc_stdin_arg, proc_stdin_value = job_runner.get_submit_stdin(
                    job_file_path, submit_opts)
                if isinstance(proc_stdin_value, str):
                    proc_stdin_value = proc_stdin_value.encode()
            if hasattr(job_runner, "SUBMIT_CMD_ENV"):
                env.update(job_runner.SUBMIT_CMD_ENV)
            job_runner_cmd_tmpl = submit_opts.get("job_runner_cmd_tmpl")
            if job_runner_cmd_tmpl:
                # No need to catch OSError when using shell. It is unlikely
                # that we do not have a shell, and still manage to get as far
                # as here.
                job_runner_cmd = job_runner_cmd_tmpl % {"job": job_file_path}
                proc = procopen(job_runner_cmd,
                                stdin=proc_stdin_arg,
                                stdoutpipe=True,
                                stderrpipe=True,
                                usesh=True,
                                env=env)
                # calls to open a shell are aggregated in
                # cylc_subproc.procopen()
            else:
                command = shlex.split(job_runner.SUBMIT_CMD_TMPL %
                                      {"job": job_file_path})
                try:
                    proc = procopen(command,
                                    stdin=proc_stdin_arg,
                                    stdoutpipe=True,
                                    stderrpipe=True,
                                    env=env)
                except OSError as exc:
                    # subprocess.Popen has a bad habit of not setting the
                    # filename of the executable when it raises an OSError.
                    if not exc.filename:
                        exc.filename = command[0]
                    return 1, "", str(exc), ""
            out, err = (f.decode() for f in proc.communicate(proc_stdin_value))
            ret_code = proc.wait()
            try:
                proc_stdin_arg.close()
            except (AttributeError, IOError):
                pass

        # Filter submit command output, if relevant
        # Get job ID, if possible
        job_id = None
        if out or err:
            try:
                out, err, job_id = self._filter_submit_output(
                    f"{job_file_path}.status", job_runner, out, err)
            except OSError:
                ret_code = 1
                self.job_kill(f"{job_file_path}.status")

        return ret_code, out, err, job_id
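
The "stdin defaults to DEVNULL" behaviour noted in the comments above is easy to reproduce with the standard library; a minimal sketch with an example command:

import subprocess

proc = subprocess.Popen(
    ["cat"],                   # example command that would otherwise read stdin
    stdin=subprocess.DEVNULL,  # the child sees end-of-file immediately
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE)
out, err = proc.communicate()
assert out == b""              # nothing leaked from the parent's stdin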
Example #20
0
def main(_, options, *args):
    # suite name or file path
    suite, suiterc = parse_suite_arg(options, args[0])

    # extract task host accounts from the suite
    config = SuiteConfig(
        suite, suiterc, options,
        load_template_vars(options.templatevars, options.templatevars_file))
    account_set = set()
    for name in config.get_namespace_list('all tasks'):
        account_set.add(
            (config.get_config(['runtime', name, 'remote', 'owner']),
             config.get_config(['runtime', name, 'remote', 'host'])))
    task_remote_mgr = TaskRemoteMgr(suite, SubProcPool())
    for _, host_str in account_set:
        task_remote_mgr.remote_host_select(host_str)
    accounts = []
    while account_set:
        for user, host_str in account_set.copy():
            res = task_remote_mgr.remote_host_select(host_str)
            if res:
                account_set.remove((user, host_str))
                accounts.append((user, res))
        if account_set:
            task_remote_mgr.proc_pool.process()
            sleep(1.0)

    # Interrogate each remote account with CYLC_VERSION set to our version.
    # Once backward compatibility is no longer a concern we could just run:
    #   cylc version --host=HOST --user=USER
    # but that command only exists for versions > 6.3.0.
    # So, for the moment, generate an actual remote invocation command
    # string for "cylc --version".

    # (save the verbose flag, as it gets reset in remrun)
    verbose = cylc.flow.flags.verbose

    warn = {}
    contacted = 0
    for user, host in sorted(accounts):
        argv = ["cylc", "version"]
        if user and host:
            argv += ["--user=%s" % user, "--host=%s" % host]
            user_at_host = "%s@%s" % (user, host)
        elif user:
            argv += ["--user=%s" % user]
            user_at_host = "%s@localhost" % user
        elif host:
            argv += ["--host=%s" % host]
            user_at_host = host
        if verbose:
            print("%s: %s" % (user_at_host, ' '.join(argv)))
        proc = procopen(argv,
                        stdin=open(os.devnull),
                        stdoutpipe=True,
                        stderrpipe=True)
        out, err = proc.communicate()
        out = out.decode()
        err = err.decode()
        if proc.wait() == 0:
            if verbose:
                print("   %s" % out)
            contacted += 1
            out = out.strip()
            if out != CYLC_VERSION:
                warn[user_at_host] = out
        else:
            print('ERROR ' + user_at_host + ':', file=sys.stderr)
            print(err, file=sys.stderr)

    # report results
    if not warn:
        if contacted:
            print("All", contacted, "accounts have cylc-" + CYLC_VERSION)
    else:
        print("WARNING: failed to invoke cylc-%s on %d accounts:" %
              (CYLC_VERSION, len(warn)))
        m = max(len(ac) for ac in warn)
        for ac, warning in warn.items():
            print(' ', ac.ljust(m), warning)
        if options.error:
            sys.exit(1)