Example #1
0
    def remote_tidy(self):
        """Tidy every remote install target that completed file installation.

        Runs "cylc remote-tidy" over SSH on each such target (localhost is
        skipped) to remove the suite contact file and authentication keys.

        This is called on suite shutdown, so nothing may hang: all commands
        are started in parallel and any command still running after
        10 seconds is terminated. Failed commands are logged as warnings.
        """
        def _warn_on_failure(target_name, command, process):
            """Collect the output of a process and warn if it failed."""
            stdout, stderr = (
                stream.decode() for stream in process.communicate())
            if process.wait():
                LOG.warning(
                    TaskRemoteMgmtError(
                        TaskRemoteMgmtError.MSG_TIDY, target_name,
                        ' '.join(quote(item) for item in command),
                        process.returncode, stdout, stderr))

        # Start one SSH command per eligible install target, all in parallel
        running = {}
        for install_target, message in self.remote_init_map.items():
            if (message != REMOTE_FILE_INSTALL_DONE
                    or install_target == get_localhost_install_target()):
                continue
            platform = get_random_platform_for_install_target(install_target)
            platform_name = platform['name']
            tidy_args = ['remote-tidy']
            if cylc.flow.flags.debug:
                tidy_args.append('--debug')
            tidy_args.append(install_target)
            tidy_args.append(get_remote_suite_run_dir(platform, self.suite))
            ssh_cmd = construct_ssh_cmd(tidy_args, platform, timeout='10s')
            LOG.debug("Removing authentication keys and contact file "
                      f"from remote: \"{install_target}\"")
            running[platform_name] = (
                ssh_cmd,
                Popen(ssh_cmd, stdout=PIPE, stderr=PIPE, stdin=DEVNULL))

        # Reap commands as they finish, for at most 10 seconds
        deadline = time() + 10.0
        while running and time() < deadline:
            for platform_name in list(running):
                ssh_cmd, proc = running[platform_name]
                if proc.poll() is not None:
                    del running[platform_name]
                    _warn_on_failure(platform_name, ssh_cmd, proc)

        # Anything left has run out of time: terminate it
        for platform_name, (ssh_cmd, proc) in running.items():
            try:
                proc.terminate()
            except OSError:
                pass
            _warn_on_failure(platform_name, ssh_cmd, proc)
Example #2
0
    def remote_tidy(self):
        """Remove suite contact files and keys from initialised remotes.

        Call "cylc remote-tidy".
        This method is called on suite shutdown, so we want nothing to hang.
        Timeout any incomplete commands after 10 seconds.

        Commands that exit with a non-zero status (including those that had
        to be terminated) are logged as warnings with their decoded output.
        """
        # Issue all SSH commands in parallel
        procs = {}
        for platform, init_with_contact in self.remote_init_map.items():
            platform = get_platform(platform)
            host = get_host_from_platform(platform)
            owner = platform['owner']
            self.install_target = get_install_target_from_platform(platform)
            if init_with_contact != REMOTE_INIT_DONE:
                continue
            cmd = ['remote-tidy']
            if cylc.flow.flags.debug:
                cmd.append('--debug')
            cmd.append(str(self.install_target))
            cmd.append(get_remote_suite_run_dir(platform, self.suite))
            if is_remote_platform(platform):
                cmd = construct_platform_ssh_cmd(cmd, platform, timeout='10s')
            else:
                cmd = ['cylc'] + cmd
            procs[(host, owner)] = (
                cmd,
                Popen(cmd, stdout=PIPE, stderr=PIPE, stdin=DEVNULL))
        # Wait for commands to complete for a max of 10 seconds
        timeout = time() + 10.0
        while procs and time() < timeout:
            for (host, owner), (cmd, proc) in procs.copy().items():
                if proc.poll() is None:
                    continue
                del procs[(host, owner)]
                out, err = (f.decode() for f in proc.communicate())
                if proc.wait():
                    LOG.warning(TaskRemoteMgmtError(
                        TaskRemoteMgmtError.MSG_TIDY,
                        (host, owner), ' '.join(quote(item) for item in cmd),
                        proc.returncode, out, err))
        # Terminate any remaining commands
        for (host, owner), (cmd, proc) in procs.items():
            try:
                proc.terminate()
            except OSError:
                pass
            # Decode here too, so the warning carries text (not bytes), the
            # same as for commands that finished in time.
            out, err = (f.decode() for f in proc.communicate())
            if proc.wait():
                LOG.warning(TaskRemoteMgmtError(
                    TaskRemoteMgmtError.MSG_TIDY,
                    (host, owner), ' '.join(quote(item) for item in cmd),
                    proc.returncode, out, err))
Example #3
0
    def remote_init(self, platform: Dict[str, Any],
                    curve_auth: 'ThreadAuthenticator',
                    client_pub_key_dir: str) -> None:
        """Initialise a remote host if necessary.

        Call "cylc remote-init" to install suite items to remote:
            ".service/contact": For TCP task communication
            "python/": if source exists

        The status of the install target is recorded in
        ``self.remote_init_map``; for the localhost install target it is set
        straight to REMOTE_FILE_INSTALL_DONE and nothing is run.

        Args:
            platform: A dict containing settings relating to platform used in
                this remote installation.
            curve_auth: The ZMQ authenticator.
            client_pub_key_dir: Client public key directory, used by the
                ZMQ authenticator.

        """
        install_target = platform['install target']
        if install_target == get_localhost_install_target():
            self.remote_init_map[install_target] = REMOTE_FILE_INSTALL_DONE
            return
        # Set status of install target to in progress while waiting for remote
        # initialisation to finish
        self.remote_init_map[install_target] = REMOTE_INIT_IN_PROGRESS

        # Determine what items to install
        comm_meth = platform['communication method']
        items = self._remote_init_items(comm_meth)

        # Create a TAR archive with the service files,
        # so they can be sent later via SSH's STDIN to the task remote.
        tmphandle = self.proc_pool.get_temporary_file()
        try:
            with tarfile.open(fileobj=tmphandle, mode='w') as tarhandle:
                for path, arcname in items:
                    tarhandle.add(path, arcname=arcname)
            tmphandle.seek(0)
        except Exception:
            # The handle is only passed on to the remote-init callback once
            # the command is queued, so release it here on failure.
            tmphandle.close()
            raise
        # Build the remote-init command to be run over ssh
        cmd = ['remote-init']
        if cylc.flow.flags.debug:
            cmd.append('--debug')
        cmd.append(str(install_target))
        cmd.append(get_remote_suite_run_dir(platform, self.suite))
        dirs_to_symlink = get_dirs_to_symlink(install_target, self.suite)
        for key, value in dirs_to_symlink.items():
            if value is not None:
                # NOTE(review): the trailing space is kept as-is; presumably
                # harmless once the command is joined for SSH - confirm.
                cmd.append(f"{key}={quote(value)} ")
        # Create the ssh command
        cmd = construct_ssh_cmd(cmd, platform)
        self.proc_pool.put_command(
            SubProcContext('remote-init', cmd, stdin_files=[tmphandle]),
            self._remote_init_callback,
            [platform, tmphandle, curve_auth, client_pub_key_dir])
Example #4
0
    def file_install(self, platform):
        """Install required files on the remote install target.

        Included by default in the file installation:
            Files:
                .service/server.key  (required for ZMQ authentication)
            Directories:
                app/
                bin/
                etc/
                lib/

        The install target is marked REMOTE_FILE_INSTALL_IN_PROGRESS in
        ``self.remote_init_map`` and an rsync-over-ssh command is queued on
        the process pool; ``self._file_install_callback`` is called with the
        install target when it completes.

        Args:
            platform: Platform settings; the 'install target' item is read
                here, the rest is passed to the command builders.
        """
        install_target = platform['install target']
        self.remote_init_map[install_target] = REMOTE_FILE_INSTALL_IN_PROGRESS
        src_path = get_workflow_run_dir(self.suite)
        dst_path = get_remote_suite_run_dir(platform, self.suite)
        ctx = SubProcContext(
            'file-install',
            construct_rsync_over_ssh_cmd(src_path, dst_path, platform,
                                         self.rsync_includes))
        LOG.debug(f"Begin file installation on {install_target}")
        self.proc_pool.put_command(ctx, self._file_install_callback,
                                   [install_target])
Example #5
0
 def _load_remote_item(self, item, reg, owner, host):
     """Fetch the content of a service item from a remote [owner@]host.

     For the contact file on a non-remote host, the local filesystem is
     tried first. Otherwise, or if that finds nothing, the item is read by
     running "cat" over SSH; the output is only trusted after a marker
     line identifying the suite.

     Return the content as a string, or None if the target is not remote,
     the SSH command cannot be started, the command fails, or no content
     follows the marker line.
     """
     import shlex
     from subprocess import Popen, PIPE, DEVNULL  # nosec

     if not is_remote(host, owner):
         return None
     host = 'localhost' if host is None else host
     owner = get_user() if owner is None else owner

     if item == self.FILE_BASE_CONTACT and not is_remote_host(host):
         # Same host, different account: the contact file may be readable
         # directly from the local filesystem.
         local_srv_dir = r'%(run_d)s/%(srv_base)s' % {
             'run_d': get_remote_suite_run_dir('localhost', owner, reg),
             'srv_base': self.DIR_BASE_SRV,
         }
         local_content = self._load_local_item(item, local_srv_dir)
         if local_content is not None:
             return local_content
         # Nothing found locally: carry on and try SSH to the suite account.

     # Marker line echoed before the file, so unrelated STDOUT is ignored
     prefix = r'[CYLC-AUTH] %(suite)s' % {'suite': reg}
     # Remote script: echo the marker, then cat the item from the suite
     # service directory
     script = (r"""echo '%(prefix)s'; """
               r'''cat "%(run_d)s/%(srv_base)s/%(item)s"''') % {
                   'prefix': prefix,
                   'run_d': get_remote_suite_run_dir(host, owner, reg),
                   'srv_base': self.DIR_BASE_SRV,
                   'item': item
               }
     command = shlex.split(
         glbl_cfg().get_host_item('ssh command', host, owner))
     command.extend(['-n', owner + '@' + host, script])
     try:
         proc = Popen(command, stdin=DEVNULL, stdout=PIPE,
                      stderr=PIPE)  # nosec
     except OSError:
         if cylc.flow.flags.debug:
             import traceback
             traceback.print_exc()
         return None
     out, err = (stream.decode() for stream in proc.communicate())
     ret_code = proc.wait()

     # The wanted content is everything after the first marker line
     stdout_lines = out.splitlines(True)
     content = ""
     for index, line in enumerate(stdout_lines):
         if line.strip() == prefix:
             content = "".join(stdout_lines[index + 1:])
             break
     if content and not ret_code:
         return content
     LOG.debug(
         '$ %(command)s  # code=%(ret_code)s\n%(err)s',
         {
             'command': command,
             # STDOUT may contain passphrase, so not safe to print
             # 'out': out,
             'err': err,
             'ret_code': ret_code,
         })
     return None
Example #6
0
    def remote_tidy(self):
        """Remove suite contact files from initialised remotes.

        Call "cylc remote-tidy".
        This method is called on suite shutdown, so we want nothing to hang.
        Timeout any incomplete commands after 10 seconds.

        Also remove UUID file on suite host ".service/uuid".

        Commands that exit with a non-zero status (including those that had
        to be terminated) are logged as warnings with their decoded output.
        """
        # Remove UUID file
        uuid_fname = os.path.join(get_suite_srv_dir(self.suite),
                                  FILE_BASE_UUID)
        try:
            os.unlink(uuid_fname)
        except OSError:
            # Best effort: the file may never have been created
            pass
        # Issue all SSH commands in parallel
        procs = {}
        for (host, owner), init_with_contact in self.remote_init_map.items():
            if init_with_contact != REMOTE_INIT_DONE:
                continue
            cmd = ['timeout', '10', 'cylc', 'remote-tidy']
            if is_remote_host(host):
                cmd.append('--host=%s' % host)
            if is_remote_user(owner):
                cmd.append('--user=%s' % owner)
            if cylc.flow.flags.debug:
                cmd.append('--debug')
            cmd.append(get_remote_suite_run_dir(host, owner, self.suite))
            procs[(host, owner)] = (cmd,
                                    Popen(cmd,
                                          stdout=PIPE,
                                          stderr=PIPE,
                                          stdin=DEVNULL))
        # Wait for commands to complete for a max of 10 seconds
        timeout = time() + 10.0
        while procs and time() < timeout:
            for (host, owner), (cmd, proc) in procs.copy().items():
                if proc.poll() is None:
                    continue
                del procs[(host, owner)]
                out, err = (f.decode() for f in proc.communicate())
                if proc.wait():
                    LOG.warning(
                        TaskRemoteMgmtError(
                            TaskRemoteMgmtError.MSG_TIDY, (host, owner),
                            ' '.join(quote(item) for item in cmd),
                            proc.returncode, out, err))
        # Terminate any remaining commands
        for (host, owner), (cmd, proc) in procs.items():
            try:
                proc.terminate()
            except OSError:
                pass
            # Decode here too, so the warning carries text (not bytes), the
            # same as for commands that finished in time.
            out, err = (f.decode() for f in proc.communicate())
            if proc.wait():
                LOG.warning(
                    TaskRemoteMgmtError(TaskRemoteMgmtError.MSG_TIDY,
                                        (host, owner),
                                        ' '.join(quote(item) for item in cmd),
                                        proc.returncode, out, err))
Example #7
0
    def remote_init(self, host, owner):
        """Initialise a remote [owner@]host if necessary.

        Create UUID file on suite host ".service/uuid" for remotes to identify
        shared file system with suite host.

        Call "cylc remote-init" to install suite items to remote:
            ".service/contact": For TCP task communication
            ".service/passphrase": For TCP task communication
            "python/": if source exists

        Return:
            REMOTE_INIT_NOT_REQUIRED:
                If remote init is not required, e.g. not remote
            REMOTE_INIT_DONE:
                If remote init done.
            REMOTE_INIT_FAILED:
                If init of the remote failed.
                Note: this will reset to None to allow retry.
            None:
                If waiting for remote init command to complete

        """
        if self.single_task_mode or not is_remote(host, owner):
            return REMOTE_INIT_NOT_REQUIRED
        try:
            status = self.remote_init_map[(host, owner)]
        except KeyError:
            pass  # Not yet initialised
        else:
            if status == REMOTE_INIT_FAILED:
                del self.remote_init_map[(host, owner)]  # reset to allow retry
            return status

        # Determine what items to install
        comm_meth = glbl_cfg().get_host_item('task communication method', host,
                                             owner)
        owner_at_host = 'localhost'
        if host:
            owner_at_host = host
        if owner:
            owner_at_host = owner + '@' + owner_at_host
        LOG.debug('comm_meth[%s]=%s' % (owner_at_host, comm_meth))
        items = self._remote_init_items(comm_meth)
        # No item to install
        if not items:
            self.remote_init_map[(host, owner)] = REMOTE_INIT_NOT_REQUIRED
            return self.remote_init_map[(host, owner)]

        # Create a TAR archive with the service files,
        # so they can be sent later via SSH's STDIN to the task remote.
        tmphandle = self.proc_pool.get_temporary_file()
        try:
            with tarfile.open(fileobj=tmphandle, mode='w') as tarhandle:
                for path, arcname in items:
                    tarhandle.add(path, arcname=arcname)
            tmphandle.seek(0)
            # UUID file - for remote to identify shared file system with
            # suite host
            uuid_fname = os.path.join(get_suite_srv_dir(self.suite),
                                      FILE_BASE_UUID)
            if not os.path.exists(uuid_fname):
                # Close the file explicitly so the content is flushed to
                # disk before the remote-init command is queued.
                with open(uuid_fname, 'wb') as uuid_handle:
                    uuid_handle.write(str(self.uuid_str).encode())
        except Exception:
            # The handle is only passed on to the remote-init callback once
            # the command is queued, so release it here on failure.
            tmphandle.close()
            raise
        # Build the command
        cmd = ['cylc', 'remote-init']
        if is_remote_host(host):
            cmd.append('--host=%s' % host)
        if is_remote_user(owner):
            cmd.append('--user=%s' % owner)
        if cylc.flow.flags.debug:
            cmd.append('--debug')
        if comm_meth in ['ssh']:
            cmd.append('--indirect-comm=%s' % comm_meth)
        cmd.append(str(self.uuid_str))
        cmd.append(get_remote_suite_run_dir(host, owner, self.suite))
        self.proc_pool.put_command(
            SubProcContext('remote-init', cmd, stdin_files=[tmphandle]),
            self._remote_init_callback, [host, owner, tmphandle])
        # None status: Waiting for command to finish
        self.remote_init_map[(host, owner)] = None
        return self.remote_init_map[(host, owner)]
Example #8
0
    def write(self, local_job_file_path, job_conf, check_syntax=True):
        """Write each job script section in turn.

        The script is written to "<local_job_file_path>.tmp", optionally
        syntax-checked with "/bin/bash -n", made executable and finally
        renamed to ``local_job_file_path``.

        Args:
            local_job_file_path: Final path of the job file.
            job_conf: Job configuration; the 'host', 'owner' and
                'suite_name' items are read here and the whole mapping is
                passed on to the section writers.
            check_syntax: If True, syntax-check the script with bash.

        Raises:
            IOError: If writing the script fails (temporary file removed).
            OSError: If bash cannot be started (temporary file removed).
            RuntimeError: If the syntax check fails; the message is bash's
                STDERR and the temporary file is left in place.
        """

        # ########### !!!!!!!! WARNING !!!!!!!!!!! #####################
        # BE EXTREMELY WARY OF CHANGING THE ORDER OF JOB SCRIPT SECTIONS
        # Users may be relying on the existing order (see for example
        # the comment below on suite bin path being required before
        # task runtime environment setup).
        # ##############################################################

        # Access to cylc must be configured before user environment so
        # that cylc commands can be used in defining user environment
        # variables: NEXT_CYCLE=$( cylc cycle-point --offset-hours=6 )

        tmp_name = local_job_file_path + '.tmp'
        run_d = get_remote_suite_run_dir(job_conf['host'], job_conf['owner'],
                                         job_conf['suite_name'])
        try:
            with open(tmp_name, 'w') as handle:
                self._write_header(handle, job_conf)
                self._write_directives(handle, job_conf)
                self._write_prelude(handle, job_conf)
                self._write_environment_1(handle, job_conf, run_d)
                self._write_global_init_script(handle, job_conf)
                # suite bin access must be before runtime environment
                # because suite bin commands may be used in variable
                # assignment expressions: FOO=$(command args).
                self._write_environment_2(handle, job_conf)
                self._write_script(handle, job_conf)
                self._write_epilogue(handle, job_conf, run_d)
        except IOError:
            # Remove temporary file
            try:
                os.unlink(tmp_name)
            except OSError:
                pass
            raise
        # check syntax
        if check_syntax:
            try:
                # The child gets its own copy of the descriptor, so the
                # parent's handle is closed as soon as Popen returns.
                with open(os.devnull) as devnull:
                    proc = Popen(['/bin/bash', '-n', tmp_name],
                                 stderr=PIPE,
                                 stdin=devnull)
            except OSError as exc:
                # Popen has a bad habit of not telling you anything if it fails
                # to run the executable.
                if exc.filename is None:
                    exc.filename = '/bin/bash'
                # Remove temporary file
                try:
                    os.unlink(tmp_name)
                except OSError:
                    pass
                raise
            # Drain STDERR *before* looking at the exit status: waiting first
            # can deadlock if bash fills the pipe buffer.
            err = proc.communicate()[1]
            if proc.returncode:
                # This will leave behind the temporary file,
                # which is useful for debugging syntax errors, etc.
                raise RuntimeError(err.decode())
        # Make job file executable
        mode = (os.stat(tmp_name).st_mode | stat.S_IXUSR | stat.S_IXGRP
                | stat.S_IXOTH)
        os.chmod(tmp_name, mode)
        os.rename(tmp_name, local_job_file_path)
Example #9
0
    def _remote_init_callback(
            self, proc_ctx, platform, tmphandle,
            curve_auth, client_pub_key_dir):
        """Callback when "cylc remote-init" exits.

        On success (zero return code):
        * if the output reports REMOTE_INIT_DONE, rsync the suite run
          directory items to the install target;
        * if the output contains a key between KEYSTART and KEYEND, write it
          as the client public key and reconfigure the ZMQ authenticator;
        * record the reported status in ``self.remote_init_map``.

        Anything else is logged as an error and the install target is marked
        REMOTE_INIT_FAILED.

        Args:
            proc_ctx: Context of the finished remote-init command; its
                ``ret_code``, ``out``, ``err`` and ``cmd`` are read.
            platform: Platform settings used for this remote installation.
            tmphandle: Temporary file that was fed to the command's STDIN;
                closed here.
            curve_auth: The ZMQ authenticator.
            client_pub_key_dir: Client public key directory, used by the
                ZMQ authenticator.
        """
        self.ready = True
        try:
            tmphandle.close()
        except OSError:  # E.g. ignore bad unlink, etc
            pass
        self.install_target = platform['install target']
        if proc_ctx.ret_code == 0:
            if REMOTE_INIT_DONE in proc_ctx.out:
                src_path = get_suite_run_dir(self.suite)
                dst_path = get_remote_suite_run_dir(platform, self.suite)
                try:
                    process = procopen(construct_rsync_over_ssh_cmd(
                        src_path,
                        dst_path,
                        platform,
                        self.rsync_includes),
                        stdoutpipe=True,
                        stderrpipe=True,
                        universal_newlines=True)

                    out, err = process.communicate(timeout=600)
                    if out:
                        RSYNC_LOG.info(
                            'File installation information for '
                            f'{self.install_target}:\n {out}')
                    if err:
                        LOG.error(
                            'File installation error on '
                            f'{self.install_target}:\n {err}')
                except Exception as ex:
                    LOG.error(f"Problem during rsync: {ex}")
                    self.remote_init_map[self.install_target] = (
                        REMOTE_INIT_FAILED)
                    return
            if "KEYSTART" in proc_ctx.out:
                # NOTE(review): assumes KEYEND always follows KEYSTART;
                # re.search returns None otherwise - confirm.
                regex_result = re.search(
                    'KEYSTART((.|\n|\r)*)KEYEND', proc_ctx.out)
                key = regex_result.group(1)
                suite_srv_dir = get_suite_srv_dir(self.suite)
                public_key = KeyInfo(
                    KeyType.PUBLIC,
                    KeyOwner.CLIENT,
                    suite_srv_dir=suite_srv_dir,
                    install_target=self.install_target
                )
                # Create the key file without group/other access, and always
                # restore the process umask, even if the write fails.
                old_umask = os.umask(0o177)
                try:
                    with open(
                            public_key.full_key_path,
                            'w', encoding='utf8') as text_file:
                        text_file.write(key)
                finally:
                    os.umask(old_umask)
                # configure_curve must be called every time certificates are
                # added or removed, in order to update the Authenticator's
                # state.
                curve_auth.configure_curve(
                    domain='*', location=(client_pub_key_dir))
            for status in (REMOTE_INIT_DONE, REMOTE_INIT_NOT_REQUIRED):
                if status in proc_ctx.out:
                    # Good status
                    LOG.debug(proc_ctx)
                    self.remote_init_map[self.install_target] = status
                    return
        # Bad status
        LOG.error(TaskRemoteMgmtError(
            TaskRemoteMgmtError.MSG_INIT,
            platform['install target'], ' '.join(
                quote(item) for item in proc_ctx.cmd),
            proc_ctx.ret_code, proc_ctx.out, proc_ctx.err))
        LOG.error(proc_ctx)
        self.remote_init_map[platform['install target']] = REMOTE_INIT_FAILED
Example #10
0
    def remote_init(self, platform, curve_auth,
                    client_pub_key_dir):
        """Initialise a remote [owner@]host if necessary.

        Call "cylc remote-init" to install suite items to remote:
            ".service/contact": For TCP task communication
            "python/": if source exists

        Args:
            platform (dict):
                A dictionary containing settings relating to platform used in
                this remote installation.
            curve_auth (ThreadAuthenticator):
                The ZMQ authenticator.
            client_pub_key_dir (str):
                Client public key directory, used by the ZMQ authenticator.

        Return:
            REMOTE_INIT_NOT_REQUIRED:
                If remote init is not required, e.g. not remote
            REMOTE_INIT_DONE:
                If remote init done.
            REMOTE_INIT_FAILED:
                If init of the remote failed.
                Note: this will reset to None to allow retry.
            None:
                If waiting for remote init command to complete

        """
        self.install_target = platform['install target']

        # If task is running locally or the install target is localhost
        # we can skip the rest of this function
        if (self.install_target == 'localhost' or
                self.single_task_mode or
                not is_remote_host(get_host_from_platform(platform))):
            LOG.debug(f"REMOTE INIT NOT REQUIRED for {self.install_target}")
            return REMOTE_INIT_NOT_REQUIRED

        # If this install target already has a recorded status, return it.
        # A failed status is also cleared so that the next call retries.
        try:
            status = self.remote_init_map[platform['install target']]
        except KeyError:
            pass  # Not yet initialised
        else:
            if status == REMOTE_INIT_FAILED:
                del self.remote_init_map[platform['install target']]
            return status

        # Determine what items to install
        comm_meth = platform['communication method']

        # Get a list of files and folders to install.
        items = self._remote_init_items(comm_meth)

        # Create a TAR archive with the service files,
        # so they can be sent later via SSH's STDIN to the task remote.
        tmphandle = self.proc_pool.get_temporary_file()
        try:
            with tarfile.open(fileobj=tmphandle, mode='w') as tarhandle:
                for path, arcname in items:
                    tarhandle.add(path, arcname=arcname)
            tmphandle.seek(0)
        except Exception:
            # The handle is only passed on to the remote-init callback once
            # the command is queued, so release it here on failure.
            tmphandle.close()
            raise
        # Build the remote-init command to be run over ssh
        cmd = ['remote-init']
        if cylc.flow.flags.debug:
            cmd.append('--debug')
        if comm_meth in ['ssh']:
            cmd.append('--indirect-comm=%s' % comm_meth)
        cmd.append(str(self.install_target))
        cmd.append(get_remote_suite_run_dir(platform, self.suite))
        # Create the ssh command
        cmd = construct_platform_ssh_cmd(cmd, platform)

        self.proc_pool.put_command(
            SubProcContext(
                'remote-init',
                cmd,
                stdin_files=[tmphandle]),
            self._remote_init_callback,
            [platform, tmphandle,
             curve_auth, client_pub_key_dir])
        # None status: Waiting for command to finish
        self.remote_init_map[platform['install target']] = None
        return self.remote_init_map[platform['install target']]