Exemple #1
0
    def update(self):
        """Update tree, if necessary."""
        if self.next_scan_time is not None and self.next_scan_time > time():
            return

        # Scan for running suites
        choices = []
        for host, port, suite_identity in scan_many(get_scan_items_from_fs(),
                                                    timeout=self.timeout):
            name = suite_identity[KEY_NAME]
            owner = suite_identity[KEY_OWNER]
            if is_remote_user(owner):
                continue  # current user only
            auth = "%s:%s" % (host, port)
            choices.append((name, auth))
        choices.sort()
        self.next_scan_time = time() + self.SCAN_INTERVAL
        if choices == self.running_choices:
            return

        # Update tree if running suites changed
        self.running_choices = choices
        self.construct_newtree()
        self.update_treestore(self.newtree,
                              self.regd_treestore.get_iter_first())
    def _run_job_cmd(self, cmd_key, suite, itasks, callback):
        """Run job commands, e.g. poll, kill, etc.

        Group itasks with their user@host.
        Put a job command for each user@host to the multiprocess pool.

        """
        if not itasks:
            return
        auth_itasks = {}
        for itask in itasks:
            if (itask.task_host, itask.task_owner) not in auth_itasks:
                auth_itasks[(itask.task_host, itask.task_owner)] = []
            auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
        for (host, owner), itasks in sorted(auth_itasks.items()):
            cmd = ["cylc", cmd_key]
            if cylc.flags.debug:
                cmd.append("--debug")
            if is_remote_host(host):
                cmd.append("--host=%s" % (host))
            if is_remote_user(owner):
                cmd.append("--user=%s" % (owner))
            cmd.append("--")
            cmd.append(
                GLOBAL_CFG.get_derived_host_item(suite,
                                                 "suite job log directory",
                                                 host, owner))
            job_log_dirs = []
            for itask in sorted(itasks, key=lambda itask: itask.identity):
                job_log_dirs.append(
                    self.task_events_mgr.get_task_job_id(
                        itask.point, itask.tdef.name, itask.submit_num))
            cmd += job_log_dirs
            self.proc_pool.put_command(SuiteProcContext(cmd_key, cmd),
                                       callback, [suite, itasks])
Exemple #3
0
    def _run_job_cmd(self, cmd_key, suite, itasks, callback):
        """Run job commands, e.g. poll, kill, etc.

        Group itasks with their user@host.
        Put a job command for each user@host to the multiprocess pool.

        """
        if not itasks:
            return
        auth_itasks = {}
        for itask in itasks:
            if (itask.task_host, itask.task_owner) not in auth_itasks:
                auth_itasks[(itask.task_host, itask.task_owner)] = []
            auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
        for (host, owner), itasks in sorted(auth_itasks.items()):
            cmd = ["cylc", cmd_key]
            if LOG.isEnabledFor(DEBUG):
                cmd.append("--debug")
            if is_remote_host(host):
                cmd.append("--host=%s" % (host))
            if is_remote_user(owner):
                cmd.append("--user=%s" % (owner))
            cmd.append("--")
            cmd.append(glbl_cfg().get_derived_host_item(
                suite, "suite job log directory", host, owner))
            job_log_dirs = []
            for itask in sorted(itasks, key=lambda itask: itask.identity):
                job_log_dirs.append(get_task_job_id(
                    itask.point, itask.tdef.name, itask.submit_num))
            cmd += job_log_dirs
            self.proc_pool.put_command(
                SubProcContext(cmd_key, cmd), callback, [suite, itasks])
Exemple #4
0
    def update(self):
        """Update tree, if necessary."""
        if self.next_scan_time is not None and self.next_scan_time > time():
            return

        # Scan for running suites
        choices = []
        for host, port, suite_identity in scan_many(
                get_scan_items_from_fs(), timeout=self.timeout):
            name = suite_identity[KEY_NAME]
            owner = suite_identity[KEY_OWNER]
            if is_remote_user(owner):
                continue  # current user only
            auth = "%s:%s" % (host, port)
            choices.append((name, auth))
        choices.sort()
        self.next_scan_time = time() + self.SCAN_INTERVAL
        if choices == self.running_choices:
            return

        # Update tree if running suites changed
        self.running_choices = choices
        self.construct_newtree()
        self.update_treestore(
            self.newtree, self.regd_treestore.get_iter_first())
Exemple #5
0
    def remote_tidy(self):
        """Remove suite contact files from initialised remotes.

        Call "cylc remote-tidy".
        This method is called on suite shutdown, so we want nothing to hang.
        Timeout any incomplete commands after 10 seconds.

        Also remove UUID file on suite host ".service/uuid".
        """
        # Remove UUID file
        uuid_fname = os.path.join(
            self.suite_srv_files_mgr.get_suite_srv_dir(self.suite),
            FILE_BASE_UUID)
        try:
            os.unlink(uuid_fname)
        except OSError:
            pass
        # Issue all SSH commands in parallel
        procs = {}
        for (host, owner), init_with_contact in self.remote_init_map.items():
            if init_with_contact != REMOTE_INIT_DONE:
                continue
            cmd = ['timeout', '10', 'cylc', 'remote-tidy']
            if is_remote_host(host):
                cmd.append('--host=%s' % host)
            if is_remote_user(owner):
                cmd.append('--user=%s' % owner)
            if cylc.flags.debug:
                cmd.append('--debug')
            cmd.append(os.path.join(glbl_cfg().get_derived_host_item(
                self.suite, 'suite run directory', host, owner)))
            procs[(host, owner)] = (
                cmd,
                Popen(cmd, stdout=PIPE, stderr=PIPE, stdin=open(os.devnull)))
        # Wait for commands to complete for a max of 10 seconds
        timeout = time() + 10.0
        while procs and time() < timeout:
            for (host, owner), (cmd, proc) in procs.copy().items():
                if proc.poll() is None:
                    continue
                del procs[(host, owner)]
                out, err = (f.decode() for f in proc.communicate())
                if proc.wait():
                    LOG.warning(TaskRemoteMgmtError(
                        TaskRemoteMgmtError.MSG_TIDY,
                        (host, owner), ' '.join(quote(item) for item in cmd),
                        proc.returncode, out, err))
        # Terminate any remaining commands
        for (host, owner), (cmd, proc) in procs.items():
            try:
                proc.terminate()
            except OSError:
                pass
            out, err = proc.communicate()
            if proc.wait():
                LOG.warning(TaskRemoteMgmtError(
                    TaskRemoteMgmtError.MSG_TIDY,
                    (host, owner), ' '.join(quote(item) for item in cmd),
                    proc.returncode, out, err))
    def remote_tidy(self):
        """Remove suite contact files from initialised remotes.

        Call "cylc remote-tidy".
        This method is called on suite shutdown, so we want nothing to hang.
        Timeout any incomplete commands after 10 seconds.

        Also remove UUID file on suite host ".service/uuid".
        """
        # Remove UUID file
        uuid_fname = os.path.join(
            self.suite_srv_files_mgr.get_suite_srv_dir(self.suite),
            FILE_BASE_UUID)
        try:
            os.unlink(uuid_fname)
        except OSError:
            pass
        # Issue all SSH commands in parallel
        procs = {}
        for (host, owner), init_with_contact in self.remote_init_map.items():
            if init_with_contact != REMOTE_INIT_DONE:
                continue
            cmd = ['timeout', '10', 'cylc', 'remote-tidy']
            if is_remote_host(host):
                cmd.append('--host=%s' % host)
            if is_remote_user(owner):
                cmd.append('--user=%s' % owner)
            if cylc.flags.debug:
                cmd.append('--debug')
            cmd.append(os.path.join(glbl_cfg().get_derived_host_item(
                self.suite, 'suite run directory', host, owner)))
            procs[(host, owner)] = (
                cmd,
                Popen(cmd, stdout=PIPE, stderr=PIPE, stdin=open(os.devnull)))
        # Wait for commands to complete for a max of 10 seconds
        timeout = time() + 10.0
        while procs and time() < timeout:
            for (host, owner), (cmd, proc) in procs.copy().items():
                if proc.poll() is None:
                    continue
                del procs[(host, owner)]
                out, err = (f.decode() for f in proc.communicate())
                if proc.wait():
                    LOG.warning(TaskRemoteMgmtError(
                        TaskRemoteMgmtError.MSG_TIDY,
                        (host, owner), ' '.join(quote(item) for item in cmd),
                        proc.returncode, out, err))
        # Terminate any remaining commands
        for (host, owner), (cmd, proc) in procs.items():
            try:
                proc.terminate()
            except OSError:
                pass
            out, err = proc.communicate()
            if proc.wait():
                LOG.warning(TaskRemoteMgmtError(
                    TaskRemoteMgmtError.MSG_TIDY,
                    (host, owner), ' '.join(quote(item) for item in cmd),
                    proc.returncode, out, err))
Exemple #7
0
    def get_host_item(self,
                      item,
                      host=None,
                      owner=None,
                      replace_home=False,
                      owner_home=None):
        """This allows hosts with no matching entry in the config file
        to default to appropriately modified localhost settings."""

        cfg = self.get()

        # (this may be called with explicit None values for localhost
        # and owner, so we can't use proper defaults in the arg list)
        if not host:
            # if no host is given the caller is asking about localhost
            host = 'localhost'

        # is there a matching host section?
        host_key = None
        if host:
            if host in cfg['hosts']:
                # there's an entry for this host
                host_key = host
            else:
                # try for a pattern match
                for cfg_host in cfg['hosts']:
                    if re.match(cfg_host, host):
                        host_key = cfg_host
                        break
        modify_dirs = False
        if host_key is not None:
            # entry exists, any unset items under it have already
            # defaulted to modified localhost values (see site cfgspec)
            value = cfg['hosts'][host_key][item]
        else:
            # no entry so default to localhost and modify appropriately
            value = cfg['hosts']['localhost'][item]
            modify_dirs = True
        if value is not None and 'directory' in item:
            if replace_home or modify_dirs:
                # Replace local home dir with $HOME for eval'n on other host.
                value = value.replace(os.environ['HOME'], '$HOME')
            elif is_remote_user(owner):
                # Replace with ~owner for direct access via local filesys
                # (works for standard cylc-run directory location).
                if owner_home is None:
                    owner_home = os.path.expanduser('~%s' % owner)
                value = value.replace(os.environ['HOME'], owner_home)
        if item == "task communication method" and value == "default":
            # Translate "default" to client-server comms: "https" or "http".
            value = cfg['communication']['method']
        return value
Exemple #8
0
    def get_host_item(self, item, host=None, owner=None, replace_home=False,
                      owner_home=None):
        """This allows hosts with no matching entry in the config file
        to default to appropriately modified localhost settings."""

        cfg = self.get()

        # (this may be called with explicit None values for localhost
        # and owner, so we can't use proper defaults in the arg list)
        if not host:
            # if no host is given the caller is asking about localhost
            host = 'localhost'

        # is there a matching host section?
        host_key = None
        if host:
            if host in cfg['hosts']:
                # there's an entry for this host
                host_key = host
            else:
                # try for a pattern match
                for cfg_host in cfg['hosts']:
                    if re.match(cfg_host, host):
                        host_key = cfg_host
                        break
        modify_dirs = False
        if host_key is not None:
            # entry exists, any unset items under it have already
            # defaulted to modified localhost values (see site cfgspec)
            value = cfg['hosts'][host_key][item]
        else:
            # no entry so default to localhost and modify appropriately
            value = cfg['hosts']['localhost'][item]
            modify_dirs = True
        if value is not None and 'directory' in item:
            if replace_home or modify_dirs:
                # Replace local home dir with $HOME for eval'n on other host.
                value = value.replace(os.environ['HOME'], '$HOME')
            elif is_remote_user(owner):
                # Replace with ~owner for direct access via local filesys
                # (works for standard cylc-run directory location).
                if owner_home is None:
                    owner_home = os.path.expanduser('~%s' % owner)
                value = value.replace(os.environ['HOME'], owner_home)
        if item == "task communication method" and value == "default":
            # Translate "default" to client-server comms: "https" or "http".
            value = cfg['communication']['method']
        return value
    def get_suite_source_dir(self, reg, suite_owner=None):
        """Return the source directory path of a suite.

        Will register un-registered suites located in the cylc run dir.
        """
        srv_d = self.get_suite_srv_dir(reg, suite_owner)
        fname = os.path.join(srv_d, self.FILE_BASE_SOURCE)
        try:
            source = os.readlink(fname)
        except OSError:
            suite_d = os.path.dirname(srv_d)
            if os.path.exists(suite_d) and not is_remote_user(suite_owner):
                # suite exists but is not yet registered
                self.register(reg=reg, source=suite_d)
                return suite_d
            else:
                raise SuiteServiceFileError("ERROR: Suite not found %s" % reg)
        else:
            if os.path.isabs(source):
                return source
            else:
                return os.path.normpath(os.path.join(srv_d, source))
    def get_suite_source_dir(self, reg, suite_owner=None):
        """Return the source directory path of a suite.

        Will register un-registered suites located in the cylc run dir.
        """
        srv_d = self.get_suite_srv_dir(reg, suite_owner)
        fname = os.path.join(srv_d, self.FILE_BASE_SOURCE)
        try:
            source = os.readlink(fname)
        except OSError:
            suite_d = os.path.dirname(srv_d)
            if os.path.exists(suite_d) and not is_remote_user(suite_owner):
                # suite exists but is not yet registered
                self.register(reg=reg, source=suite_d)
                return suite_d
            else:
                raise SuiteServiceFileError("Suite not found %s" % reg)
        else:
            if os.path.isabs(source):
                return source
            else:
                return os.path.normpath(os.path.join(srv_d, source))
Exemple #11
0
    def remote_init(self, host, owner):
        """Initialise a remote [owner@]host if necessary.

        Create UUID file on suite host ".service/uuid" for remotes to identify
        shared file system with suite host.

        Call "cylc remote-init" to install suite items to remote:
            ".service/contact": HTTP(S) and SSH+HTTP(S) task comm
            ".service/passphrase": HTTP(S) task comm
            ".service/ssl.cert": HTTPS task comm
            "python/": if source exists

        Return:
            REMOTE_INIT_NOT_REQUIRED:
                If remote init is not required, e.g. not remote
            REMOTE_INIT_DONE:
                If remote init done.
            REMOTE_INIT_FAILED:
                If init of the remote failed.
                Note: this will reset to None to allow retry.
            None:
                If waiting for remote init command to complete

        """
        if self.single_task_mode or not is_remote(host, owner):
            return REMOTE_INIT_NOT_REQUIRED
        try:
            status = self.remote_init_map[(host, owner)]
        except KeyError:
            pass  # Not yet initialised
        else:
            if status == REMOTE_INIT_FAILED:
                del self.remote_init_map[(host, owner)]  # reset to allow retry
            return status

        # Determine what items to install
        items = self._remote_init_items(host, owner)
        # No item to install
        if not items:
            self.remote_init_map[(host, owner)] = REMOTE_INIT_NOT_REQUIRED
            return self.remote_init_map[(host, owner)]

        # Create "stdin_file_paths" file, with "items" in it.
        tmphandle = NamedTemporaryFile()
        tarhandle = tarfile.open(fileobj=tmphandle, mode='w')
        for path, arcname in items:
            tarhandle.add(path, arcname=arcname)
        tarhandle.close()
        tmphandle.seek(0)
        # UUID file - for remote to identify shared file system with suite host
        uuid_fname = os.path.join(
            self.suite_srv_files_mgr.get_suite_srv_dir(self.suite), 'uuid')
        if not os.path.exists(uuid_fname):
            open(uuid_fname, 'wb').write(str(self.uuid))
        # Build the command
        cmd = ['cylc', 'remote-init']
        if is_remote_host(host):
            cmd.append('--host=%s' % host)
        if is_remote_user(owner):
            cmd.append('--user=%s' % owner)
        if cylc.flags.debug:
            cmd.append('--debug')
        cmd.append(str(self.uuid))
        cmd.append(glbl_cfg().get_derived_host_item(self.suite,
                                                    'suite run directory',
                                                    host, owner))
        self.proc_pool.put_command(
            SuiteProcContext('remote-init',
                             cmd,
                             stdin_file_paths=[tmphandle.name]),
            self._remote_init_callback, [host, owner, tmphandle])
        # None status: Waiting for command to finish
        self.remote_init_map[(host, owner)] = None
        return self.remote_init_map[(host, owner)]
Exemple #12
0
 def test_is_remote_user_on_current_user(self):
     """is_remote_user with current user."""
     self.assertFalse(is_remote_user(None))
     self.assertFalse(is_remote_user(os.getenv('USER')))
Exemple #13
0
    def remote_init(self, host, owner):
        """Initialise a remote [owner@]host if necessary.

        Create UUID file on suite host ".service/uuid" for remotes to identify
        shared file system with suite host.

        Call "cylc remote-init" to install suite items to remote:
            ".service/contact": For TCP task communication
            ".service/passphrase": For TCP task communication
            "python/": if source exists

        Return:
            REMOTE_INIT_NOT_REQUIRED:
                If remote init is not required, e.g. not remote
            REMOTE_INIT_DONE:
                If remote init done.
            REMOTE_INIT_FAILED:
                If init of the remote failed.
                Note: this will reset to None to allow retry.
            None:
                If waiting for remote init command to complete

        """
        if self.single_task_mode or not is_remote(host, owner):
            return REMOTE_INIT_NOT_REQUIRED
        try:
            status = self.remote_init_map[(host, owner)]
        except KeyError:
            pass  # Not yet initialised
        else:
            if status == REMOTE_INIT_FAILED:
                del self.remote_init_map[(host, owner)]  # reset to allow retry
            return status

        # Determine what items to install
        comm_meth = glbl_cfg().get_host_item(
            'task communication method', host, owner)
        owner_at_host = 'localhost'
        if host:
            owner_at_host = host
        if owner:
            owner_at_host = owner + '@' + owner_at_host
        LOG.debug('comm_meth[%s]=%s' % (owner_at_host, comm_meth))
        items = self._remote_init_items(comm_meth)
        # No item to install
        if not items:
            self.remote_init_map[(host, owner)] = REMOTE_INIT_NOT_REQUIRED
            return self.remote_init_map[(host, owner)]

        # Create a TAR archive with the service files,
        # so they can be sent later via SSH's STDIN to the task remote.
        tmphandle = self.proc_pool.get_temporary_file()
        tarhandle = tarfile.open(fileobj=tmphandle, mode='w')
        for path, arcname in items:
            tarhandle.add(path, arcname=arcname)
        tarhandle.close()
        tmphandle.seek(0)
        # UUID file - for remote to identify shared file system with suite host
        uuid_fname = os.path.join(
            self.suite_srv_files_mgr.get_suite_srv_dir(self.suite),
            FILE_BASE_UUID)
        if not os.path.exists(uuid_fname):
            open(uuid_fname, 'wb').write(str(self.uuid_str).encode())
        # Build the command
        cmd = ['cylc', 'remote-init']
        if is_remote_host(host):
            cmd.append('--host=%s' % host)
        if is_remote_user(owner):
            cmd.append('--user=%s' % owner)
        if cylc.flags.debug:
            cmd.append('--debug')
        if comm_meth in ['ssh']:
            cmd.append('--indirect-comm=%s' % comm_meth)
        cmd.append(str(self.uuid_str))
        cmd.append(glbl_cfg().get_derived_host_item(
            self.suite, 'suite run directory', host, owner))
        self.proc_pool.put_command(
            SubProcContext('remote-init', cmd, stdin_files=[tmphandle]),
            self._remote_init_callback,
            [host, owner, tmphandle])
        # None status: Waiting for command to finish
        self.remote_init_map[(host, owner)] = None
        return self.remote_init_map[(host, owner)]
Exemple #14
0
 def test_is_remote_user_on_current_user(self):
     """is_remote_user with current user."""
     self.assertFalse(is_remote_user(None))
     self.assertFalse(is_remote_user(os.getenv('USER')))
    def remote_init(self, host, owner):
        """Initialise a remote [owner@]host if necessary.

        Create UUID file on suite host ".service/uuid" for remotes to identify
        shared file system with suite host.

        Call "cylc remote-init" to install suite items to remote:
            ".service/contact": For TCP task communication
            ".service/passphrase": For TCP task communication
            "python/": if source exists

        Return:
            REMOTE_INIT_NOT_REQUIRED:
                If remote init is not required, e.g. not remote
            REMOTE_INIT_DONE:
                If remote init done.
            REMOTE_INIT_FAILED:
                If init of the remote failed.
                Note: this will reset to None to allow retry.
            None:
                If waiting for remote init command to complete

        """
        if self.single_task_mode or not is_remote(host, owner):
            return REMOTE_INIT_NOT_REQUIRED
        try:
            status = self.remote_init_map[(host, owner)]
        except KeyError:
            pass  # Not yet initialised
        else:
            if status == REMOTE_INIT_FAILED:
                del self.remote_init_map[(host, owner)]  # reset to allow retry
            return status

        # Determine what items to install
        comm_meth = glbl_cfg().get_host_item(
            'task communication method', host, owner)
        owner_at_host = 'localhost'
        if host:
            owner_at_host = host
        if owner:
            owner_at_host = owner + '@' + owner_at_host
        LOG.debug('comm_meth[%s]=%s' % (owner_at_host, comm_meth))
        items = self._remote_init_items(comm_meth)
        # No item to install
        if not items:
            self.remote_init_map[(host, owner)] = REMOTE_INIT_NOT_REQUIRED
            return self.remote_init_map[(host, owner)]

        # Create a TAR archive with the service files,
        # so they can be sent later via SSH's STDIN to the task remote.
        tmphandle = TemporaryFile()
        tarhandle = tarfile.open(fileobj=tmphandle, mode='w')
        for path, arcname in items:
            tarhandle.add(path, arcname=arcname)
        tarhandle.close()
        tmphandle.seek(0)
        # UUID file - for remote to identify shared file system with suite host
        uuid_fname = os.path.join(
            self.suite_srv_files_mgr.get_suite_srv_dir(self.suite),
            FILE_BASE_UUID)
        if not os.path.exists(uuid_fname):
            open(uuid_fname, 'wb').write(str(self.uuid_str).encode())
        # Build the command
        cmd = ['cylc', 'remote-init']
        if is_remote_host(host):
            cmd.append('--host=%s' % host)
        if is_remote_user(owner):
            cmd.append('--user=%s' % owner)
        if cylc.flags.debug:
            cmd.append('--debug')
        if comm_meth in ['ssh']:
            cmd.append('--indirect-comm=%s' % comm_meth)
        cmd.append(str(self.uuid_str))
        cmd.append(glbl_cfg().get_derived_host_item(
            self.suite, 'suite run directory', host, owner))
        self.proc_pool.put_command(
            SubProcContext('remote-init', cmd, stdin_files=[tmphandle]),
            self._remote_init_callback,
            [host, owner, tmphandle])
        # None status: Waiting for command to finish
        self.remote_init_map[(host, owner)] = None
        return self.remote_init_map[(host, owner)]