Code example #1
    def _process_job_logs_retrieval(self, schd_ctx, ctx, id_keys):
        """Process retrieval of task job logs from remote user@host."""
        if ctx.user_at_host and "@" in ctx.user_at_host:
            s_user, s_host = ctx.user_at_host.split("@", 1)
        else:
            s_user, s_host = (None, ctx.user_at_host)
        ssh_str = str(GLOBAL_CFG.get_host_item("ssh command", s_host, s_user))
        rsync_str = str(GLOBAL_CFG.get_host_item(
            "retrieve job logs command", s_host, s_user))

        cmd = shlex.split(rsync_str) + ["--rsh=" + ssh_str]
        if cylc.flags.debug:
            cmd.append("-v")
        if ctx.max_size:
            cmd.append("--max-size=%s" % (ctx.max_size,))
        # Includes and excludes
        includes = set()
        for _, point, name, submit_num in id_keys:
            # Include relevant directories, all levels needed
            includes.add("/%s" % (point))
            includes.add("/%s/%s" % (point, name))
            includes.add("/%s/%s/%02d" % (point, name, submit_num))
            includes.add("/%s/%s/%02d/**" % (point, name, submit_num))
        cmd += ["--include=%s" % (include) for include in sorted(includes)]
        cmd.append("--exclude=/**")  # exclude everything else
        # Remote source
        cmd.append(ctx.user_at_host + ":" + GLOBAL_CFG.get_derived_host_item(
            schd_ctx.suite, "suite job log directory", s_host, s_user) + "/")
        # Local target
        cmd.append(GLOBAL_CFG.get_derived_host_item(
            schd_ctx.suite, "suite job log directory") + "/")
        self.proc_pool.put_command(
            SuiteProcContext(ctx, cmd, env=dict(os.environ), id_keys=id_keys),
            self._job_logs_retrieval_callback, [schd_ctx])
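A minimal, self-contained sketch of the include/exclude filter list assembled above, using a made-up id_keys tuple; it only reproduces the pattern construction and does not call cylc or rsync:

    def build_retrieval_filters(id_keys):
        """Return sorted rsync --include options plus the catch-all exclude."""
        includes = set()
        for _, point, name, submit_num in id_keys:
            # Include every directory level down to the submit directory.
            includes.add("/%s" % (point,))
            includes.add("/%s/%s" % (point, name))
            includes.add("/%s/%s/%02d" % (point, name, submit_num))
            includes.add("/%s/%s/%02d/**" % (point, name, submit_num))
        return ["--include=%s" % inc for inc in sorted(includes)] + ["--exclude=/**"]

    # Hypothetical key layout: (tag, cycle point, task name, submit number).
    print(build_retrieval_filters([(None, "20200101T0000Z", "foo", 1)]))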
Code example #2
File: job_host.py  Project: benfitzpatrick/cylc
    def init_suite_run_dir(self, suite_name, user_at_host):
        """Initialise suite run dir on a user@host.

        Create SUITE_RUN_DIR/log/job/ if necessary.
        Install suite contact environment file.
        Install suite python modules.

        Raise RemoteJobHostInitError if initialisation cannot complete.

        """
        if '@' in user_at_host:
            owner, host = user_at_host.split('@', 1)
        else:
            owner, host = None, user_at_host
        if ((owner, host) in [(None, 'localhost'), (USER, 'localhost')] or
                host in self.initialised_hosts or
                self.single_task_mode):
            return

        suite_run_dir = GLOBAL_CFG.get_derived_host_item(
            suite_name, 'suite run directory')
        sources = [os.path.join(suite_run_dir, CylcSuiteEnv.BASE_NAME)]
        if 'CYLC_SUITE_DEF_PATH' in os.environ:
            sources.append(
                os.path.join(os.getenv('CYLC_SUITE_DEF_PATH'), 'passphrase'))
        suite_run_py = os.path.join(suite_run_dir, 'python')
        if os.path.isdir(suite_run_py):
            sources.append(suite_run_py)
        r_suite_run_dir = GLOBAL_CFG.get_derived_host_item(
            suite_name, 'suite run directory', host, owner)
        r_log_job_dir = GLOBAL_CFG.get_derived_host_item(
            suite_name, 'suite job log directory', host, owner)
        getLogger('main').log(INFO, 'Initialising %s:%s' % (
            user_at_host, r_suite_run_dir))

        ssh_tmpl = GLOBAL_CFG.get_host_item(
            'remote shell template', host, owner).replace(' %s', '')
        scp_tmpl = GLOBAL_CFG.get_host_item(
            'remote copy template', host, owner)

        cmd1 = shlex.split(ssh_tmpl) + [
            "-n", user_at_host,
            'mkdir', '-p', r_suite_run_dir, r_log_job_dir]
        cmd2 = shlex.split(scp_tmpl) + ['-pr'] + sources + [
            user_at_host + ":" + r_suite_run_dir + '/']
        for cmd in [cmd1, cmd2]:
            proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
            out, err = proc.communicate()
            if proc.wait():
                raise RemoteJobHostInitError(
                    user_at_host, " ".join([quote(item) for item in cmd]),
                    proc.returncode, out, err)
        self.initialised_hosts.append(user_at_host)
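For orientation, a hedged sketch of how the two remote commands above are assembled from the configured templates; the template strings, host and paths below are invented, not taken from any cylc configuration:

    import shlex

    ssh_tmpl = "ssh -oBatchMode=yes"   # stand-in for the "remote shell template" item
    scp_tmpl = "scp -oBatchMode=yes"   # stand-in for the "remote copy template" item
    user_at_host = "alice@hpc1"
    r_suite_run_dir = "/home/alice/cylc-run/my.suite"
    r_log_job_dir = r_suite_run_dir + "/log/job"
    sources = ["/home/me/cylc-run/my.suite/cylc-suite-env"]

    cmd1 = shlex.split(ssh_tmpl) + [
        "-n", user_at_host, "mkdir", "-p", r_suite_run_dir, r_log_job_dir]
    cmd2 = shlex.split(scp_tmpl) + ["-pr"] + sources + [
        user_at_host + ":" + r_suite_run_dir + "/"]
    print(cmd1)
    print(cmd2)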
Code example #3
    def init_suite_run_dir(self, suite_name, user_at_host):
        """Initialise suite run dir on a user@host.

        Create SUITE_RUN_DIR/log/job/ if necessary.
        Install suite contact environment file.
        Install suite python modules.

        Raise RemoteJobHostInitError if initialisation cannot complete.

        """
        if '@' in user_at_host:
            owner, host = user_at_host.split('@', 1)
        else:
            owner, host = None, user_at_host
        if ((owner, host) in [(None, 'localhost'), (USER, 'localhost')]
                or host in self.initialised_hosts or self.single_task_mode):
            return

        suite_run_dir = GLOBAL_CFG.get_derived_host_item(
            suite_name, 'suite run directory')
        sources = [os.path.join(suite_run_dir, CylcSuiteEnv.BASE_NAME)]
        if 'CYLC_SUITE_DEF_PATH' in os.environ:
            sources.append(
                os.path.join(os.getenv('CYLC_SUITE_DEF_PATH'), 'passphrase'))
        suite_run_py = os.path.join(suite_run_dir, 'python')
        if os.path.isdir(suite_run_py):
            sources.append(suite_run_py)
        r_suite_run_dir = GLOBAL_CFG.get_derived_host_item(
            suite_name, 'suite run directory', host, owner)
        r_log_job_dir = GLOBAL_CFG.get_derived_host_item(
            suite_name, 'suite job log directory', host, owner)
        getLogger('main').log(
            INFO, 'Initialising %s:%s' % (user_at_host, r_suite_run_dir))

        ssh_tmpl = GLOBAL_CFG.get_host_item('remote shell template', host,
                                            owner).replace(' %s', '')
        scp_tmpl = GLOBAL_CFG.get_host_item('remote copy template', host,
                                            owner)

        cmd1 = shlex.split(ssh_tmpl) + [
            "-n", user_at_host, 'mkdir', '-p', r_suite_run_dir, r_log_job_dir
        ]
        cmd2 = shlex.split(scp_tmpl) + ['-pr'] + sources + [
            user_at_host + ":" + r_suite_run_dir + '/'
        ]
        for cmd in [cmd1, cmd2]:
            proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
            out, err = proc.communicate()
            if proc.wait():
                raise RemoteJobHostInitError(
                    user_at_host, " ".join([quote(item) for item in cmd]),
                    proc.returncode, out, err)
        self.initialised_hosts.append(user_at_host)
Code example #4
File: jobfile.py  Project: dmanubens/cylc
    def write_environment_1( self, BUFFER=None ):
        if not BUFFER:
            BUFFER = self.FILE

        BUFFER.write( "\n\n# CYLC SUITE ENVIRONMENT:" )

        # write the static suite variables
        for var, val in sorted(self.__class__.suite_env.items()):
            BUFFER.write( "\nexport " + var + "=" + str(val) )

        if str(self.__class__.suite_env.get('CYLC_UTC')) == 'True':
            BUFFER.write( "\nexport TZ=UTC" )

        BUFFER.write("\n")
        # override and write task-host-specific suite variables
        suite_work_dir = GLOBAL_CFG.get_derived_host_item( self.suite, 'suite work directory', self.host, self.owner )
        st_env = deepcopy( self.__class__.suite_task_env ) 
        st_env[ 'CYLC_SUITE_RUN_DIR'    ] = GLOBAL_CFG.get_derived_host_item( self.suite, 'suite run directory', self.host, self.owner )
        st_env[ 'CYLC_SUITE_WORK_DIR'   ] = suite_work_dir
        st_env[ 'CYLC_SUITE_SHARE_DIR'  ] = GLOBAL_CFG.get_derived_host_item( self.suite, 'suite share directory', self.host, self.owner )
        st_env[ 'CYLC_SUITE_SHARE_PATH' ] = '$CYLC_SUITE_SHARE_DIR' # DEPRECATED
        rsp = self.jobconfig['remote suite path']
        if rsp:
            st_env[ 'CYLC_SUITE_DEF_PATH' ] = rsp
        else:
            # replace home dir with '$HOME' for evaluation on the task host
            st_env[ 'CYLC_SUITE_DEF_PATH' ] = re.sub( os.environ['HOME'], '$HOME', st_env['CYLC_SUITE_DEF_PATH'] )
        for var, val in sorted(st_env.items()):
            BUFFER.write( "\nexport " + var + "=" + str(val) )

        task_work_dir  = os.path.join( suite_work_dir, self.jobconfig['work sub-directory'] )

        use_login_shell = GLOBAL_CFG.get_host_item( 'use login shell', self.host, self.owner )
        comms = GLOBAL_CFG.get_host_item( 'task communication method', self.host, self.owner )

        BUFFER.write( "\n\n# CYLC TASK ENVIRONMENT:" )
        BUFFER.write( "\nexport CYLC_TASK_COMMS_METHOD=" + comms )
        BUFFER.write( "\nexport CYLC_TASK_CYCLE_POINT=" + self.point_string )
        BUFFER.write( "\nexport CYLC_TASK_CYCLE_TIME=" + self.point_string )
        BUFFER.write( "\nexport CYLC_TASK_ID=" + self.task_id )
        BUFFER.write( "\nexport CYLC_TASK_IS_COLDSTART=" + str( self.jobconfig['is cold-start']) )
        BUFFER.write( "\nexport CYLC_TASK_LOG_ROOT=" + self.log_root )
        BUFFER.write( "\nexport CYLC_TASK_MSG_MAX_TRIES=" + str( GLOBAL_CFG.get( ['task messaging','maximum number of tries'])) )
        BUFFER.write( "\nexport CYLC_TASK_MSG_RETRY_INTVL=" + str( GLOBAL_CFG.get( ['task messaging','retry interval in seconds'])) )
        BUFFER.write( "\nexport CYLC_TASK_MSG_TIMEOUT=" + str( GLOBAL_CFG.get( ['task messaging','connection timeout in seconds'])) )
        BUFFER.write( "\nexport CYLC_TASK_NAME=" + self.task_name )
        BUFFER.write( '\nexport CYLC_TASK_NAMESPACE_HIERARCHY="' + ' '.join( self.jobconfig['namespace hierarchy']) + '"')
        BUFFER.write( "\nexport CYLC_TASK_SSH_LOGIN_SHELL=" + str(use_login_shell) )
        BUFFER.write( "\nexport CYLC_TASK_SUBMIT_NUMBER=" + str(self.jobconfig['absolute submit number']) )
        BUFFER.write( "\nexport CYLC_TASK_TRY_NUMBER=" + str(self.jobconfig['try number']) )
        BUFFER.write( "\nexport CYLC_TASK_WORK_DIR=" + task_work_dir )
        BUFFER.write( "\nexport CYLC_TASK_WORK_PATH=$CYLC_TASK_WORK_DIR") # DEPRECATED
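The run of BUFFER.write calls above writes the suite variables as sorted export lines; a small standalone illustration of that loop (the variable names are arbitrary):

    def format_exports(env):
        """Return the environment as newline-prefixed export statements."""
        return "".join(
            "\nexport %s=%s" % (var, val) for var, val in sorted(env.items()))

    print(format_exports({"CYLC_SUITE_NAME": "my.suite", "CYLC_UTC": "True"}))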
Code example #5
File: job_host.py  Project: ScottWales/cylc
    def init_suite_run_dir(self, suite_name, user_at_host):
        """Initialise suite run dir on a user@host.

        Create SUITE_RUN_DIR/log/job/ if necessary.
        Install suite contact environment file.
        Install suite python modules.

        Raise RemoteJobHostInitError if initialisation cannot complete.

        """
        if '@' in user_at_host:
            owner, host = user_at_host.split('@', 1)
        else:
            owner, host = None, user_at_host
        if ((owner, host) in [(None, 'localhost'), (user, 'localhost')] or
                host in self.initialised_hosts or
                self.single_task_mode):
            return

        suite_run_dir = GLOBAL_CFG.get_derived_host_item(
            suite_name, 'suite run directory')
        sources = [os.path.join(suite_run_dir, "cylc-suite-env")]
        suite_run_py = os.path.join(suite_run_dir, "python")
        if os.path.isdir(suite_run_py):
            sources.append(suite_run_py)
        try:
            r_suite_run_dir = GLOBAL_CFG.get_derived_host_item(
                suite_name, 'suite run directory', host, owner)
            r_log_job_dir = GLOBAL_CFG.get_derived_host_item(
                suite_name, 'suite job log directory', host, owner)
            getLogger('main').log(INFO, 'Initialising %s:%s' % (
                user_at_host, r_suite_run_dir))

            ssh_tmpl = GLOBAL_CFG.get_host_item(
                'remote shell template', host, owner).replace(" %s", "")
            scp_tmpl = GLOBAL_CFG.get_host_item(
                'remote copy template', host, owner)

            cmd1 = shlex.split(ssh_tmpl) + [
                user_at_host,
                'mkdir -p "%s" "%s"' % (r_suite_run_dir, r_log_job_dir)]
            cmd2 = shlex.split(scp_tmpl) + ["-r"] + sources + [
                user_at_host + ":" + r_suite_run_dir + "/"]
            for cmd in [cmd1, cmd2]:
                check_call(cmd)
        except Exception:
            raise RemoteJobHostInitError(user_at_host)
        self.initialised_hosts.append(user_at_host)
Code example #6
    def _run_job_cmd(self, cmd_key, suite, itasks, callback):
        """Run job commands, e.g. poll, kill, etc.

        Group itasks with their user@host.
        Put a job command for each user@host to the multiprocess pool.

        """
        if not itasks:
            return
        auth_itasks = {}
        for itask in itasks:
            if (itask.task_host, itask.task_owner) not in auth_itasks:
                auth_itasks[(itask.task_host, itask.task_owner)] = []
            auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
        for (host, owner), itasks in sorted(auth_itasks.items()):
            cmd = ["cylc", cmd_key]
            if cylc.flags.debug:
                cmd.append("--debug")
            if is_remote_host(host):
                cmd.append("--host=%s" % (host))
            if is_remote_user(owner):
                cmd.append("--user=%s" % (owner))
            cmd.append("--")
            cmd.append(
                GLOBAL_CFG.get_derived_host_item(suite,
                                                 "suite job log directory",
                                                 host, owner))
            job_log_dirs = []
            for itask in sorted(itasks, key=lambda itask: itask.identity):
                job_log_dirs.append(
                    self.task_events_mgr.get_task_job_id(
                        itask.point, itask.tdef.name, itask.submit_num))
            cmd += job_log_dirs
            self.proc_pool.put_command(SuiteProcContext(cmd_key, cmd),
                                       callback, [suite, itasks])
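The grouping step above collects tasks per (host, owner) before one command is issued per group; a self-contained sketch of just that grouping, with plain tuples standing in for itask objects:

    def group_by_auth(tasks):
        """Map (host, owner) to the names of the tasks that run there."""
        auth_tasks = {}
        for host, owner, name in tasks:
            auth_tasks.setdefault((host, owner), []).append(name)
        return auth_tasks

    print(group_by_auth([
        ("hpc1", None, "model.2015"),
        ("hpc1", None, "post.2015"),
        ("hpc2", "alice", "archive.2015"),
    ]))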
Code example #7
File: job_logs.py  Project: dmanubens-zz/cylc
    def get_create_job_log_path(cls, suite, task_name, task_point, submit_num):
        """Return a new job log path on the suite host, in two parts.

        /part1/part2

        * part1: the top level job log directory on the suite host.
        * part2: the rest, which is also used on remote task hosts.

        The full local job log directory is created if necessary, and its
        parent symlinked to NN (submit number).

        """

        suite_job_log_dir = GLOBAL_CFG.get_derived_host_item(
            suite, "suite job log directory")

        the_rest_dir = os.path.join(str(task_point), task_name,
                                    "%02d" % int(submit_num))
        the_rest = os.path.join(the_rest_dir, "job")

        local_log_dir = os.path.join(suite_job_log_dir, the_rest_dir)

        mkdir_p(local_log_dir)
        target = os.path.join(os.path.dirname(local_log_dir), "NN")
        try:
            os.unlink(target)
        except OSError:
            pass
        try:
            os.symlink(os.path.basename(local_log_dir), target)
        except OSError as exc:
            if not exc.filename:
                exc.filename = target
            raise exc
        return suite_job_log_dir, the_rest
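As a concrete (made-up) illustration of the two-part return: for task "model" at point "2015", submit 3, part1 is the suite job log directory and part2 is "2015/model/03/job", with the sibling "NN" symlink repointed at "03". A minimal standalone version of the re-linking step, assuming the submit directory already exists:

    import os

    def relink_nn(local_log_dir):
        """Point the sibling "NN" symlink at the newest submit directory."""
        target = os.path.join(os.path.dirname(local_log_dir), "NN")
        try:
            os.unlink(target)
        except OSError:
            pass  # no existing link to remove
        os.symlink(os.path.basename(local_log_dir), target)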
Code example #8
File: job_logs.py  Project: dmanubens/cylc
    def get_create_job_log_path(cls, suite, task_name, task_point, submit_num):
        """Return a new job log path on the suite host, in two parts.

        /part1/part2

        * part1: the top level job log directory on the suite host.
        * part2: the rest, which is also used on remote task hosts.

        The full local job log directory is created if necessary, and its
        parent symlinked to NN (submit number).

        """

        suite_job_log_dir = GLOBAL_CFG.get_derived_host_item(
            suite, "suite job log directory")

        the_rest_dir = os.path.join(
            str(task_point), task_name, "%02d" % int(submit_num))
        the_rest = os.path.join(the_rest_dir, "job")

        local_log_dir = os.path.join(suite_job_log_dir, the_rest_dir)

        mkdir_p(local_log_dir)
        target = os.path.join(os.path.dirname(local_log_dir), "NN")
        try:
            os.unlink(target)
        except OSError:
            pass
        try:
            os.symlink(os.path.basename(local_log_dir), target)
        except OSError as exc:
            if not exc.filename:
                exc.filename = target
            raise exc
        return suite_job_log_dir, the_rest
Code example #9
    def unlink_suite_contact_files(self, reg):
        """Remove suite contact files from initialised hosts.

        This is called on shutdown, so we don't want anything to hang.
        Terminate any incomplete SSH commands after 10 seconds.
        """
        # Issue all SSH commands in parallel
        procs = {}
        for user_at_host, should_unlink in self.initialised_hosts.items():
            if not should_unlink:
                continue
            if "@" in user_at_host:
                owner, host = user_at_host.split("@", 1)
            else:
                owner, host = None, user_at_host
            ssh_tmpl = GLOBAL_CFG.get_host_item("remote shell template", host, owner)
            r_suite_contact_file = os.path.join(
                GLOBAL_CFG.get_derived_host_item(reg, "suite run directory", host, owner),
                SuiteSrvFilesManager.DIR_BASE_SRV,
                SuiteSrvFilesManager.FILE_BASE_CONTACT,
            )
            cmd = shlex.split(ssh_tmpl) + ["-n", user_at_host, "rm", "-f", r_suite_contact_file]
            procs[user_at_host] = (cmd, Popen(cmd, stdout=PIPE, stderr=PIPE))
        # Wait for commands to complete for a max of 10 seconds
        timeout = time() + 10.0
        while procs and time() < timeout:
            for user_at_host, (cmd, proc) in procs.copy().items():
                if proc.poll() is None:
                    continue
                del procs[user_at_host]
                out, err = proc.communicate()
                if proc.wait():
                    ERR.warning(
                        RemoteJobHostInitError(
                            RemoteJobHostInitError.MSG_TIDY,
                            user_at_host,
                            " ".join([quote(item) for item in cmd]),
                            proc.returncode,
                            out,
                            err,
                        )
                    )
        # Terminate any remaining commands
        for user_at_host, (cmd, proc) in procs.items():
            try:
                proc.terminate()
            except OSError:
                pass
            out, err = proc.communicate()
            proc.wait()
            ERR.warning(
                RemoteJobHostInitError(
                    RemoteJobHostInitError.MSG_TIDY,
                    user_at_host,
                    " ".join([quote(item) for item in cmd]),
                    proc.returncode,
                    out,
                    err,
                )
            )
Code example #10
File: task_job_mgr.py  Project: m214089/cylc
    def submit_task_jobs(self, suite, itasks, is_simulation=False):
        """Prepare and submit task jobs."""
        if is_simulation:
            return self._simulation_submit_task_jobs(itasks)

        # Prepare tasks for job submission
        prepared_tasks = self.prep_submit_task_jobs(suite, itasks)
        if not prepared_tasks:
            return

        # Submit task jobs
        auth_itasks = {}
        for itask in prepared_tasks:
            # The job file is now (about to be) used: reset the file write flag
            # so that subsequent manual retrigger will generate a new job file.
            itask.local_job_file_path = None
            itask.state.reset_state(TASK_STATUS_READY)
            if (itask.task_host, itask.task_owner) not in auth_itasks:
                auth_itasks[(itask.task_host, itask.task_owner)] = []
            auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
        for auth, itasks in sorted(auth_itasks.items()):
            cmd = ["cylc", self.JOBS_SUBMIT]
            if cylc.flags.debug:
                cmd.append("--debug")
            host, owner = auth
            remote_mode = False
            kwargs = {}
            for key, value, test_func in [
                    ('host', host, is_remote_host),
                    ('user', owner, is_remote_user)]:
                if test_func(value):
                    cmd.append('--%s=%s' % (key, value))
                    remote_mode = True
                    kwargs[key] = value
            if remote_mode:
                cmd.append('--remote-mode')
            cmd.append("--")
            cmd.append(GLOBAL_CFG.get_derived_host_item(
                suite, 'suite job log directory', host, owner))
            stdin_file_paths = []
            job_log_dirs = []
            for itask in sorted(itasks, key=lambda itask: itask.identity):
                if remote_mode:
                    stdin_file_paths.append(
                        self.task_events_mgr.get_task_job_log(
                            suite, itask.point, itask.tdef.name,
                            itask.submit_num, self.JOB_FILE_BASE))
                job_log_dirs.append(self.task_events_mgr.get_task_job_id(
                    itask.point, itask.tdef.name, itask.submit_num))
            cmd += job_log_dirs
            self.proc_pool.put_command(
                SuiteProcContext(
                    self.JOBS_SUBMIT,
                    cmd,
                    stdin_file_paths=stdin_file_paths,
                    job_log_dirs=job_log_dirs,
                    **kwargs
                ),
                self._submit_task_jobs_callback, [suite, itasks])
Code example #11
File: task_job_mgr.py  Project: zhengge2017/cylc
    def submit_task_jobs(self, suite, itasks, is_simulation=False):
        """Prepare and submit task jobs."""
        if is_simulation:
            return self._simulation_submit_task_jobs(itasks)

        # Prepare tasks for job submission
        prepared_tasks = self.prep_submit_task_jobs(suite, itasks)
        if not prepared_tasks:
            return

        # Submit task jobs
        auth_itasks = {}
        for itask in prepared_tasks:
            # The job file is now (about to be) used: reset the file write flag
            # so that subsequent manual retrigger will generate a new job file.
            itask.local_job_file_path = None
            itask.state.reset_state(TASK_STATUS_READY)
            if (itask.task_host, itask.task_owner) not in auth_itasks:
                auth_itasks[(itask.task_host, itask.task_owner)] = []
            auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
        for auth, itasks in sorted(auth_itasks.items()):
            cmd = ["cylc", self.JOBS_SUBMIT]
            if cylc.flags.debug:
                cmd.append("--debug")
            host, owner = auth
            remote_mode = False
            kwargs = {}
            for key, value, test_func in [
                    ('host', host, is_remote_host),
                    ('user', owner, is_remote_user)]:
                if test_func(value):
                    cmd.append('--%s=%s' % (key, value))
                    remote_mode = True
                    kwargs[key] = value
            if remote_mode:
                cmd.append('--remote-mode')
            cmd.append("--")
            cmd.append(GLOBAL_CFG.get_derived_host_item(
                suite, 'suite job log directory', host, owner))
            stdin_file_paths = []
            job_log_dirs = []
            for itask in sorted(itasks, key=lambda itask: itask.identity):
                if remote_mode:
                    stdin_file_paths.append(
                        self.task_events_mgr.get_task_job_log(
                            suite, itask.point, itask.tdef.name,
                            itask.submit_num, self.JOB_FILE_BASE))
                job_log_dirs.append(self.task_events_mgr.get_task_job_id(
                    itask.point, itask.tdef.name, itask.submit_num))
            cmd += job_log_dirs
            self.proc_pool.put_command(
                SuiteProcContext(
                    self.JOBS_SUBMIT,
                    cmd,
                    stdin_file_paths=stdin_file_paths,
                    job_log_dirs=job_log_dirs,
                    **kwargs
                ),
                self._submit_task_jobs_callback, [suite, itasks])
Code example #12
File: job_host.py  Project: kaday/cylc
    def init_suite_run_dir(self, suite_name, user_at_host):
        """Initialise suite run dir on a user@host.

        Create SUITE_RUN_DIR/log/job/ if necessary.
        Install suite contact environment file.
        Install suite python modules.

        Raise RemoteJobHostInitError if initialisation cannot complete.

        """
        if "@" in user_at_host:
            owner, host = user_at_host.split("@", 1)
        else:
            owner, host = None, user_at_host
        if (
            (owner, host) in [(None, "localhost"), (user, "localhost")]
            or host in self.initialised_hosts
            or self.single_task_mode
        ):
            return

        suite_run_dir = GLOBAL_CFG.get_derived_host_item(suite_name, "suite run directory")
        sources = [os.path.join(suite_run_dir, "cylc-suite-env")]
        if "CYLC_SUITE_DEF_PATH" in os.environ:
            sources.append(os.path.join(os.getenv("CYLC_SUITE_DEF_PATH"), "passphrase"))
        suite_run_py = os.path.join(suite_run_dir, "python")
        if os.path.isdir(suite_run_py):
            sources.append(suite_run_py)
        r_suite_run_dir = GLOBAL_CFG.get_derived_host_item(suite_name, "suite run directory", host, owner)
        r_log_job_dir = GLOBAL_CFG.get_derived_host_item(suite_name, "suite job log directory", host, owner)
        getLogger("main").log(INFO, "Initialising %s:%s" % (user_at_host, r_suite_run_dir))

        ssh_tmpl = GLOBAL_CFG.get_host_item("remote shell template", host, owner).replace(" %s", "")
        scp_tmpl = GLOBAL_CFG.get_host_item("remote copy template", host, owner)

        cmd1 = shlex.split(ssh_tmpl) + ["-n", user_at_host, "mkdir", "-p", r_suite_run_dir, r_log_job_dir]
        cmd2 = shlex.split(scp_tmpl) + ["-pr"] + sources + [user_at_host + ":" + r_suite_run_dir + "/"]
        for cmd in [cmd1, cmd2]:
            proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
            out, err = proc.communicate()
            if proc.wait():
                raise RemoteJobHostInitError(
                    user_at_host, " ".join([quote(item) for item in cmd]), proc.returncode, out, err
                )
        self.initialised_hosts.append(user_at_host)
Code example #13
 def get_task_job_log(
         self, suite, point, name, submit_num=None, tail=None):
     """Return the job log path."""
     args = [
         GLOBAL_CFG.get_derived_host_item(suite, "suite job log directory"),
         self.get_task_job_id(point, name, submit_num)]
     if tail:
         args.append(tail)
     return os.path.join(*args)
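A hedged usage sketch: the method joins the suite job log directory, the task job id and an optional tail. Assuming the job id has the point/name/NN shape seen in the other examples on this page (an assumption, not shown in this snippet), the join works out as follows; the paths are invented:

    import os

    def task_job_log_path(job_log_root, point, name, submit_num, tail=None):
        """Join a job log path roughly the way get_task_job_log does."""
        # job_log_root stands in for the "suite job log directory" item.
        args = [job_log_root, os.path.join(str(point), name, "%02d" % submit_num)]
        if tail:
            args.append(tail)
        return os.path.join(*args)

    print(task_job_log_path(
        "/home/me/cylc-run/my.suite/log/job", "2015", "model", 1, "job.out"))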
Code example #14
File: suite_logging.py  Project: ScottWales/cylc
    def __init__( self, suite ):

        self.ldir = GLOBAL_CFG.get_derived_host_item( suite, 'suite log directory' )
        self.path = os.path.join( self.ldir, 'log' ) 

        self.err_path = os.path.join( self.ldir, 'err' )
        self.roll_at_startup = GLOBAL_CFG.get( ['suite logging','roll over at start-up'] )
        self.n_keep = GLOBAL_CFG.get( ['suite logging','rolling archive length'] )
        self.max_bytes = GLOBAL_CFG.get( ['suite logging','maximum size in bytes'] )
Code example #15
File: suite_output.py  Project: ScottWales/cylc
    def __init__( self, suite ):

        sodir = GLOBAL_CFG.get_derived_host_item( suite, 'suite log directory' )
        self.opath = os.path.join( sodir, 'out' ) 
        self.epath = os.path.join( sodir, 'err' ) 

        # use same archive length as logging (TODO: document this)
        self.roll_at_startup = GLOBAL_CFG.get( ['suite logging','roll over at start-up'] )
        self.arclen = GLOBAL_CFG.get( ['suite logging','rolling archive length'] )
Code example #16
 def _load_remote_item(self, item, reg, owner, host):
     """Load content of service item from remote [owner@]host via SSH."""
     if not is_remote_host(host) and not is_remote_user(owner):
         return
     # Prefix STDOUT to ensure returned content is relevant
     prefix = r'[CYLC-AUTH] %(suite)s' % {'suite': reg}
     # Attempt to cat passphrase file under suite service directory
     from cylc.cfgspec.globalcfg import GLOBAL_CFG
     script = (
         r"""echo '%(prefix)s'; """
         r'''cat "%(run_d)s/%(srv_base)s/%(item)s"'''
     ) % {
         'prefix': prefix,
         'run_d': GLOBAL_CFG.get_derived_host_item(
             reg, 'suite run directory', host, owner),
         'srv_base': self.DIR_BASE_SRV,
         'item': item
     }
     import shlex
     command = shlex.split(
         GLOBAL_CFG.get_host_item('remote shell template', host, owner))
     command += ['-n', owner + '@' + host, script]
     from subprocess import Popen, PIPE
     try:
         proc = Popen(command, stdout=PIPE, stderr=PIPE)
     except OSError:
         if cylc.flags.debug:
             import traceback
             traceback.print_exc()
         return
     out, err = proc.communicate()
     ret_code = proc.wait()
     # Extract passphrase from STDOUT
     # It should live in the line with the correct prefix
     content = ""
     can_read = False
     for line in out.splitlines(True):
         if can_read:
             content += line
         elif line.strip() == prefix:
             can_read = True
     if not content or ret_code:
         if cylc.flags.debug:
             print >> sys.stderr, (
                 'ERROR: %(command)s # code=%(ret_code)s\n%(err)s\n'
             ) % {
                 'command': command,
                 # STDOUT may contain passphrase, so not safe to print
                 # 'out': out,
                 'err': err,
                 'ret_code': ret_code,
             }
         return
     return content
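The STDOUT handling above can be exercised without any SSH call; a self-contained sketch of the prefix scan on canned output (the sample strings are invented):

    def extract_after_prefix(out, prefix):
        """Return everything after the line that exactly matches prefix."""
        content = ""
        can_read = False
        for line in out.splitlines(True):
            if can_read:
                content += line
            elif line.strip() == prefix:
                can_read = True
        return content

    sample = "Warning: banner noise\n[CYLC-AUTH] my.suite\nsecret-passphrase\n"
    print(extract_after_prefix(sample, "[CYLC-AUTH] my.suite"))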
Code example #17
 def get_suite_srv_dir(self, reg, suite_owner=None):
     """Return service directory of a suite."""
     if not suite_owner:
         suite_owner = get_user()
     run_d = os.getenv("CYLC_SUITE_RUN_DIR")
     if (not run_d or os.getenv("CYLC_SUITE_NAME") != reg
             or os.getenv("CYLC_SUITE_OWNER") != suite_owner):
         from cylc.cfgspec.globalcfg import GLOBAL_CFG
         run_d = GLOBAL_CFG.get_derived_host_item(reg,
                                                  'suite run directory')
     return os.path.join(run_d, self.DIR_BASE_SRV)
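A standalone sketch of the same fall-back: trust $CYLC_SUITE_RUN_DIR only when the accompanying suite name and owner variables match, otherwise use a derived path (the fallback argument stands in for the GLOBAL_CFG lookup):

    import os

    def resolve_run_dir(reg, suite_owner, fallback):
        """Return the run directory for suite reg owned by suite_owner."""
        run_d = os.getenv("CYLC_SUITE_RUN_DIR")
        if (not run_d or os.getenv("CYLC_SUITE_NAME") != reg
                or os.getenv("CYLC_SUITE_OWNER") != suite_owner):
            run_d = fallback
        return run_d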
Code example #18
File: suite_srv_files_mgr.py  Project: m214089/cylc
 def _load_remote_item(self, item, reg, owner, host):
     """Load content of service item from remote [owner@]host via SSH."""
     if not is_remote_host(host) and not is_remote_user(owner):
         return
     # Prefix STDOUT to ensure returned content is relevant
     prefix = r'[CYLC-AUTH] %(suite)s' % {'suite': reg}
     # Attempt to cat passphrase file under suite service directory
     from cylc.cfgspec.globalcfg import GLOBAL_CFG
     script = (
         r"""echo '%(prefix)s'; """
         r'''cat "%(run_d)s/%(srv_base)s/%(item)s"'''
     ) % {
         'prefix': prefix,
         'run_d': GLOBAL_CFG.get_derived_host_item(
             reg, 'suite run directory', host, owner),
         'srv_base': self.DIR_BASE_SRV,
         'item': item
     }
     import shlex
     command = shlex.split(
         GLOBAL_CFG.get_host_item('ssh command', host, owner))
     command += ['-n', owner + '@' + host, script]
     from subprocess import Popen, PIPE
     try:
         proc = Popen(command, stdout=PIPE, stderr=PIPE)
     except OSError:
         if cylc.flags.debug:
             import traceback
             traceback.print_exc()
         return
     out, err = proc.communicate()
     ret_code = proc.wait()
     # Extract passphrase from STDOUT
     # It should live in the line with the correct prefix
     content = ""
     can_read = False
     for line in out.splitlines(True):
         if can_read:
             content += line
         elif line.strip() == prefix:
             can_read = True
     if not content or ret_code:
         if cylc.flags.debug:
             print >> sys.stderr, (
                 'ERROR: %(command)s # code=%(ret_code)s\n%(err)s\n'
             ) % {
                 'command': command,
                 # STDOUT may contain passphrase, so not safe to print
                 # 'out': out,
                 'err': err,
                 'ret_code': ret_code,
             }
         return
     return content
Code example #19
File: suite_srv_files_mgr.py  Project: m214089/cylc
 def get_suite_srv_dir(self, reg, suite_owner=None):
     """Return service directory of a suite."""
     if not suite_owner:
         suite_owner = USER
     run_d = os.getenv("CYLC_SUITE_RUN_DIR")
     if (not run_d or os.getenv("CYLC_SUITE_NAME") != reg or
             os.getenv("CYLC_SUITE_OWNER") != suite_owner):
         from cylc.cfgspec.globalcfg import GLOBAL_CFG
         run_d = GLOBAL_CFG.get_derived_host_item(
             reg, 'suite run directory')
     return os.path.join(run_d, self.DIR_BASE_SRV)
Code example #20
File: suite_output.py  Project: kaday/cylc
    def __init__(self, suite):

        sodir = GLOBAL_CFG.get_derived_host_item(suite, 'suite log directory')
        self.opath = os.path.join(sodir, 'out')
        self.epath = os.path.join(sodir, 'err')

        # use same archive length as logging (TODO: document this)
        self.roll_at_startup = GLOBAL_CFG.get(
            ['suite logging', 'roll over at start-up'])
        self.arclen = GLOBAL_CFG.get(
            ['suite logging', 'rolling archive length'])
Code example #21
File: job_host.py  Project: rbaumert/cylc
    def unlink_suite_contact_files(self, reg):
        """Remove suite contact files from initialised hosts.

        This is called on shutdown, so we don't want anything to hang.
        Terminate any incomplete SSH commands after 10 seconds.
        """
        # Issue all SSH commands in parallel
        procs = {}
        for (host, owner), should_unlink in self.initialised.items():
            if not should_unlink:
                continue
            user_at_host = host
            if owner:
                user_at_host = owner + '@' + host
            ssh_tmpl = GLOBAL_CFG.get_host_item('remote shell template', host,
                                                owner)
            r_suite_contact_file = os.path.join(
                GLOBAL_CFG.get_derived_host_item(reg, 'suite run directory',
                                                 host, owner),
                SuiteSrvFilesManager.DIR_BASE_SRV,
                SuiteSrvFilesManager.FILE_BASE_CONTACT)
            cmd = shlex.split(ssh_tmpl) + [
                '-n', user_at_host, 'rm', '-f', r_suite_contact_file
            ]
            procs[user_at_host] = (cmd, Popen(cmd, stdout=PIPE, stderr=PIPE))
        # Wait for commands to complete for a max of 10 seconds
        timeout = time() + 10.0
        while procs and time() < timeout:
            for user_at_host, (cmd, proc) in procs.copy().items():
                if proc.poll() is None:
                    continue
                del procs[user_at_host]
                out, err = proc.communicate()
                if proc.wait():
                    ERR.warning(
                        RemoteJobHostInitError(
                            RemoteJobHostInitError.MSG_TIDY, user_at_host,
                            ' '.join([quote(item) for item in cmd]),
                            proc.returncode, out, err))
        # Terminate any remaining commands
        for user_at_host, (cmd, proc) in procs.items():
            try:
                proc.terminate()
            except OSError:
                pass
            out, err = proc.communicate()
            if proc.wait():
                ERR.warning(
                    RemoteJobHostInitError(
                        RemoteJobHostInitError.MSG_TIDY, user_at_host,
                        ' '.join([quote(item) for item in cmd]),
                        proc.returncode, out, err))
Code example #22
 def __init__(self, suite, run_mode='live', ict=None, stop_point=None):
     self.run_mode = run_mode
     self.cts_str = None
     self.set_cts(ict, stop_point)
     self.dir_name = GLOBAL_CFG.get_derived_host_item(
         suite, 'suite state directory')
     self.file_name = os.path.join(self.dir_name, self.BASE_NAME)
     self.arch_len = GLOBAL_CFG.get(['state dump rolling archive length'])
     if not self.arch_len or int(self.arch_len) <= 1:
         self.arch_len = 1
     self.arch_files = []
     self.pool = None
     self.log = logging.getLogger('main')
Code example #23
 def __init__(self, suite, run_mode='live', ict=None, stop_point=None):
     self.run_mode = run_mode
     self.set_cts(ict, stop_point)
     self.dir_name = GLOBAL_CFG.get_derived_host_item(
         suite, 'suite state directory')
     self.file_name = os.path.join(self.dir_name, self.BASE_NAME)
     self.arch_len = GLOBAL_CFG.get(['state dump rolling archive length'])
     if not self.arch_len or int(self.arch_len) <= 1:
         self.arch_len = 1
     self.arch_files = []
     self.pool = None
     self.wireless = None
     self.log = logging.getLogger('main')
Code example #24
    def __init__(self, suite):

        self.ldir = GLOBAL_CFG.get_derived_host_item(
            suite, 'suite log directory')
        self.path = os.path.join(self.ldir, 'log')

        self.err_path = os.path.join(self.ldir, 'err')
        self.roll_at_startup = GLOBAL_CFG.get(
            ['suite logging', 'roll over at start-up'])
        self.n_keep = GLOBAL_CFG.get(
            ['suite logging', 'rolling archive length'])
        self.max_bytes = GLOBAL_CFG.get(
            ['suite logging', 'maximum size in bytes'])
Code example #25
File: task_job_mgr.py  Project: m214089/cylc
    def unlink_hosts_contacts(self, reg):
        """Remove suite contact files from initialised hosts.

        This is called on shutdown, so we don't want anything to hang.
        Terminate any incomplete SSH commands after 10 seconds.
        """
        # Issue all SSH commands in parallel
        procs = {}
        for (host, owner), should_unlink in self.init_host_map.items():
            if not should_unlink:
                continue
            user_at_host = host
            if owner:
                user_at_host = owner + '@' + host
            ssh_tmpl = GLOBAL_CFG.get_host_item('ssh command', host, owner)
            r_suite_contact_file = os.path.join(
                GLOBAL_CFG.get_derived_host_item(
                    reg, 'suite run directory', host, owner),
                self.suite_srv_files_mgr.DIR_BASE_SRV,
                self.suite_srv_files_mgr.FILE_BASE_CONTACT)
            cmd = shlex.split(ssh_tmpl) + [
                '-n', user_at_host, 'rm', '-f', r_suite_contact_file]
            procs[user_at_host] = (cmd, Popen(cmd, stdout=PIPE, stderr=PIPE))
        # Wait for commands to complete for a max of 10 seconds
        timeout = time() + 10.0
        while procs and time() < timeout:
            for user_at_host, (cmd, proc) in procs.copy().items():
                if proc.poll() is None:
                    continue
                del procs[user_at_host]
                out, err = proc.communicate()
                if proc.wait():
                    ERR.warning(RemoteJobHostInitError(
                        RemoteJobHostInitError.MSG_TIDY,
                        user_at_host, ' '.join([quote(item) for item in cmd]),
                        proc.returncode, out, err))
        # Terminate any remaining commands
        for user_at_host, (cmd, proc) in procs.items():
            try:
                proc.terminate()
            except OSError:
                pass
            out, err = proc.communicate()
            if proc.wait():
                ERR.warning(RemoteJobHostInitError(
                    RemoteJobHostInitError.MSG_TIDY,
                    user_at_host, ' '.join([quote(item) for item in cmd]),
                    proc.returncode, out, err))
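The wait-then-terminate pattern above, isolated with harmless local commands in place of the SSH calls; the command lines and the shorter timeout are illustrative only:

    import shlex
    from subprocess import PIPE, Popen
    from time import sleep, time

    procs = {}
    for key, line in [("fast", "sleep 1"), ("slow", "sleep 30")]:
        cmd = shlex.split(line)
        procs[key] = (cmd, Popen(cmd, stdout=PIPE, stderr=PIPE))
    timeout = time() + 5.0
    while procs and time() < timeout:
        for key, (cmd, proc) in procs.copy().items():
            if proc.poll() is None:
                continue  # still running
            del procs[key]
            proc.communicate()
            print("%s finished with %d" % (key, proc.returncode))
        sleep(0.1)
    for key, (cmd, proc) in procs.items():
        try:
            proc.terminate()  # give up on anything still running
        except OSError:
            pass
        proc.communicate()
        print("%s terminated" % key)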
Code example #26
File: suite_logging.py  Project: m214089/cylc
    def __init__(self, suite, test_params=None):
        if SuiteLog.__INSTANCE:
            raise Exception("Attempting to initiate a second singleton "
                            "instance.")

        self._group = None
        if not test_params:
            self.is_test = False
            self.max_bytes = GLOBAL_CFG.get(
                ['suite logging', 'maximum size in bytes'])
            self.roll_at_startup = GLOBAL_CFG.get(
                ['suite logging', 'roll over at start-up'])
            self.archive_length = GLOBAL_CFG.get(
                ['suite logging', 'rolling archive length'])
        else:
            self.is_test = True
            self.max_bytes = test_params['max_bytes']
            self.roll_at_startup = test_params['roll_at_startup']
            self.archive_length = 4

        # Log paths.
        if test_params:
            self.ldir = test_params['ldir']
        else:
            self.ldir = GLOBAL_CFG.get_derived_host_item(
                suite, 'suite log directory')
        self.log_paths = {}
        self.log_paths[self.LOG] = os.path.join(self.ldir, self.LOG)
        self.log_paths[self.OUT] = os.path.join(self.ldir, self.OUT)
        self.log_paths[self.ERR] = os.path.join(self.ldir, self.ERR)

        # The loggers.
        self.loggers = {}
        self.loggers[self.LOG] = None
        self.loggers[self.OUT] = None
        self.loggers[self.ERR] = None

        # Filename stamp functions.
        if self.is_test:
            self.stamp = lambda: get_current_time_string(True, True, True
                                                         ).replace('.', '-')
        else:
            self.stamp = lambda: get_current_time_string(False, True, True)

        SuiteLog.__INSTANCE = self
Code example #27
File: suite_logging.py  Project: rbaumert/cylc
    def __init__(self, suite, test_params=None):
        if SuiteLog.__INSTANCE:
            raise Exception("Attempting to initiate a second singleton "
                            "instance.")

        self._group = None
        if not test_params:
            self.is_test = False
            self.max_bytes = GLOBAL_CFG.get(
                ['suite logging', 'maximum size in bytes'])
            self.roll_at_startup = GLOBAL_CFG.get(
                ['suite logging', 'roll over at start-up'])
            self.archive_length = GLOBAL_CFG.get(
                ['suite logging', 'rolling archive length'])
        else:
            self.is_test = True
            self.max_bytes = test_params['max_bytes']
            self.roll_at_startup = test_params['roll_at_startup']
            self.archive_length = 4

        # Log paths.
        if test_params:
            self.ldir = test_params['ldir']
        else:
            self.ldir = GLOBAL_CFG.get_derived_host_item(
                suite, 'suite log directory')
        self.log_paths = {}
        self.log_paths[self.LOG] = os.path.join(self.ldir, self.LOG)
        self.log_paths[self.OUT] = os.path.join(self.ldir, self.OUT)
        self.log_paths[self.ERR] = os.path.join(self.ldir, self.ERR)

        # The loggers.
        self.loggers = {}
        self.loggers[self.LOG] = None
        self.loggers[self.OUT] = None
        self.loggers[self.ERR] = None

        # Filename stamp functions.
        if self.is_test:
            self.stamp = lambda: get_current_time_string(True, True, True
                                                         ).replace('.', '-')
        else:
            self.stamp = lambda: get_current_time_string(False, True, True)

        SuiteLog.__INSTANCE = self
Code example #28
File: task_job_mgr.py  Project: m214089/cylc
    def _run_job_cmd(self, cmd_key, suite, itasks, callback):
        """Run job commands, e.g. poll, kill, etc.

        Group itasks with their user@host.
        Put a job command for each user@host to the multiprocess pool.

        """
        if not itasks:
            return
        auth_itasks = {}
        for itask in itasks:
            if (itask.task_host, itask.task_owner) not in auth_itasks:
                auth_itasks[(itask.task_host, itask.task_owner)] = []
            auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
        for (host, owner), itasks in sorted(auth_itasks.items()):
            cmd = ["cylc", cmd_key]
            if cylc.flags.debug:
                cmd.append("--debug")
            try:
                if is_remote_host(host):
                    cmd.append("--host=%s" % (host))
            except IOError:
                # Bad host, run the command any way, command will fail and
                # callback will deal with it
                cmd.append("--host=%s" % (host))
            if is_remote_user(owner):
                cmd.append("--user=%s" % (owner))
            cmd.append("--")
            cmd.append(GLOBAL_CFG.get_derived_host_item(
                suite, "suite job log directory", host, owner))
            job_log_dirs = []
            for itask in sorted(itasks, key=lambda itask: itask.identity):
                job_log_dirs.append(self.task_events_mgr.get_task_job_id(
                    itask.point, itask.tdef.name, itask.submit_num))
            cmd += job_log_dirs
            self.proc_pool.put_command(
                SuiteProcContext(cmd_key, cmd), callback, [suite, itasks])
Code example #29
File: updater_graph.py  Project: aosprey/cylc
    def __init__(self, cfg, updater, theme, info_bar, xdot):
        super(GraphUpdater, self).__init__()

        self.quit = False
        self.cleared = False
        self.ignore_suicide = True
        self.focus_start_point_string = None
        self.focus_stop_point_string = None
        self.xdot = xdot
        self.first_update = False
        self.graph_disconnect = False
        self.action_required = True
        self.oldest_point_string = None
        self.newest_point_string = None
        self.orientation = "TB"  # Top to Bottom ordering of nodes
        self.best_fit = True  # zoom to page size
        self.normal_fit = False  # zoom to 1.0 scale
        self.crop = False
        self.subgraphs_on = False   # organise by cycle point.

        self.descendants = {}
        self.all_families = []
        self.triggering_families = []
        self.write_dot_frames = False

        self.prev_graph_id = ()

        self.cfg = cfg
        self.updater = updater
        self.theme = theme
        self.info_bar = info_bar
        self.state_summary = {}
        self.fam_state_summary = {}
        self.global_summary = {}
        self.last_update_time = None

        self.god = None
        self.mode = "waiting..."
        self.dt = "waiting..."

        self.prev_graph_id = ()

        # empty graphw object:
        self.graphw = graphing.CGraphPlain(self.cfg.suite)

        # TODO - handle failure to get a remote proxy in reconnect()

        self.graph_warned = {}

        # lists of nodes to newly group or ungroup (not of all currently
        # grouped and ungrouped nodes - still held server side)
        self.group = []
        self.ungroup = []
        self.have_leaves_and_feet = False
        self.leaves = []
        self.feet = []

        self.ungroup_recursive = False
        if "graph" in self.cfg.ungrouped_views:
            self.ungroup_all = True
            self.group_all = False
        else:
            self.ungroup_all = False
            self.group_all = True

        self.graph_frame_count = 0
        self.suite_share_dir = GLOBAL_CFG.get_derived_host_item(
            self.cfg.suite, 'suite share directory')
Code example #30
File: job_logs.py  Project: dmanubens/cylc
 def __init__(self, suite, task_name, task_point):
     dir_ = GLOBAL_CFG.get_derived_host_item(
         suite, "suite job log directory")
     self.base_path = os.path.join(dir_, str(task_point), task_name)
     self.suite_logger = logging.getLogger("main")
Code example #31
    def _prep_submit_task_job_impl(self, suite, itask, rtconfig):
        """Helper for self._prep_submit_task_job."""
        # Submit number
        itask.submit_num += 1
        itask.summary['submit_num'] = itask.submit_num

        itask.task_owner = rtconfig['remote']['owner']
        if itask.task_owner:
            owner_at_host = itask.task_owner + "@" + itask.task_host
        else:
            owner_at_host = itask.task_host
        itask.summary['host'] = owner_at_host
        itask.summary['job_hosts'][itask.submit_num] = owner_at_host

        itask.summary['batch_sys_name'] = rtconfig['job']['batch system']
        for name in rtconfig['extra log files']:
            itask.summary['logfiles'].append(expandvars(name))
        try:
            batch_sys_conf = self.task_events_mgr.get_host_conf(
                itask, 'batch systems')[rtconfig['job']['batch system']]
        except (TypeError, KeyError):
            batch_sys_conf = {}
        try:
            itask.summary[self.KEY_EXECUTE_TIME_LIMIT] = float(
                rtconfig['job']['execution time limit'])
        except TypeError:
            pass
        if itask.summary[self.KEY_EXECUTE_TIME_LIMIT]:
            # Default = 1, 2 and 7 minutes intervals, roughly 1, 3 and 10
            # minutes after time limit exceeded
            itask.poll_timers[self.KEY_EXECUTE_TIME_LIMIT] = (TaskActionTimer(
                delays=batch_sys_conf.get(
                    'execution time limit polling intervals', [60, 120, 420])))
        for label, key in [
            ('submission polling intervals', TASK_STATUS_SUBMITTED),
            ('execution polling intervals', TASK_STATUS_RUNNING)
        ]:
            if key in itask.poll_timers:
                itask.poll_timers[key].reset()
            else:
                values = self.task_events_mgr.get_host_conf(itask,
                                                            label,
                                                            skey='job')
                if values:
                    itask.poll_timers[key] = TaskActionTimer(delays=values)

        scripts = self._get_job_scripts(itask, rtconfig)

        # Retry delays, needed for the try_num
        self._set_retry_timers(itask, rtconfig)

        # Location of job file, etc
        self._create_job_log_path(suite, itask)
        job_d = self.task_events_mgr.get_task_job_id(itask.point,
                                                     itask.tdef.name,
                                                     itask.submit_num)
        job_file_path = os.path.join(
            GLOBAL_CFG.get_derived_host_item(suite, "suite job log directory",
                                             itask.task_host,
                                             itask.task_owner), job_d,
            self.JOB_FILE_BASE)
        return {
            'batch_system_name': rtconfig['job']['batch system'],
            'batch_submit_command_template': (
                rtconfig['job']['batch submit command template']),
            'batch_system_conf': batch_sys_conf,
            'directives': rtconfig['directives'],
            'environment': rtconfig['environment'],
            'execution_time_limit': itask.summary[self.KEY_EXECUTE_TIME_LIMIT],
            'env-script': rtconfig['env-script'],
            'err-script': rtconfig['err-script'],
            'host': itask.task_host,
            'init-script': rtconfig['init-script'],
            'job_file_path': job_file_path,
            'job_d': job_d,
            'namespace_hierarchy': itask.tdef.namespace_hierarchy,
            'owner': itask.task_owner,
            'param_env_tmpl': rtconfig['parameter environment templates'],
            'param_var': itask.tdef.param_var,
            'post-script': scripts[2],
            'pre-script': scripts[0],
            'remote_suite_d': rtconfig['remote']['suite definition directory'],
            'script': scripts[1],
            'shell': rtconfig['job']['shell'],
            'submit_num': itask.submit_num,
            'suite_name': suite,
            'task_id': itask.identity,
            'try_num': itask.get_try_num(),
            'work_d': rtconfig['work sub-directory'],
        }
Code example #32
    def init_suite_run_dir(self, reg, user_at_host):
        """Initialise suite run dir on a user@host.

        Create SUITE_RUN_DIR/log/job/ if necessary.
        Install suite contact environment file.
        Install suite python modules.

        Raise RemoteJobHostInitError if initialisation cannot complete.

        """
        if "@" in user_at_host:
            owner, host = user_at_host.split("@", 1)
        else:
            owner, host = None, user_at_host
        if (
            (owner, host) in [(None, "localhost"), (USER, "localhost")]
            or host in self.initialised_hosts
            or self.single_task_mode
        ):
            return

        r_suite_run_dir = GLOBAL_CFG.get_derived_host_item(reg, "suite run directory", host, owner)
        r_log_job_dir = GLOBAL_CFG.get_derived_host_item(reg, "suite job log directory", host, owner)
        r_suite_srv_dir = os.path.join(r_suite_run_dir, self.suite_srv_files_mgr.DIR_BASE_SRV)

        # Create a UUID file in the service directory.
        # If remote host has the file in its service directory, we can assume
        # that the remote host has a shared file system with the suite host.
        ssh_tmpl = GLOBAL_CFG.get_host_item("remote shell template", host, owner)
        uuid_str = str(uuid4())
        uuid_fname = os.path.join(self.suite_srv_files_mgr.get_suite_srv_dir(reg), uuid_str)
        try:
            open(uuid_fname, "wb").close()
            proc = Popen(
                shlex.split(ssh_tmpl) + ["-n", user_at_host, "test", "-e", os.path.join(r_suite_srv_dir, uuid_str)],
                stdout=PIPE,
                stderr=PIPE,
            )
            if proc.wait() == 0:
                # Initialised, but no need to tidy up
                self.initialised_hosts[user_at_host] = False
                return
        finally:
            try:
                os.unlink(uuid_fname)
            except OSError:
                pass

        cmds = []
        # Command to create suite directory structure on remote host.
        cmds.append(
            shlex.split(ssh_tmpl) + ["-n", user_at_host, "mkdir", "-p", r_suite_run_dir, r_log_job_dir, r_suite_srv_dir]
        )
        # Command to copy contact and authentication files to remote host.
        # Note: no need to do this if task communication method is "poll".
        should_unlink = GLOBAL_CFG.get_host_item("task communication method", host, owner) != "poll"
        scp_tmpl = GLOBAL_CFG.get_host_item("remote copy template", host, owner)
        if should_unlink:
            cmds.append(
                shlex.split(scp_tmpl)
                + [
                    "-p",
                    self.suite_srv_files_mgr.get_contact_file(reg),
                    self.suite_srv_files_mgr.get_auth_item(self.suite_srv_files_mgr.FILE_BASE_PASSPHRASE, reg),
                    self.suite_srv_files_mgr.get_auth_item(self.suite_srv_files_mgr.FILE_BASE_SSL_CERT, reg),
                    user_at_host + ":" + r_suite_srv_dir + "/",
                ]
            )
        # Command to copy python library to remote host.
        suite_run_py = os.path.join(GLOBAL_CFG.get_derived_host_item(reg, "suite run directory"), "python")
        if os.path.isdir(suite_run_py):
            cmds.append(shlex.split(scp_tmpl) + ["-pr", suite_run_py, user_at_host + ":" + r_suite_run_dir + "/"])
        # Run commands in sequence.
        for cmd in cmds:
            proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
            out, err = proc.communicate()
            if proc.wait():
                raise RemoteJobHostInitError(
                    RemoteJobHostInitError.MSG_INIT,
                    user_at_host,
                    " ".join([quote(item) for item in cmd]),
                    proc.returncode,
                    out,
                    err,
                )
        self.initialised_hosts[user_at_host] = should_unlink
        LOG.info("Initialised %s:%s" % (user_at_host, r_suite_run_dir))
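The shared-filesystem probe in the middle of the method above, reduced to its essentials; the plain "ssh" invocation and the directory arguments are placeholders for the configured template and the derived cylc paths:

    import os
    from subprocess import PIPE, Popen
    from uuid import uuid4

    def shares_filesystem(local_srv_dir, user_at_host, remote_srv_dir):
        """True if a file created locally is immediately visible remotely."""
        uuid_str = str(uuid4())
        uuid_fname = os.path.join(local_srv_dir, uuid_str)
        try:
            open(uuid_fname, "wb").close()
            proc = Popen(
                ["ssh", "-n", user_at_host,
                 "test", "-e", os.path.join(remote_srv_dir, uuid_str)],
                stdout=PIPE, stderr=PIPE)
            return proc.wait() == 0
        finally:
            try:
                os.unlink(uuid_fname)
            except OSError:
                pass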
Code example #33
    def submit_task_jobs(self, suite, itasks, is_simulation=False):
        """Prepare and submit task jobs.

        Submit tasks where possible. Ignore tasks that are waiting for host
        select command to complete, or tasks that are waiting for remote
        initialisation. Bad host select command, error writing to a job file or
        bad remote initialisation will cause a bad task - leading to submission
        failure.

        This method uses prep_submit_task_job() as helper.

        Return (list): list of tasks that attempted submission
        """
        if is_simulation:
            return self._simulation_submit_task_jobs(itasks)

        # Prepare tasks for job submission
        prepared_tasks, bad_tasks = self.prep_submit_task_jobs(suite, itasks)
        if not prepared_tasks:
            return bad_tasks

        # Reset consumed host selection results
        self.task_remote_mgr.remote_host_select_reset()

        # Group task jobs by (host, owner)
        auth_itasks = {}  # {(host, owner): [itask, ...], ...}
        for itask in prepared_tasks:
            auth_itasks.setdefault((itask.task_host, itask.task_owner), [])
            auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
        # Submit task jobs for each (host, owner) group
        done_tasks = bad_tasks
        for (host, owner), itasks in sorted(auth_itasks.items()):
            is_init = self.task_remote_mgr.remote_init(host, owner)
            if is_init is None:
                # Remote is waiting to be initialised
                for itask in itasks:
                    itask.summary['latest_message'] = self.REMOTE_INIT_MSG
                continue
            # Persist
            if owner:
                owner_at_host = owner + '@' + host
            else:
                owner_at_host = host
            now_str = get_current_time_string()
            done_tasks.extend(itasks)
            for itask in itasks:
                # Log and persist
                LOG.info('submit-num=%d, owner@host=%s' %
                         (itask.submit_num, owner_at_host),
                         itask=itask)
                self.suite_db_mgr.put_insert_task_jobs(
                    itask, {
                        'is_manual_submit': itask.is_manual_submit,
                        'try_num': itask.get_try_num(),
                        'time_submit': now_str,
                        'user_at_host': owner_at_host,
                        'batch_sys_name': itask.summary['batch_sys_name'],
                    })
                itask.is_manual_submit = False
            if is_init == REMOTE_INIT_FAILED:
                # Remote has failed to initialise
                # Set submit-failed for all affected tasks
                for itask in itasks:
                    itask.local_job_file_path = None  # reset for retry
                    self.task_events_mgr.log_task_job_activity(
                        SuiteProcContext(self.JOBS_SUBMIT,
                                         '(init %s)' % owner_at_host,
                                         err=REMOTE_INIT_FAILED,
                                         ret_code=1), suite, itask.point,
                        itask.tdef.name)
                    self.task_events_mgr.process_message(
                        itask, CRITICAL,
                        self.task_events_mgr.EVENT_SUBMIT_FAILED,
                        self.poll_task_jobs)
                continue
            # Build the "cylc jobs-submit" command
            cmd = ['cylc', self.JOBS_SUBMIT]
            if cylc.flags.debug:
                cmd.append('--debug')
            remote_mode = False
            kwargs = {}
            for key, value, test_func in [('host', host, is_remote_host),
                                          ('user', owner, is_remote_user)]:
                if test_func(value):
                    cmd.append('--%s=%s' % (key, value))
                    remote_mode = True
                    kwargs[key] = value
            if remote_mode:
                cmd.append('--remote-mode')
            cmd.append('--')
            cmd.append(
                GLOBAL_CFG.get_derived_host_item(suite,
                                                 'suite job log directory',
                                                 host, owner))
            stdin_file_paths = []
            job_log_dirs = []
            for itask in sorted(itasks, key=lambda itask: itask.identity):
                if remote_mode:
                    stdin_file_paths.append(
                        self.task_events_mgr.get_task_job_log(
                            suite, itask.point, itask.tdef.name,
                            itask.submit_num, self.JOB_FILE_BASE))
                job_log_dirs.append(
                    self.task_events_mgr.get_task_job_id(
                        itask.point, itask.tdef.name, itask.submit_num))
                # The job file is now (about to be) used: reset the file write
                # flag so that subsequent manual retrigger will generate a new
                # job file.
                itask.local_job_file_path = None
                itask.state.reset_state(TASK_STATUS_READY)
                if itask.state.outputs.has_custom_triggers():
                    self.suite_db_mgr.put_update_task_outputs(itask)
            cmd += job_log_dirs
            self.proc_pool.put_command(
                SuiteProcContext(self.JOBS_SUBMIT,
                                 cmd,
                                 stdin_file_paths=stdin_file_paths,
                                 job_log_dirs=job_log_dirs,
                                 **kwargs), self._submit_task_jobs_callback,
                [suite, itasks])
        return done_tasks
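
Before submission, the prepared tasks are batched by (host, owner) so that each batch can share a single "cylc jobs-submit" invocation. A minimal sketch of that grouping, using a namedtuple as a stand-in for cylc's task proxy objects:

from collections import namedtuple

Task = namedtuple("Task", "identity task_host task_owner")


def group_by_auth(tasks):
    """Group tasks by (host, owner) so each group shares one submission."""
    auth_tasks = {}
    for task in tasks:
        auth_tasks.setdefault(
            (task.task_host, task.task_owner), []).append(task)
    return auth_tasks


tasks = [
    Task("foo.1", "hpc1", None),
    Task("bar.1", "hpc1", None),
    Task("baz.1", "localhost", None),
]
for (host, owner), batch in sorted(group_by_auth(tasks).items()):
    print(host, owner, [task.identity for task in batch])
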
Code example #34
File: job_logs.py Project: dmanubens-zz/cylc
 def __init__(self, suite, task_name, task_point):
     dir_ = GLOBAL_CFG.get_derived_host_item(suite,
                                             "suite job log directory")
     self.base_path = os.path.join(dir_, str(task_point), task_name)
     self.suite_logger = logging.getLogger("main")
Code example #35
File: task_job_mgr.py Project: m214089/cylc
    def init_host(self, reg, host, owner):
        """Initialise suite run dir on a user@host.

        Create SUITE_RUN_DIR/log/job/ if necessary.
        Install suite contact environment file.
        Install suite python modules.

        Raise RemoteJobHostInitError if initialisation cannot complete.

        """
        if host is None:
            host = 'localhost'
        if ((host, owner) in [('localhost', None), ('localhost', USER)] or
                (host, owner) in self.init_host_map or self.single_task_mode):
            return
        user_at_host = host
        if owner:
            user_at_host = owner + '@' + host

        r_suite_run_dir = GLOBAL_CFG.get_derived_host_item(
            reg, 'suite run directory', host, owner)
        r_log_job_dir = GLOBAL_CFG.get_derived_host_item(
            reg, 'suite job log directory', host, owner)
        r_suite_srv_dir = os.path.join(
            r_suite_run_dir, self.suite_srv_files_mgr.DIR_BASE_SRV)

        # Create a UUID file in the service directory.
        # If remote host has the file in its service directory, we can assume
        # that the remote host has a shared file system with the suite host.
        ssh_tmpl = GLOBAL_CFG.get_host_item('ssh command', host, owner)
        uuid_str = str(uuid4())
        uuid_fname = os.path.join(
            self.suite_srv_files_mgr.get_suite_srv_dir(reg), uuid_str)
        try:
            open(uuid_fname, 'wb').close()
            proc = Popen(
                shlex.split(ssh_tmpl) + [
                    '-n', user_at_host,
                    'test', '-e', os.path.join(r_suite_srv_dir, uuid_str)],
                stdout=PIPE, stderr=PIPE)
            if proc.wait() == 0:
                # Initialised, but no need to tidy up
                self.init_host_map[(host, owner)] = False
                return
        finally:
            try:
                os.unlink(uuid_fname)
            except OSError:
                pass

        cmds = []
        # Command to create suite directory structure on remote host.
        cmds.append(shlex.split(ssh_tmpl) + [
            '-n', user_at_host,
            'mkdir', '-p',
            r_suite_run_dir, r_log_job_dir, r_suite_srv_dir])
        # Command to copy contact and authentication files to remote host.
        # Note: no need to do this if task communication method is "poll".
        should_unlink = GLOBAL_CFG.get_host_item(
            'task communication method', host, owner) != "poll"
        if should_unlink:
            scp_tmpl = GLOBAL_CFG.get_host_item('scp command', host, owner)
            cmds.append(shlex.split(scp_tmpl) + [
                '-p',
                self.suite_srv_files_mgr.get_contact_file(reg),
                self.suite_srv_files_mgr.get_auth_item(
                    self.suite_srv_files_mgr.FILE_BASE_PASSPHRASE, reg),
                self.suite_srv_files_mgr.get_auth_item(
                    self.suite_srv_files_mgr.FILE_BASE_SSL_CERT, reg),
                user_at_host + ':' + r_suite_srv_dir + '/'])
        # Command to copy python library to remote host.
        suite_run_py = os.path.join(
            GLOBAL_CFG.get_derived_host_item(reg, 'suite run directory'),
            'python')
        if os.path.isdir(suite_run_py):
            cmds.append(shlex.split(scp_tmpl) + [
                '-pr',
                suite_run_py, user_at_host + ':' + r_suite_run_dir + '/'])
        # Run commands in sequence.
        for cmd in cmds:
            proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
            out, err = proc.communicate()
            if proc.wait():
                raise RemoteJobHostInitError(
                    RemoteJobHostInitError.MSG_INIT,
                    user_at_host, ' '.join([quote(item) for item in cmd]),
                    proc.returncode, out, err)
        self.init_host_map[(host, owner)] = should_unlink
        LOG.info('Initialised %s:%s' % (user_at_host, r_suite_run_dir))
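
The tail of init_host is a generic "run the prepared commands in sequence and fail fast" loop. The same pattern in isolation, with RemoteInitError standing in for cylc's RemoteJobHostInitError:

from subprocess import PIPE, Popen

try:
    from shlex import quote  # Python 3
except ImportError:
    from pipes import quote  # Python 2


class RemoteInitError(Exception):
    """Illustrative stand-in for RemoteJobHostInitError."""


def run_in_sequence(cmds):
    """Run each command; raise on the first non-zero exit status."""
    for cmd in cmds:
        proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
        out, err = proc.communicate()
        if proc.returncode:
            raise RemoteInitError(
                "command failed (%d): %s\n%s" % (
                    proc.returncode,
                    " ".join(quote(item) for item in cmd),
                    err))
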
Code example #36
File: task_job_mgr.py Project: zhengge2017/cylc
    def init_host(self, reg, host, owner):
        """Initialise suite run dir on a user@host.

        Create SUITE_RUN_DIR/log/job/ if necessary.
        Install suite contact environment file.
        Install suite python modules.

        Raise RemoteJobHostInitError if initialisation cannot complete.

        """
        if host is None:
            host = 'localhost'
        if (self.single_task_mode or
                (host, owner) in self.init_host_map or
                not is_remote(host, owner)):
            return
        user_at_host = host
        if owner:
            user_at_host = owner + '@' + host

        r_suite_run_dir = GLOBAL_CFG.get_derived_host_item(
            reg, 'suite run directory', host, owner)
        r_log_job_dir = GLOBAL_CFG.get_derived_host_item(
            reg, 'suite job log directory', host, owner)
        r_suite_srv_dir = os.path.join(
            r_suite_run_dir, self.suite_srv_files_mgr.DIR_BASE_SRV)

        # Create a UUID file in the service directory.
        # If remote host has the file in its service directory, we can assume
        # that the remote host has a shared file system with the suite host.
        ssh_tmpl = GLOBAL_CFG.get_host_item('ssh command', host, owner)
        uuid_str = str(uuid4())
        uuid_fname = os.path.join(
            self.suite_srv_files_mgr.get_suite_srv_dir(reg), uuid_str)
        try:
            open(uuid_fname, 'wb').close()
            proc = Popen(
                shlex.split(ssh_tmpl) + [
                    '-n', user_at_host,
                    'test', '-e', os.path.join(r_suite_srv_dir, uuid_str)],
                stdout=PIPE, stderr=PIPE)
            if proc.wait() == 0:
                # Initialised, but no need to tidy up
                self.init_host_map[(host, owner)] = False
                return
        finally:
            try:
                os.unlink(uuid_fname)
            except OSError:
                pass

        cmds = []
        # Command to create suite directory structure on remote host.
        cmds.append(shlex.split(ssh_tmpl) + [
            '-n', user_at_host,
            'mkdir', '-p',
            r_suite_run_dir, r_log_job_dir, r_suite_srv_dir])
        # Command to copy contact and authentication files to remote host.
        # Note: no need to do this if task communication method is "poll".
        should_unlink = GLOBAL_CFG.get_host_item(
            'task communication method', host, owner) != "poll"
        if should_unlink:
            scp_tmpl = GLOBAL_CFG.get_host_item('scp command', host, owner)
            # Handle not having SSL certs installed.
            try:
                ssl_cert = self.suite_srv_files_mgr.get_auth_item(
                    self.suite_srv_files_mgr.FILE_BASE_SSL_CERT, reg)
            except (SuiteServiceFileError, ValueError):
                ssl_cert = None
            cmds.append(shlex.split(scp_tmpl) + [
                '-p',
                self.suite_srv_files_mgr.get_contact_file(reg),
                self.suite_srv_files_mgr.get_auth_item(
                    self.suite_srv_files_mgr.FILE_BASE_PASSPHRASE, reg),
                user_at_host + ':' + r_suite_srv_dir + '/'])
            if ssl_cert is not None:
                cmds[-1].insert(-1, ssl_cert)
        # Command to copy python library to remote host.
        suite_run_py = os.path.join(
            GLOBAL_CFG.get_derived_host_item(reg, 'suite run directory'),
            'python')
        if os.path.isdir(suite_run_py):
            cmds.append(shlex.split(scp_tmpl) + [
                '-pr',
                suite_run_py, user_at_host + ':' + r_suite_run_dir + '/'])
        # Run commands in sequence.
        for cmd in cmds:
            proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
            out, err = proc.communicate()
            if proc.wait():
                raise RemoteJobHostInitError(
                    RemoteJobHostInitError.MSG_INIT,
                    user_at_host, ' '.join(quote(item) for item in cmd),
                    proc.returncode, out, err)
        self.init_host_map[(host, owner)] = should_unlink
        LOG.info('Initialised %s:%s' % (user_at_host, r_suite_run_dir))
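
This variant also shows a small list-building trick: the copy command is assembled without the SSL certificate, and the certificate is spliced in just before the destination when one exists, so that "owner@host:..." stays last (cmds[-1].insert(-1, ssl_cert)). Isolated, with placeholder paths:

scp_cmd = ["scp", "-p",
           "/path/to/.service/contact",
           "/path/to/.service/passphrase",
           "owner@host:/path/to/.service/"]
ssl_cert = None  # or a certificate path when HTTPS task comms are configured
if ssl_cert is not None:
    scp_cmd.insert(-1, ssl_cert)  # keep the remote destination last
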
Code example #37
File: suite_logging.py Project: m214089/cylc
 def get_dir_for_suite(suite):
     """Returns the logging directory for a given suite without setting up
     suite logging."""
     return GLOBAL_CFG.get_derived_host_item(suite, 'suite log directory')
Code example #38
    def remote_init(self, host, owner):
        """Initialise a remote [owner@]host if necessary.

        Create UUID file on suite host ".service/uuid" for remotes to identify
        shared file system with suite host.

        Call "cylc remote-init" to install suite items to remote:
            ".service/contact": HTTP(S) and SSH+HTTP(S) task comm
            ".service/passphrase": HTTP(S) task comm
            ".service/ssl.cert": HTTPS task comm
            "python/": if source exists

        Return:
            REMOTE_INIT_NOT_REQUIRED:
                If remote init is not required, e.g. not remote
            REMOTE_INIT_DONE:
                If remote init done.
            REMOTE_INIT_FAILED:
                If init of the remote failed.
                Note: the failed status is cleared on return to allow retry.
            None:
                If waiting for remote init command to complete

        """
        if self.single_task_mode or not is_remote(host, owner):
            return REMOTE_INIT_NOT_REQUIRED
        try:
            status = self.remote_init_map[(host, owner)]
        except KeyError:
            pass  # Not yet initialised
        else:
            if status == REMOTE_INIT_FAILED:
                del self.remote_init_map[(host, owner)]  # reset to allow retry
            return status

        # Determine what items to install
        items = self._remote_init_items(host, owner)
        # No item to install
        if not items:
            self.remote_init_map[(host, owner)] = REMOTE_INIT_NOT_REQUIRED
            return self.remote_init_map[(host, owner)]

        # Create a temporary tar file containing "items"; it is passed to the
        # command via "stdin_file_paths" below.
        tmphandle = NamedTemporaryFile()
        tarhandle = tarfile.open(fileobj=tmphandle, mode='w')
        for path, arcname in items:
            tarhandle.add(path, arcname=arcname)
        tarhandle.close()
        tmphandle.seek(0)
        # UUID file - for remote to identify shared file system with suite host
        uuid_fname = os.path.join(
            self.suite_srv_files_mgr.get_suite_srv_dir(self.suite), 'uuid')
        if not os.path.exists(uuid_fname):
            open(uuid_fname, 'wb').write(str(self.uuid))
        # Build the command
        cmd = ['cylc', 'remote-init']
        if is_remote_host(host):
            cmd.append('--host=%s' % host)
        if is_remote_user(owner):
            cmd.append('--user=%s' % owner)
        if cylc.flags.debug:
            cmd.append('--debug')
        cmd.append(str(self.uuid))
        cmd.append(
            GLOBAL_CFG.get_derived_host_item(self.suite, 'suite run directory',
                                             host, owner))
        self.proc_pool.put_command(
            SuiteProcContext('remote-init',
                             cmd,
                             stdin_file_paths=[tmphandle.name]),
            self._remote_init_callback, [host, owner, tmphandle])
        # None status: Waiting for command to finish
        self.remote_init_map[(host, owner)] = None
        return self.remote_init_map[(host, owner)]
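
remote_init packages the files to install into a tarball written to a temporary file, then streams that file to the remote command's standard input. A minimal sketch of the packaging step, leaving the command itself to the caller:

import tarfile
from subprocess import PIPE, Popen
from tempfile import NamedTemporaryFile


def stream_items(items, cmd):
    """Tar (path, arcname) pairs into a temp file and feed it to cmd's stdin."""
    tmphandle = NamedTemporaryFile()
    tarhandle = tarfile.open(fileobj=tmphandle, mode="w")
    for path, arcname in items:
        tarhandle.add(path, arcname=arcname)
    tarhandle.close()
    tmphandle.seek(0)  # rewind so the subprocess reads from the start
    proc = Popen(cmd, stdin=tmphandle, stdout=PIPE, stderr=PIPE)
    out, err = proc.communicate()
    return proc.returncode, out, err
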
Code example #39
File: suite_logging.py Project: zhengge2017/cylc
 def get_dir_for_suite(suite):
     """Returns the logging directory for a given suite without setting up
     suite logging."""
     return GLOBAL_CFG.get_derived_host_item(suite, 'suite log directory')
Code example #40
File: task_job_mgr.py Project: m214089/cylc
    def _prep_submit_task_job_impl(self, suite, itask):
        """Helper for self._prep_submit_task_job."""
        overrides = BroadcastServer.get_inst().get(itask.identity)
        if overrides:
            rtconfig = pdeepcopy(itask.tdef.rtconfig)
            poverride(rtconfig, overrides)
        else:
            rtconfig = itask.tdef.rtconfig

        # Retry delays, needed for the try_num
        self._set_retry_timers(itask, rtconfig)

        # Submit number and try number
        LOG.debug("[%s] -incrementing submit number" % (itask.identity,))
        itask.submit_num += 1
        itask.summary['submit_num'] = itask.submit_num
        itask.local_job_file_path = None
        self.suite_db_mgr.put_insert_task_jobs(itask, {
            "is_manual_submit": itask.is_manual_submit,
            "try_num": itask.get_try_num(),
            "time_submit": get_current_time_string(),
        })

        itask.summary['batch_sys_name'] = rtconfig['job']['batch system']
        for name in rtconfig['extra log files']:
            itask.summary['logfiles'].append(expandvars(name))

        # Determine task host settings now, just before job submission,
        # because dynamic host selection may be used.

        # host may be None (= run task on suite host)
        itask.task_host = get_task_host(rtconfig['remote']['host'])
        if not itask.task_host:
            itask.task_host = 'localhost'
        elif itask.task_host != "localhost":
            LOG.info("[%s] -Task host: %s" % (
                itask.identity, itask.task_host))

        itask.task_owner = rtconfig['remote']['owner']

        if itask.task_owner:
            user_at_host = itask.task_owner + "@" + itask.task_host
        else:
            user_at_host = itask.task_host
        itask.summary['host'] = user_at_host
        itask.summary['job_hosts'][itask.submit_num] = user_at_host
        try:
            batch_sys_conf = self.task_events_mgr.get_host_conf(
                itask, 'batch systems')[rtconfig['job']['batch system']]
        except (TypeError, KeyError):
            batch_sys_conf = {}
        try:
            itask.summary[self.KEY_EXECUTE_TIME_LIMIT] = float(
                rtconfig['job']['execution time limit'])
        except TypeError:
            pass
        if itask.summary[self.KEY_EXECUTE_TIME_LIMIT]:
            # Default = 1, 2 and 7 minute intervals, i.e. polls roughly 1, 3
            # and 10 minutes after the time limit is exceeded
            itask.poll_timers[self.KEY_EXECUTE_TIME_LIMIT] = (
                TaskActionTimer(delays=batch_sys_conf.get(
                    'execution time limit polling intervals', [60, 120, 420])))
        for label, key in [
                ('submission polling intervals', TASK_STATUS_SUBMITTED),
                ('execution polling intervals', TASK_STATUS_RUNNING)]:
            if key in itask.poll_timers:
                itask.poll_timers[key].reset()
            else:
                values = self.task_events_mgr.get_host_conf(
                    itask, label, skey='job')
                if values:
                    itask.poll_timers[key] = TaskActionTimer(delays=values)

        self.init_host(suite, itask.task_host, itask.task_owner)
        self.suite_db_mgr.put_update_task_jobs(itask, {
            "user_at_host": user_at_host,
            "batch_sys_name": itask.summary['batch_sys_name'],
        })
        itask.is_manual_submit = False

        scripts = self._get_job_scripts(itask, rtconfig)

        # Location of job file, etc
        self._create_job_log_path(suite, itask)
        job_d = self.task_events_mgr.get_task_job_id(
            itask.point, itask.tdef.name, itask.submit_num)
        job_file_path = os.path.join(
            GLOBAL_CFG.get_derived_host_item(
                suite, "suite job log directory",
                itask.task_host, itask.task_owner),
            job_d, self.JOB_FILE_BASE)
        return {
            'batch_system_name': rtconfig['job']['batch system'],
            'batch_submit_command_template': (
                rtconfig['job']['batch submit command template']),
            'batch_system_conf': batch_sys_conf,
            'directives': rtconfig['directives'],
            'environment': rtconfig['environment'],
            'execution_time_limit': itask.summary[self.KEY_EXECUTE_TIME_LIMIT],
            'env-script': rtconfig['env-script'],
            'err-script': rtconfig['err-script'],
            'host': itask.task_host,
            'init-script': rtconfig['init-script'],
            'job_file_path': job_file_path,
            'job_d': job_d,
            'namespace_hierarchy': itask.tdef.namespace_hierarchy,
            'owner': itask.task_owner,
            'param_var': itask.tdef.param_var,
            'post-script': scripts[2],
            'pre-script': scripts[0],
            'remote_suite_d': rtconfig['remote']['suite definition directory'],
            'script': scripts[1],
            'shell': rtconfig['job']['shell'],
            'submit_num': itask.submit_num,
            'suite_name': suite,
            'task_id': itask.identity,
            'try_num': itask.get_try_num(),
            'work_d': rtconfig['work sub-directory'],
        }
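
The broadcast handling at the top of this method copies the runtime config only when an override actually exists, then applies the override recursively so the shared task definition is never mutated. A sketch of that pattern; recursive_override is an illustrative stand-in for cylc's poverride:

from copy import deepcopy


def recursive_override(target, overrides):
    """Recursively apply overrides onto a nested dict, in place."""
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(target.get(key), dict):
            recursive_override(target[key], value)
        else:
            target[key] = value


rtconfig = {"job": {"batch system": "background"}, "environment": {}}
overrides = {"job": {"batch system": "slurm"}}
if overrides:
    rtconfig = deepcopy(rtconfig)  # leave the original definition untouched
    recursive_override(rtconfig, overrides)
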
Code example #41
File: job_file.py Project: rbaumert/cylc
 def _get_derived_host_item(job_conf, key):
     """Return derived host item from GLOBAL_CFG."""
     return GLOBAL_CFG.get_derived_host_item(job_conf['suite_name'], key,
                                             job_conf["host"],
                                             job_conf["owner"])
Code example #42
File: job_logs.py Project: ScottWales/cylc
    def get_latest_job_log(cls, suite, task_name, task_point):
        """Return the latest job log path on the suite host."""

        suite_job_log_dir = GLOBAL_CFG.get_derived_host_item(suite, "suite job log directory")
        the_rest = os.path.join(str(task_point), task_name, "NN", "job")
        return os.path.join(suite_job_log_dir, the_rest)
Code example #43
    def __init__(self, cfg, updater, theme, info_bar, xdot):
        super(GraphUpdater, self).__init__()

        self.quit = False
        self.cleared = False
        self.ignore_suicide = True
        self.focus_start_point_string = None
        self.focus_stop_point_string = None
        self.xdot = xdot
        self.first_update = False
        self.graph_disconnect = False
        self.action_required = True
        self.oldest_point_string = None
        self.newest_point_string = None
        self.orientation = "TB"  # Top to Bottom ordering of nodes
        self.best_fit = True  # zoom to page size
        self.normal_fit = False  # zoom to 1.0 scale
        self.crop = False
        self.subgraphs_on = False  # organise by cycle point.

        self.descendants = {}
        self.all_families = []
        self.write_dot_frames = False

        self.prev_graph_id = ()

        self.cfg = cfg
        self.updater = updater
        self.theme = theme
        self.info_bar = info_bar
        self.state_summary = {}
        self.fam_state_summary = {}
        self.global_summary = {}
        self.last_update_time = None

        self.god = None
        self.mode = "waiting..."
        self.update_time_str = "waiting..."

        self.prev_graph_id = ()

        # empty graphw object:
        self.graphw = CGraphPlain(self.cfg.suite)

        # TODO - handle failure to get a remote proxy in reconnect()

        self.graph_warned = {}

        # lists of nodes to newly group or ungroup (not of all currently
        # grouped and ungrouped nodes - still held server side)
        self.group = []
        self.ungroup = []
        self.have_leaves_and_feet = False
        self.leaves = []
        self.feet = []

        self.ungroup_recursive = False
        if "graph" in self.cfg.ungrouped_views:
            self.ungroup_all = True
            self.group_all = False
        else:
            self.ungroup_all = False
            self.group_all = True

        self.graph_frame_count = 0
        self.suite_share_dir = GLOBAL_CFG.get_derived_host_item(
            self.cfg.suite, 'suite share directory')
Code example #44
File: task_job_mgr.py Project: zhengge2017/cylc
    def _prep_submit_task_job_impl(self, suite, itask):
        """Helper for self._prep_submit_task_job."""
        overrides = self.task_events_mgr.broadcast_mgr.get_broadcast(
            itask.identity)
        if overrides:
            rtconfig = pdeepcopy(itask.tdef.rtconfig)
            poverride(rtconfig, overrides)
        else:
            rtconfig = itask.tdef.rtconfig

        # Retry delays, needed for the try_num
        self._set_retry_timers(itask, rtconfig)

        # Submit number and try number
        LOG.debug("[%s] -incrementing submit number" % (itask.identity,))
        itask.submit_num += 1
        itask.summary['submit_num'] = itask.submit_num
        itask.local_job_file_path = None
        self.suite_db_mgr.put_insert_task_jobs(itask, {
            "is_manual_submit": itask.is_manual_submit,
            "try_num": itask.get_try_num(),
            "time_submit": get_current_time_string(),
        })

        itask.summary['batch_sys_name'] = rtconfig['job']['batch system']
        for name in rtconfig['extra log files']:
            itask.summary['logfiles'].append(expandvars(name))

        # Determine task host settings now, just before job submission,
        # because dynamic host selection may be used.

        # host may be None (= run task on suite host)
        itask.task_host = get_task_host(rtconfig['remote']['host'])
        if not itask.task_host:
            itask.task_host = 'localhost'
        elif itask.task_host != "localhost":
            LOG.info("[%s] -Task host: %s" % (
                itask.identity, itask.task_host))

        itask.task_owner = rtconfig['remote']['owner']

        if itask.task_owner:
            user_at_host = itask.task_owner + "@" + itask.task_host
        else:
            user_at_host = itask.task_host
        itask.summary['host'] = user_at_host
        itask.summary['job_hosts'][itask.submit_num] = user_at_host
        try:
            batch_sys_conf = self.task_events_mgr.get_host_conf(
                itask, 'batch systems')[rtconfig['job']['batch system']]
        except (TypeError, KeyError):
            batch_sys_conf = {}
        try:
            itask.summary[self.KEY_EXECUTE_TIME_LIMIT] = float(
                rtconfig['job']['execution time limit'])
        except TypeError:
            pass
        if itask.summary[self.KEY_EXECUTE_TIME_LIMIT]:
            # Default = 1, 2 and 7 minute intervals, i.e. polls roughly 1, 3
            # and 10 minutes after the time limit is exceeded
            itask.poll_timers[self.KEY_EXECUTE_TIME_LIMIT] = (
                TaskActionTimer(delays=batch_sys_conf.get(
                    'execution time limit polling intervals', [60, 120, 420])))
        for label, key in [
                ('submission polling intervals', TASK_STATUS_SUBMITTED),
                ('execution polling intervals', TASK_STATUS_RUNNING)]:
            if key in itask.poll_timers:
                itask.poll_timers[key].reset()
            else:
                values = self.task_events_mgr.get_host_conf(
                    itask, label, skey='job')
                if values:
                    itask.poll_timers[key] = TaskActionTimer(delays=values)

        self.init_host(suite, itask.task_host, itask.task_owner)
        if itask.state.outputs.has_custom_triggers():
            self.suite_db_mgr.put_update_task_outputs(itask)
        self.suite_db_mgr.put_update_task_jobs(itask, {
            "user_at_host": user_at_host,
            "batch_sys_name": itask.summary['batch_sys_name'],
        })
        itask.is_manual_submit = False

        scripts = self._get_job_scripts(itask, rtconfig)

        # Location of job file, etc
        self._create_job_log_path(suite, itask)
        job_d = self.task_events_mgr.get_task_job_id(
            itask.point, itask.tdef.name, itask.submit_num)
        job_file_path = os.path.join(
            GLOBAL_CFG.get_derived_host_item(
                suite, "suite job log directory",
                itask.task_host, itask.task_owner),
            job_d, self.JOB_FILE_BASE)
        return {
            'batch_system_name': rtconfig['job']['batch system'],
            'batch_submit_command_template': (
                rtconfig['job']['batch submit command template']),
            'batch_system_conf': batch_sys_conf,
            'directives': rtconfig['directives'],
            'environment': rtconfig['environment'],
            'execution_time_limit': itask.summary[self.KEY_EXECUTE_TIME_LIMIT],
            'env-script': rtconfig['env-script'],
            'err-script': rtconfig['err-script'],
            'host': itask.task_host,
            'init-script': rtconfig['init-script'],
            'job_file_path': job_file_path,
            'job_d': job_d,
            'namespace_hierarchy': itask.tdef.namespace_hierarchy,
            'owner': itask.task_owner,
            'param_var': itask.tdef.param_var,
            'post-script': scripts[2],
            'pre-script': scripts[0],
            'remote_suite_d': rtconfig['remote']['suite definition directory'],
            'script': scripts[1],
            'shell': rtconfig['job']['shell'],
            'submit_num': itask.submit_num,
            'suite_name': suite,
            'task_id': itask.identity,
            'try_num': itask.get_try_num(),
            'work_d': rtconfig['work sub-directory'],
        }
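
For the execution time limit, the default polling delays of 60, 120 and 420 seconds accumulate so that polls land roughly 1, 3 and 10 minutes after the limit is exceeded. A small worked example of that arithmetic, with a placeholder limit:

delays = [60, 120, 420]       # seconds between successive polls
execution_time_limit = 300.0  # seconds; placeholder value
poll_times = []
elapsed = execution_time_limit
for delay in delays:
    elapsed += delay
    poll_times.append(elapsed)
print(poll_times)  # [360.0, 480.0, 900.0] -> 1, 3 and 10 min past the limit
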
Code example #45
File: job_file.py Project: bjlittle/cylc
    def _write_environment_1(self, handle, job_conf):
        """Suite and task environment."""
        handle.write("\n\n# CYLC SUITE ENVIRONMENT:")

        # write the static suite variables
        for var, val in sorted(self.suite_env.items()):
            handle.write("\nexport " + var + "=" + str(val))

        if str(self.suite_env.get("CYLC_UTC")) == "True":
            handle.write("\nexport TZ=UTC")

        handle.write("\n")
        # override and write task-host-specific suite variables
        suite_work_dir = GLOBAL_CFG.get_derived_host_item(
            job_conf["suite name"], "suite work directory", job_conf["host"], job_conf["owner"]
        )
        st_env = {}
        st_env["CYLC_SUITE_RUN_DIR"] = GLOBAL_CFG.get_derived_host_item(
            job_conf["suite name"], "suite run directory", job_conf["host"], job_conf["owner"]
        )
        st_env["CYLC_SUITE_WORK_DIR"] = suite_work_dir
        st_env["CYLC_SUITE_SHARE_DIR"] = GLOBAL_CFG.get_derived_host_item(
            job_conf["suite name"], "suite share directory", job_conf["host"], job_conf["owner"]
        )
        # DEPRECATED
        st_env["CYLC_SUITE_SHARE_PATH"] = "$CYLC_SUITE_SHARE_DIR"
        rsp = job_conf["remote suite path"]
        if rsp:
            st_env["CYLC_SUITE_DEF_PATH"] = rsp
        else:
            # replace home dir with '$HOME' for evaluation on the task host
            st_env["CYLC_SUITE_DEF_PATH"] = re.sub(
                os.environ["HOME"], "$HOME", self.suite_env["CYLC_SUITE_DEF_PATH_ON_SUITE_HOST"]
            )
        for var, val in sorted(st_env.items()):
            handle.write("\nexport " + var + "=" + str(val))

        task_work_dir = os.path.join(suite_work_dir, job_conf["work sub-directory"])

        use_login_shell = GLOBAL_CFG.get_host_item("use login shell", job_conf["host"], job_conf["owner"])
        comms = GLOBAL_CFG.get_host_item("task communication method", job_conf["host"], job_conf["owner"])

        task_name, point_string = TaskID.split(job_conf["task id"])
        handle.write("\n\n# CYLC TASK ENVIRONMENT:")
        handle.write("\nexport CYLC_TASK_COMMS_METHOD=" + comms)
        handle.write("\nexport CYLC_TASK_CYCLE_POINT=" + point_string)
        handle.write("\nexport CYLC_TASK_CYCLE_TIME=" + point_string)
        handle.write("\nexport CYLC_TASK_ID=" + job_conf["task id"])
        handle.write("\nexport CYLC_TASK_IS_COLDSTART=" + str(job_conf["is cold-start"]))
        handle.write("\nexport CYLC_TASK_LOG_ROOT=" + job_conf["job file path"])
        handle.write(
            "\nexport CYLC_TASK_MSG_MAX_TRIES=" + str(GLOBAL_CFG.get(["task messaging", "maximum number of tries"]))
        )
        handle.write("\nexport CYLC_TASK_MSG_RETRY_INTVL=" + str(GLOBAL_CFG.get(["task messaging", "retry interval"])))
        handle.write("\nexport CYLC_TASK_MSG_TIMEOUT=" + str(GLOBAL_CFG.get(["task messaging", "connection timeout"])))
        handle.write("\nexport CYLC_TASK_NAME=" + task_name)
        handle.write('\nexport CYLC_TASK_NAMESPACE_HIERARCHY="' + " ".join(job_conf["namespace hierarchy"]) + '"')
        handle.write("\nexport CYLC_TASK_SSH_LOGIN_SHELL=" + str(use_login_shell))
        handle.write("\nexport CYLC_TASK_SUBMIT_NUMBER=" + str(job_conf["absolute submit number"]))
        handle.write("\nexport CYLC_TASK_TRY_NUMBER=" + str(job_conf["try number"]))
        handle.write("\nexport CYLC_TASK_WORK_DIR=" + task_work_dir)
        # DEPRECATED
        handle.write("\nexport CYLC_TASK_WORK_PATH=$CYLC_TASK_WORK_DIR")
        handle.write("\nexport CYLC_JOB_PID=$$")
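
The environment sections are written as one "export VAR=value" line per variable, sorted by name. A tiny standalone sketch of that writer, printing to stdout instead of a job file handle; the variable values are placeholders:

import sys


def write_exports(handle, env):
    """Write sorted 'export VAR=value' lines to an open file handle."""
    for var, val in sorted(env.items()):
        handle.write("\nexport " + var + "=" + str(val))


write_exports(sys.stdout, {
    "CYLC_SUITE_RUN_DIR": "$HOME/cylc-run/my.suite",
    "CYLC_TASK_TRY_NUMBER": 1,
})
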
Code example #46
    def __init__(self, task_id, suite, jobconfig, submit_num):

        self.jobconfig = jobconfig

        self.task_id = task_id
        self.suite = suite
        self.logfiles = jobconfig.get('log files')

        self.command = None
        self.job_submit_command_template = jobconfig.get('command template')

        common_job_log_path = jobconfig.get('common job log path')
        self.local_jobfile_path = jobconfig.get('local job file path')
        self.logfiles.add_path(self.local_jobfile_path)

        task_host = jobconfig.get('task host')
        task_owner = jobconfig.get('task owner')

        self.remote_shell_template = GLOBAL_CFG.get_host_item(
            'remote shell template', task_host, task_owner)

        if is_remote_host(task_host) or is_remote_user(task_owner):
            self.local = False
            if task_owner:
                self.task_owner = task_owner
            else:
                self.task_owner = None

            if task_host:
                self.task_host = task_host
            else:
                self.task_host = socket.gethostname()

            remote_job_log_dir = GLOBAL_CFG.get_derived_host_item(
                self.suite, 'suite job log directory', self.task_host,
                self.task_owner)

            remote_jobfile_path = os.path.join(remote_job_log_dir,
                                               common_job_log_path)

            # Remote log files
            self.stdout_file = remote_jobfile_path + ".out"
            self.stderr_file = remote_jobfile_path + ".err"

            # Used in command construction:
            self.jobfile_path = remote_jobfile_path

            # Record paths of remote log files for access by gui
            if True:
                # by ssh URL
                url_prefix = self.task_host
                if self.task_owner:
                    url_prefix = self.task_owner + "@" + url_prefix
                self.logfiles.add_path(url_prefix + ':' + self.stdout_file)
                self.logfiles.add_path(url_prefix + ':' + self.stderr_file)
            else:
                # CURRENTLY DISABLED:
                # If the remote and suite hosts see a common filesystem, or
                # if the remote task is really just a local task with a
                # different owner, we could just use local filesystem access.
                # But to use this: (a) special namespace config would be
                # required to indicate that we have a common filesystem, and
                # (b) we'd need to consider how the log directory can be
                # specified (for example, use of '$HOME' as for remote task
                # use would not work here, as log file access is by the GUI
                # under the suite owner account).
                self.logfiles.add_path(self.stdout_file)
                self.logfiles.add_path(self.stderr_file)
        else:
            # LOCAL TASKS
            self.local = True
            self.task_owner = None
            # Used in command construction:
            self.jobfile_path = self.local_jobfile_path

            # Local stdout and stderr log file paths:
            self.stdout_file = self.local_jobfile_path + ".out"
            self.stderr_file = self.local_jobfile_path + ".err"

            # interpolate environment variables in extra logs
            for idx in range(0, len(self.logfiles.paths)):
                self.logfiles.paths[idx] = expandvars(self.logfiles.paths[idx])

            # Record paths of local log files for access by gui
            self.logfiles.add_path(self.stdout_file)
            self.logfiles.add_path(self.stderr_file)

        # set some defaults that can be overridden by derived classes
        self.jobconfig['directive prefix'] = None
        self.jobconfig['directive final'] = "# FINAL DIRECTIVE"
        self.jobconfig['directive connector'] = " "
        self.jobconfig['job vacation signal'] = None

        # overrideable methods
        self.set_directives()
        self.set_job_vacation_signal()
        self.set_scripting()
        self.set_environment()
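
For remote tasks, the stdout/stderr paths are recorded as "user@host:/path" strings so a viewer on the suite host can fetch them over SSH. The construction in isolation, with placeholder values:

task_host = "hpc1"
task_owner = "alice"
remote_jobfile_path = "/home/alice/cylc-run/my.suite/log/job/1/foo/01/job"

url_prefix = task_host
if task_owner:
    url_prefix = task_owner + "@" + url_prefix
stdout_url = url_prefix + ":" + remote_jobfile_path + ".out"
stderr_url = url_prefix + ":" + remote_jobfile_path + ".err"
print(stdout_url)  # alice@hpc1:/home/alice/cylc-run/.../job.out
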
Code example #47
    def _write_environment_1(self, handle, job_conf):
        """Suite and task environment."""
        handle.write("\n\n# CYLC SUITE ENVIRONMENT:")

        # write the static suite variables
        for var, val in sorted(self.suite_env.items()):
            handle.write("\nexport " + var + "=" + str(val))

        if str(self.suite_env.get('CYLC_UTC')) == 'True':
            handle.write("\nexport TZ=UTC")

        handle.write("\n")
        # override and write task-host-specific suite variables
        suite_work_dir = GLOBAL_CFG.get_derived_host_item(
            job_conf['suite name'], 'suite work directory',
            job_conf['host'], job_conf['owner'])
        st_env = {}
        st_env['CYLC_SUITE_RUN_DIR'] = GLOBAL_CFG.get_derived_host_item(
            job_conf['suite name'], 'suite run directory',
            job_conf['host'], job_conf['owner'])
        st_env['CYLC_SUITE_WORK_DIR'] = suite_work_dir
        st_env['CYLC_SUITE_SHARE_DIR'] = GLOBAL_CFG.get_derived_host_item(
            job_conf['suite name'], 'suite share directory',
            job_conf['host'], job_conf['owner'])
        # DEPRECATED
        st_env['CYLC_SUITE_SHARE_PATH'] = '$CYLC_SUITE_SHARE_DIR'
        rsp = job_conf['remote suite path']
        if rsp:
            st_env['CYLC_SUITE_DEF_PATH'] = rsp
        else:
            # replace home dir with '$HOME' for evaluation on the task host
            st_env['CYLC_SUITE_DEF_PATH'] = re.sub(
                os.environ['HOME'], '$HOME',
                self.suite_env['CYLC_SUITE_DEF_PATH_ON_SUITE_HOST'])
        for var, val in sorted(st_env.items()):
            handle.write("\nexport " + var + "=" + str(val))

        task_work_dir = os.path.join(
            suite_work_dir, job_conf['work sub-directory'])

        use_login_shell = GLOBAL_CFG.get_host_item(
            'use login shell', job_conf['host'], job_conf['owner'])
        comms = GLOBAL_CFG.get_host_item(
            'task communication method', job_conf['host'], job_conf['owner'])

        task_name, point_string = TaskID.split(job_conf['task id'])
        handle.write("\n\n# CYLC TASK ENVIRONMENT:")
        handle.write("\nexport CYLC_TASK_COMMS_METHOD=" + comms)
        handle.write("\nexport CYLC_TASK_CYCLE_POINT=" + point_string)
        handle.write("\nexport CYLC_TASK_CYCLE_TIME=" + point_string)
        handle.write("\nexport CYLC_TASK_ID=" + job_conf['task id'])
        handle.write(
            "\nexport CYLC_TASK_IS_COLDSTART=" +
            str(job_conf['is cold-start']))
        handle.write(
            "\nexport CYLC_TASK_LOG_ROOT=" + job_conf['job file path'])
        handle.write(
            "\nexport CYLC_TASK_MSG_MAX_TRIES=" +
            str(GLOBAL_CFG.get(['task messaging', 'maximum number of tries'])))
        handle.write(
            "\nexport CYLC_TASK_MSG_RETRY_INTVL=%f" %
            GLOBAL_CFG.get(['task messaging', 'retry interval']))
        handle.write(
            "\nexport CYLC_TASK_MSG_TIMEOUT=%f" %
            GLOBAL_CFG.get(['task messaging', 'connection timeout']))
        handle.write("\nexport CYLC_TASK_NAME=" + task_name)
        handle.write(
            '\nexport CYLC_TASK_NAMESPACE_HIERARCHY="' +
            ' '.join(job_conf['namespace hierarchy']) + '"')
        handle.write(
            "\nexport CYLC_TASK_SSH_LOGIN_SHELL=" + str(use_login_shell))
        handle.write(
            "\nexport CYLC_TASK_SUBMIT_NUMBER=" + str(job_conf['submit num']))
        handle.write(
            "\nexport CYLC_TASK_TRY_NUMBER=" +
            str(job_conf['try number']))
        handle.write("\nexport CYLC_TASK_WORK_DIR=" + task_work_dir)
        # DEPRECATED
        handle.write("\nexport CYLC_TASK_WORK_PATH=$CYLC_TASK_WORK_DIR")
        handle.write("\nexport %s=$$" % (TaskMessage.CYLC_JOB_PID))
Code example #48
    def remote_tidy(self):
        """Remove suite contact files from initialised remotes.

        Call "cylc remote-tidy".
        This method is called on suite shutdown, so we want nothing to hang.
        Time out any incomplete commands after 10 seconds.

        Also remove UUID file on suite host ".service/uuid".
        """
        # Remove UUID file
        uuid_fname = os.path.join(
            self.suite_srv_files_mgr.get_suite_srv_dir(self.suite), 'uuid')
        try:
            os.unlink(uuid_fname)
        except OSError:
            pass
        # Issue all SSH commands in parallel
        procs = {}
        for (host, owner), init_with_contact in self.remote_init_map.items():
            if init_with_contact != REMOTE_INIT_DONE:
                continue
            cmd = ['timeout', '10', 'cylc', 'remote-tidy']
            if is_remote_host(host):
                cmd.append('--host=%s' % host)
            if is_remote_user(owner):
                cmd.append('--user=%s' % owner)
            if cylc.flags.debug:
                cmd.append('--debug')
            cmd.append(
                os.path.join(
                    GLOBAL_CFG.get_derived_host_item(self.suite,
                                                     'suite run directory',
                                                     host, owner)))
            procs[(host, owner)] = (cmd,
                                    Popen(cmd,
                                          stdout=PIPE,
                                          stderr=PIPE,
                                          stdin=open(os.devnull)))
        # Wait for commands to complete for a max of 10 seconds
        timeout = time() + 10.0
        while procs and time() < timeout:
            for (host, owner), (cmd, proc) in procs.copy().items():
                if proc.poll() is None:
                    continue
                del procs[(host, owner)]
                out, err = proc.communicate()
                if proc.wait():
                    LOG.warning(
                        TaskRemoteMgmtError(
                            TaskRemoteMgmtError.MSG_TIDY, (host, owner),
                            ' '.join(quote(item) for item in cmd),
                            proc.returncode, out, err))
        # Terminate any remaining commands
        for (host, owner), (cmd, proc) in procs.items():
            try:
                proc.terminate()
            except OSError:
                pass
            out, err = proc.communicate()
            if proc.wait():
                LOG.warning(
                    TaskRemoteMgmtError(TaskRemoteMgmtError.MSG_TIDY,
                                        (host, owner),
                                        ' '.join(quote(item) for item in cmd),
                                        proc.returncode, out, err))
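
The shutdown pattern above is: start every tidy command in parallel, give the whole batch a single deadline, then terminate whatever is still running. A generic sketch with placeholder commands:

import os
from subprocess import PIPE, Popen
from time import sleep, time

cmds = [["sleep", "1"], ["sleep", "30"]]  # placeholder commands
procs = {}
for key, cmd in enumerate(cmds):
    procs[key] = (
        cmd, Popen(cmd, stdout=PIPE, stderr=PIPE, stdin=open(os.devnull)))
deadline = time() + 10.0
while procs and time() < deadline:
    for key, (cmd, proc) in list(procs.items()):
        if proc.poll() is None:
            continue  # still running
        del procs[key]
        proc.communicate()
    sleep(0.1)
# Terminate anything that missed the deadline.
for key, (cmd, proc) in procs.items():
    try:
        proc.terminate()
    except OSError:
        pass
    proc.communicate()
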