Example #1
0
    def job_logs_archive(self, suite_name, items):
        """Archive cycle job logs.

        Tar-gzip the "log/job/<cycle>/" directory of each relevant cycle
        into "log/job-<cycle>.tar.gz", then remove the source directory.

        suite_name -- The name of a suite.
        items -- A list of relevant items.

        """
        # Determine the cycles to archive: all cycles in the suite's
        # database if "*" is requested, otherwise those parsed from items.
        cycles = []
        if "*" in items:
            stmt = "SELECT DISTINCT cycle FROM task_jobs"
            for row in self._db_exec(suite_name, stmt):
                cycles.append(row[0])
            self._db_close(suite_name)
        else:
            for item in items:
                cycle = self._parse_task_cycle_id(item)[0]
                if cycle:
                    cycles.append(cycle)
        # Pull and prune remote job logs first so the archive is complete.
        self.job_logs_housekeep_remote(
            suite_name, cycles, prune_remote_mode=True)
        cwd = os.getcwd()
        self.fs_util.chdir(self.get_suite_dir(suite_name))
        try:
            for cycle in cycles:
                archive_file_name0 = os.path.join(
                    "log", "job-" + cycle + ".tar"
                )
                archive_file_name = archive_file_name0 + ".gz"
                if os.path.exists(archive_file_name):
                    continue  # already archived
                glob_ = os.path.join(cycle, "*", "*", "*")
                names = glob(os.path.join("log", "job", glob_))
                if not names:
                    continue  # no job logs for this cycle
                f_bsize = os.statvfs(".").f_bsize
                # BUG FIX: use a context manager so the tar file handle is
                # closed even if "tar.add" raises part way through.
                with tarfile.open(
                        archive_file_name0, "w", bufsize=f_bsize) as tar:
                    for name in names:
                        # BUG FIX: do not shadow the outer "cycle" loop
                        # variable here; it is used below to delete the
                        # cycle's log directory.
                        _, _, s_n, ext = self.parse_job_log_rel_path(name)
                        if s_n == "NN" or ext == "job.status":
                            continue
                        tar.add(name, name.replace("log/", "", 1))
                # N.B. Python's gzip is slow
                self.popen.run_simple("gzip", "-f", archive_file_name0)
                self.handle_event(
                    FileSystemEvent(FileSystemEvent.CREATE, archive_file_name)
                )
                self.fs_util.delete(os.path.join("log", "job", cycle))
        finally:
            # Best-effort restore of the original working directory.
            try:
                self.fs_util.chdir(cwd)
            except OSError:
                pass
Example #2
0
    def process(self,
                conf_tree,
                item,
                orig_keys=None,
                orig_value=None,
                **kwargs):
        """Process [jinja2:*] in "conf_tree.node".

        Arguments:
            conf_tree:
                The relevant metomi.rose.config_tree.ConfigTree object with the
                full configuration.
            item: The current configuration item to process.
            orig_keys:
                The keys for locating the originating setting in conf_tree in a
                recursive processing. None implies a top level call.
            orig_value: The value of orig_keys in conf_tree.
            **kwargs:
                environ (dict): suite level environment variables.
        """
        for s_key, s_node in sorted(conf_tree.node.value.items()):
            # Only process non-ignored, non-empty [PREFIX...] sections.
            if (s_node.is_ignored() or not s_key.startswith(self.PREFIX)
                    or not s_node.value):
                continue
            target = s_key[len(self.PREFIX):]
            source = os.path.join(conf_tree.files[target], target)
            if not os.access(source, os.F_OK | os.R_OK):
                continue
            scheme_ln = self.SCHEME_TEMPL % self.SCHEME
            msg_init_ln = self.COMMENT_TEMPL % self.MSG_INIT
            msg_done_ln = self.COMMENT_TEMPL % self.MSG_DONE
            # Assemble the new content in a temporary file first, so the
            # target is only replaced when its content would change.
            tmp_file = NamedTemporaryFile()
            tmp_file.write(scheme_ln.encode('UTF-8'))
            tmp_file.write(msg_init_ln.encode('UTF-8'))
            # Emit one assignment per key, plus a ROSE_SUITE_VARIABLES
            # dict literal exposing all of them at once.
            suite_variables = ['{']
            for key, node in sorted(s_node.value.items()):
                if node.is_ignored():
                    continue
                try:
                    value = env_var_process(node.value)
                except UnboundEnvironmentVariableError as exc:
                    raise ConfigProcessError([s_key, key], node.value, exc)
                tmp_file.write(
                    (self.ASSIGN_TEMPL % (key, value)).encode('UTF-8'))
                suite_variables.append("    '%s': %s," % (key, key))
            suite_variables.append('}')
            suite_variables = self.ASSIGN_TEMPL % ('ROSE_SUITE_VARIABLES',
                                                   '\n'.join(suite_variables))
            tmp_file.write(suite_variables.encode('UTF-8'))
            environ = kwargs.get("environ")
            if environ:
                # Export suite level environment variables via [cylc].
                tmp_file.write('[cylc]\n'.encode('UTF-8'))
                tmp_file.write('    [[environment]]\n'.encode('UTF-8'))
                for key, value in sorted(environ.items()):
                    tmp_file.write(
                        ('        %s=%s\n' % (key, value)).encode('UTF-8'))
            tmp_file.write(msg_done_ln.encode('UTF-8'))
            # Append the original source content, skipping any header block
            # inserted by a previous run (scheme line, then everything
            # between the MSG_INIT and MSG_DONE markers).
            is_in_old_insert = False
            # BUG FIX: the source file handle was never closed.
            with open(source) as source_file:
                for line_n, line in enumerate(source_file, 1):
                    if (line_n == 1
                            and line.strip().lower() == scheme_ln.strip()):
                        continue
                    elif line_n == 2 and line == msg_init_ln:
                        is_in_old_insert = True
                        continue
                    elif is_in_old_insert and line == msg_done_ln:
                        is_in_old_insert = False
                        continue
                    elif is_in_old_insert:
                        continue
                    tmp_file.write(line.encode('UTF-8'))
            tmp_file.seek(0)
            if os.access(target, os.F_OK | os.R_OK):
                if filecmp.cmp(target, tmp_file.name):  # identical
                    tmp_file.close()
                    continue
                else:
                    self.manager.fs_util.delete(target)
            # Write content to target.
            # BUG FIX: the target file handle was never closed; use "with"
            # so it is flushed and closed on all paths.
            with open(target, "w") as target_file:
                for line in tmp_file:
                    try:
                        target_file.write(line)
                    except TypeError:
                        # tmp_file yields bytes; target is in text mode.
                        target_file.write(line.decode())
            event = FileSystemEvent(FileSystemEvent.INSTALL, target)
            self.manager.handle_event(event)
            tmp_file.close()
Example #3
0
    def job_logs_pull_remote(self, suite_name, items,
                             prune_remote_mode=False, force_mode=False):
        """Pull and housekeep the job logs on remote task hosts.

        suite_name -- The name of a suite.
        items -- A list of relevant items.
        prune_remote_mode -- Remove remote job logs after pulling them.
        force_mode -- Pull even if "job.out" already exists.

        """
        # Pull from remote.
        # Create a file with a uuid name, so system knows to do nothing on
        # shared file systems.
        uuid = str(uuid4())
        log_dir_rel = self.get_suite_dir_rel(suite_name, "log", "job")
        log_dir = os.path.join(os.path.expanduser("~"), log_dir_rel)
        uuid_file_name = os.path.join(log_dir, uuid)
        self.fs_util.touch(uuid_file_name)
        try:
            auths_filters = []  # [(auths, includes, excludes), ...]
            if "*" in items:
                # Wildcard: pull everything from every known job host,
                # with no rsync include/exclude filters.
                auths = self.get_suite_jobs_auths(suite_name)
                if auths:
                    # A shuffle here should allow the load for doing "rm -rf"
                    # to be shared between job hosts who share a file system.
                    shuffle(auths)
                    auths_filters.append((auths, [], []))
            else:
                for item in items:
                    cycle, name = self._parse_task_cycle_id(item)
                    if cycle is not None:
                        # Skip cycles whose logs are already archived.
                        # NOTE(review): this existence test is relative to
                        # the current working directory — presumably the
                        # suite's "log/" directory; confirm with callers.
                        arch_f_name = "job-" + cycle + ".tar.gz"
                        if os.path.exists(arch_f_name):
                            continue
                    # Don't bother if "job.out" already exists
                    # Unless forced to do so
                    if (cycle is not None and name is not None and
                            not prune_remote_mode and not force_mode and
                            os.path.exists(os.path.join(
                                log_dir, str(cycle), name, "NN", "job.out"))):
                        continue
                    auths = self.get_suite_jobs_auths(
                        suite_name, [(cycle, name)])
                    if auths:
                        # A shuffle here should allow the load for doing "rm
                        # -rf" to be shared between job hosts who share a file
                        # system.
                        shuffle(auths)
                        # Build rsync include/exclude filters so only the
                        # requested cycle and/or task name is transferred.
                        includes = []
                        excludes = []
                        if cycle is None and name is None:
                            includes = []
                            excludes = []
                        elif name is None:
                            includes = ["/" + cycle]
                            excludes = ["/*"]
                        elif cycle is None:
                            includes = ["/*/" + name]
                            excludes = ["/*/*"]
                        else:
                            includes = ["/" + cycle, "/" + cycle + "/" + name]
                            excludes = ["/*", "/*/*"]
                        auths_filters.append((auths, includes, excludes))

            for auths, includes, excludes in auths_filters:
                for auth in auths:
                    data = {"auth": auth,
                            "log_dir_rel": log_dir_rel,
                            "uuid": uuid,
                            "glob_": "*"}
                    if includes:
                        data["glob_"] = includes[-1][1:]  # Remove leading /
                    # Skip hosts that share a file system with us (the uuid
                    # marker file is visible there) or that have nothing
                    # matching to pull ("ls -d" fails).
                    cmd = self.popen.get_cmd(
                        "ssh", auth,
                        ("cd %(log_dir_rel)s && " +
                         "(! test -f %(uuid)s && ls -d %(glob_)s)") % data)
                    ret_code, ssh_ls_out, _ = self.popen.run(*cmd)
                    if ret_code:
                        continue
                    # Pull the matching job logs with "rsync".
                    cmd_list = ["rsync"]
                    for include in includes:
                        cmd_list.append("--include=" + include)
                    for exclude in excludes:
                        cmd_list.append("--exclude=" + exclude)
                    cmd_list.append("%(auth)s:%(log_dir_rel)s/" % data)
                    cmd_list.append(log_dir)
                    try:
                        cmd = self.popen.get_cmd(*cmd_list)
                        self.popen(*cmd)
                    except RosePopenError as exc:
                        # Best effort: warn and carry on with other hosts.
                        self.handle_event(exc, level=Reporter.WARN)
                    if not prune_remote_mode:
                        continue
                    # Prune mode: remove the remote logs after pulling them.
                    try:
                        cmd = self.popen.get_cmd(
                            "ssh", auth,
                            "cd %(log_dir_rel)s && rm -fr %(glob_)s" % data)
                        self.popen(*cmd)
                    except RosePopenError as exc:
                        self.handle_event(exc, level=Reporter.WARN)
                    else:
                        # Report each remote directory that was removed,
                        # using the earlier "ls -d" output.
                        for line in sorted(ssh_ls_out.splitlines()):
                            event = FileSystemEvent(
                                FileSystemEvent.DELETE,
                                "%s:log/job/%s/" % (auth, line))
                            self.handle_event(event)
        finally:
            # Always remove the local uuid marker file.
            self.fs_util.delete(uuid_file_name)
Example #4
0
    def run(self, app_runner, conf_tree, opts, args, uuid, work_files):
        """Suite housekeeping application.

        This application is designed to work under "rose task-run" in a cycling
        suite.

        """
        suite_name = os.getenv("ROSE_SUITE_NAME")
        if not suite_name:
            return

        # Tar-gzip job logs on suite host
        # Prune job logs on remote hosts and suite host
        prune_remote_logs_cycles = self._get_conf(app_runner, conf_tree,
                                                  "prune-remote-logs-at")
        # Prune job logs on suite host
        prune_server_logs_cycles = self._get_conf(app_runner, conf_tree,
                                                  "prune-server-logs-at")
        archive_logs_cycles = self._get_conf(app_runner, conf_tree,
                                             "archive-logs-at")
        if (prune_remote_logs_cycles or prune_server_logs_cycles
                or archive_logs_cycles):
            # Cycles due to be archived are housekept by the archive step
            # itself, so drop them from the explicit prune lists.
            prune_remote_logs_cycles = [
                cycle for cycle in prune_remote_logs_cycles
                if cycle not in archive_logs_cycles]
            prune_server_logs_cycles = [
                cycle for cycle in prune_server_logs_cycles
                if cycle not in archive_logs_cycles]

            if prune_remote_logs_cycles:
                app_runner.suite_engine_proc.job_logs_housekeep_remote(
                    suite_name,
                    prune_remote_logs_cycles,
                    prune_remote_mode=True,
                )

            if prune_server_logs_cycles:
                app_runner.suite_engine_proc.job_logs_remove_on_server(
                    suite_name, prune_server_logs_cycles)

            if archive_logs_cycles:
                app_runner.suite_engine_proc.job_logs_archive(
                    suite_name, archive_logs_cycles)

        # Prune other directories
        globs, cycle_set = self._get_prune_globs(app_runner, conf_tree)
        if not globs:
            return
        suite_engine_proc = app_runner.suite_engine_proc
        hosts = suite_engine_proc.get_suite_jobs_auths(
            suite_name, [(cycle, None) for cycle in cycle_set])
        # A shuffle here should allow the load for doing "rm -rf" to be shared
        # between job hosts who share a file system.
        shuffle(hosts)
        suite_dir_rel = suite_engine_proc.get_suite_dir_rel(suite_name)
        form_dict = {"d": suite_dir_rel, "g": " ".join(globs)}
        sh_cmd_head = r"set -e; cd %(d)s; " % form_dict
        # List the matches first (best effort, for reporting), then remove.
        sh_cmd = (r"set +e; ls -d %(g)s; " +
                  r"set -e; rm -fr %(g)s") % form_dict
        cwd = os.getcwd()
        host_selector = HostSelector(app_runner.event_handler,
                                     app_runner.popen)
        for host in hosts + [host_selector.get_local_host()]:
            sdir = None
            try:
                if host_selector.is_local_host(host):
                    # Local: change into the suite directory and run there.
                    sdir = suite_engine_proc.get_suite_dir(suite_name)
                    app_runner.fs_util.chdir(sdir)
                    out = app_runner.popen.run_ok("bash", "-O", "extglob",
                                                  "-c", sh_cmd)[0]
                else:
                    cmd = app_runner.popen.get_cmd(
                        "ssh",
                        host,
                        "bash -O extglob -c '" + sh_cmd_head + sh_cmd + "'",
                    )
                    out = app_runner.popen.run_ok(*cmd)[0]
            except RosePopenError as exc:
                app_runner.handle_event(exc)
            else:
                if sdir is None:
                    # Remote host: report where the pruning took place.
                    event = FileSystemEvent(FileSystemEvent.CHDIR,
                                            host + ":" + suite_dir_rel)
                    app_runner.handle_event(event)
                for line in sorted(out.splitlines()):
                    if not host_selector.is_local_host(host):
                        line = host + ":" + line
                    event = FileSystemEvent(FileSystemEvent.DELETE, line)
                    app_runner.handle_event(event)
            finally:
                # Restore the original working directory if it was changed.
                if sdir:
                    app_runner.fs_util.chdir(cwd)
        return
Example #5
0
    def _invoke_fcm_make(
        self,
        app_runner,
        conf_tree,
        opts,
        args,
        uuid,
        task,
        dests,
        fast_root,
        make_name,
    ):
        """Wrap "fcm make" call, may use fast_root working directory."""
        if opts.new_mode:
            # Remove items in destinations in new mode
            # Ensure that it is not the current working directory, which should
            # already be cleaned.
            # The uuid file created here acts as a sentinel: a destination
            # that can see it is the current working directory (e.g. via a
            # shared file system) and must not be removed.
            open(uuid, "w").close()
            try:
                for dest in dests:
                    if dest and ":" in dest:
                        # Remove a remote destination
                        auth, name = dest.split(":", 1)
                        # "ls -d" first so the removal can be reported below;
                        # "|| true" keeps the chain going if it doesn't exist.
                        cmd = app_runner.popen.get_cmd(
                            "ssh",
                            auth,
                            ("! test -e %(name)s/%(uuid)s && " +
                             "(ls -d %(name)s || true) && rm -fr %(name)s") % {
                                 "name": quote(name),
                                 "uuid": uuid
                             },
                        )
                        out = app_runner.popen.run_ok(*cmd)[0]
                        for line in out.splitlines():
                            if line == name:
                                app_runner.handle_event(
                                    FileSystemEvent(FileSystemEvent.DELETE,
                                                    dest))
                    elif dest and not os.path.exists(os.path.join(dest, uuid)):
                        # Remove a local destination
                        app_runner.fs_util.delete(dest)
            finally:
                # Always remove the sentinel file.
                os.unlink(uuid)
        # "rsync" existing dest to fast working directory, if relevant
        # Only work with fcm-2015.05+
        dest = dests[0]
        if fast_root:
            # N.B. Name in "little endian", like cycle task ID
            prefix = ".".join([
                task.task_name,
                task.task_cycle_time,
                # suite_name may be a hierarchical registration which
                # isn't a safe prefix
                task.suite_name.replace(os.sep, '_'),
            ])
            os.makedirs(fast_root, exist_ok=True)
            # Build in a unique temporary directory under fast_root; results
            # are synced back to dests[0] in the "finally" block below.
            dest = mkdtemp(prefix=prefix, dir=fast_root)
            # N.B. Don't use app_runner.popen.get_cmd("rsync") as we are using
            #      "rsync" for a local copy.
            rsync_prefixes = ["rsync", "-a"]
            if not dests[0]:
                dests[0] = "."
            if os.path.isdir(dests[0]):
                # Seed the fast working directory with existing content so
                # "fcm make" can do an incremental build.
                cmd = rsync_prefixes + [dests[0] + os.sep, dest + os.sep]
                try:
                    app_runner.popen.run_simple(*cmd)
                except RosePopenError:
                    # Clean up the temporary directory before re-raising.
                    app_runner.fs_util.delete(dest)
                    raise

        # Launch "fcm make"
        cmd = self._get_fcm_make_cmd(conf_tree, opts, args, dest, make_name)
        try:
            app_runner.popen(*cmd, stdout=sys.stdout, stderr=sys.stderr)
        finally:
            # "rsync" fast working directory to dests[0], if relevant
            # N.B. "dest != dests[0]" can only hold when fast_root was set,
            # so "rsync_prefixes" is always defined on this path.
            if dest != dests[0] and os.path.isdir(dest):
                app_runner.fs_util.makedirs(dests[0])
                # Preserve the original mode of dests[0] across the sync.
                stat = os.stat(dests[0])
                cmd = rsync_prefixes + [dest + os.sep, dests[0] + os.sep]
                app_runner.popen.run_simple(*cmd)
                os.chmod(dests[0], stat.st_mode)
                app_runner.fs_util.delete(dest)
Exemple #6
0
 def _clean(self, suite_name, only_items=None):
     """Perform the cleaning operations.

     suite_name -- The name of a suite.
     only_items -- An optional list of items to clean. If not specified,
                   clean the cleanable paths and the suite directory itself
                   on every known location.

     """
     engine = self.suite_engine_proc
     suite_dir_rel = engine.get_suite_dir_rel(suite_name)
     locs_file_path = engine.get_suite_dir(suite_name, "log",
                                           "rose-suite-run.locs")
     locs_conf = ConfigNode().set(["localhost"], {})
     try:
         ConfigLoader().load(locs_file_path, locs_conf)
     except IOError:
         # No locations file: fall back to cleaning on localhost only.
         pass
     items = self.CLEANABLE_PATHS + [""]
     if only_items:
         # BUG FIX: copy the caller's list — "items.sort()" below would
         # otherwise mutate the "only_items" argument in place.
         items = list(only_items)
     items.sort()
     uuid_str = str(uuid4())
     for auth, node in sorted(locs_conf.value.items(),
                              key=cmp_to_key(self._auth_node_cmp)):
         locs = []
         roots = set([""])
         for item in items:
             if item:
                 locs.append(os.path.join(suite_dir_rel, item))
             else:
                 locs.append(suite_dir_rel)
             # Look up the configured root directory for this item; an
             # empty item means the suite directory itself.
             if item and os.path.normpath(item) in self.CLEANABLE_PATHS:
                 item_root = node.get_value(["root-dir{" + item + "}"])
                 if item_root is None:  # backward compat
                     item_root = node.get_value(["root-dir-" + item])
             elif item == "":
                 item_root = node.get_value(["root-dir"])
             else:
                 continue
             if item_root:
                 # The item also lives under an alternate root; clean the
                 # real location as well.
                 loc_rel = suite_dir_rel
                 if item:
                     loc_rel = os.path.join(suite_dir_rel, item)
                 locs.append(os.path.join(item_root, loc_rel))
                 roots.add(item_root)
         locs.reverse()
         # Invoke bash as a login shell. The root location of a path may be
         # in $DIR syntax, which can only be expanded correctly in a login
         # shell. However, profile scripts invoked on login shell may print
         # lots of junks. Hence we use a UUID here as a delimiter. Only
         # output after the UUID lines are desirable lines.
         command = ["bash", "-l", "-O", "extglob", "-c"]
         sh_command = "cd; echo '%s'" % (uuid_str, )
         if not self.host_selector.is_local_host(auth):
             command = engine.popen.get_cmd("ssh", auth) + command
         sh_command += "; ls -d -r %(locs)s; rm -fr %(locs)s" % {
             "locs": engine.popen.list_to_shell_str(locs)
         }
         if not only_items:
             # Clean empty directories
             # Change directory to root level to avoid cleaning them as
             # well For cylc suites, e.g. it can clean up to an empty
             # "cylc-run/" directory.
             for root in roots:
                 names = []
                 # Reverse sort to ensure that e.g. "share/cycle/" is
                 # cleaned before "share/"
                 for name in sorted(self.CLEANABLE_PATHS, reverse=True):
                     names.append(os.path.join(suite_dir_rel, name))
                 if os.sep in suite_dir_rel:
                     names.append(os.path.dirname(suite_dir_rel))
                 sh_command += (
                     "; " + "(cd %(root)s; " +
                     "rmdir -p %(names)s 2>/dev/null || true)") % {
                         "root": root,
                         "names": engine.popen.list_to_shell_str(names),
                     }
         if self.host_selector.is_local_host(auth):
             command.append(sh_command)
         else:
             # Quote so the remote shell receives a single argument.
             command.append(quote(sh_command))
         # Only lines after the UUID delimiter are genuine "ls" output.
         is_after_uuid_str = False
         for line in engine.popen(*command)[0].splitlines():
             line = line.decode()
             if is_after_uuid_str:
                 engine.handle_event(
                     FileSystemEvent(FileSystemEvent.DELETE,
                                     auth + ":" + line.strip()))
             elif line == uuid_str:
                 is_after_uuid_str = True