def scan(self, hosts=None, timeout=None):
    """Scan for running suites (in hosts).

    Return (suite_scan_results, exceptions) where
    suite_scan_results is a list of SuiteScanResult instances and
    exceptions is a list of exceptions resulting from any failed scans

    Default timeout for SSH and "cylc scan" command is 5 seconds.

    """
    if not hosts:
        hosts = ["localhost"]
    if timeout is None:
        timeout = self.TIMEOUT
    cmd = ["cylc", "scan", "--comms-timeout=%s" % timeout] + list(hosts)
    # Run "cylc scan" in the background and poll it ourselves, so the
    # whole command (not just suite comms) is bounded by "timeout".
    proc = self.popen.run_bg(*cmd)
    results = {}
    exceptions = []
    end_time = time() + timeout
    while time() < end_time:
        sleep(0.1)
        ret_code = proc.poll()
        if ret_code is None:
            # Child still running: keep polling until the deadline.
            continue
        if ret_code == 0:
            # Each stdout line is parsed as "NAME USER@HOST:PORT".
            for line in proc.communicate()[0].splitlines():
                name, location = line.split()
                host = location.split("@")[1].split(":")[0]
                results[(name, host, _PORT_SCAN)] = SuiteScanResult(
                    name, location)
        else:
            out, err = proc.communicate()
            exceptions.append(RosePopenError(cmd, ret_code, out, err))
        # Child has been fully handled; clear "proc" so the timeout
        # clean-up below is skipped, then stop polling.
        proc = None
        break
    # Timed out, kill remaining processes
    if proc is not None:
        try:
            # NOTE(review): killpg is passed proc.pid — assumes run_bg
            # starts the child in its own process group; confirm.
            os.killpg(proc.pid, signal.SIGTERM)
        except OSError:
            pass
        else:
            # Kill succeeded: record the (terminated) command as failed.
            ret_code = proc.wait()
            out, err = proc.communicate()
            exceptions.append(RosePopenError(cmd, ret_code, out, err))
    return (sorted(results.values()), exceptions)
def run(self, app_runner, conf_tree, opts, args, uuid, work_files):
    """Run multiple instances of a command using sets of specified args.

    Reads the bunch configuration from conf_tree, builds one command
    invocation per entry in the run list, then executes them as a pool
    of background processes (at most self.MAX_PROCS at once), tracking
    pass/fail/skip state (optionally incrementally via RoseBunchDAO).

    Returns 1 if any invocation failed, else 0.
    """
    # Counts for reporting purposes
    run_ok = 0
    run_fail = 0
    run_skip = 0
    notrun = 0

    # Allow naming of individual calls
    self.invocation_names = conf_tree.node.get_value(
        [self.BUNCH_SECTION, "names"])
    if self.invocation_names:
        self.invocation_names = shlex.split(
            rose.env.env_var_process(self.invocation_names))
        if len(set(self.invocation_names)) != len(self.invocation_names):
            raise ConfigValueError([self.BUNCH_SECTION, "names"],
                                   self.invocation_names,
                                   "names must be unique")

    self.fail_mode = rose.env.env_var_process(
        conf_tree.node.get_value([self.BUNCH_SECTION, "fail-mode"],
                                 self.TYPE_CONTINUE_ON_FAIL))

    if self.fail_mode not in self.FAIL_MODE_TYPES:
        # BUG FIX: previously raised with the bare name "fail_mode",
        # which is undefined here and triggered a NameError instead of
        # the intended ConfigValueError.
        raise ConfigValueError([self.BUNCH_SECTION, "fail-mode"],
                               self.fail_mode, "not a valid setting")

    self.incremental = conf_tree.node.get_value(
        [self.BUNCH_SECTION, "incremental"], "true")
    if self.incremental:
        self.incremental = rose.env.env_var_process(self.incremental)

    # Expand environment variables in every argument list up front.
    multi_args = conf_tree.node.get_value([self.ARGS_SECTION], {})
    for key, val in multi_args.items():
        multi_args[key].value = rose.env.env_var_process(val.value)

    self.command_format = rose.env.env_var_process(
        conf_tree.node.get_value([self.BUNCH_SECTION, "command-format"]))

    if not self.command_format:
        raise CompulsoryConfigValueError(
            [self.BUNCH_SECTION, "command-format"],
            None, KeyError("command-format"))

    # Set up command-instances if needed
    instances = conf_tree.node.get_value(
        [self.BUNCH_SECTION, "command-instances"])

    if instances:
        try:
            instances = range(int(rose.env.env_var_process(instances)))
        except ValueError:
            raise ConfigValueError(
                [self.BUNCH_SECTION, "command-instances"],
                instances, "not an integer value")

    # Validate runlists: every arg list (and command-instances) must
    # agree on a single invocation count, "arglength".
    if not self.invocation_names:
        if instances:
            arglength = len(instances)
        else:
            item, val = sorted(multi_args.items())[0]
            arglength = len(shlex.split(val.value))
        self.invocation_names = range(0, arglength)
    else:
        arglength = len(self.invocation_names)

    for item, val in sorted(multi_args.items()):
        if len(shlex.split(val.value)) != arglength:
            raise ConfigValueError([self.ARGS_SECTION, item],
                                   conf_tree.node.get_value(
                                       [self.ARGS_SECTION, item]),
                                   "inconsistent arg lengths")

    # "command-instances" is reserved for the auto-generated index arg.
    if conf_tree.node.get_value([self.ARGS_SECTION, "command-instances"]):
        raise ConfigValueError([self.ARGS_SECTION, "command-instances"],
                               conf_tree.node.get_value([
                                   self.ARGS_SECTION, "command-instances"
                               ]), "reserved keyword")

    if instances and arglength != len(instances):
        raise ConfigValueError([self.BUNCH_SECTION, "command-instances"],
                               instances, "inconsistent arg lengths")

    # Set max number of processes to run at once
    max_procs = conf_tree.node.get_value([self.BUNCH_SECTION, "pool-size"])

    if max_procs:
        self.MAX_PROCS = int(rose.env.env_var_process(max_procs))
    else:
        self.MAX_PROCS = arglength

    if self.incremental == "true":
        self.dao = RoseBunchDAO(conf_tree)
    else:
        self.dao = None

    # Build one RoseBunchCmd per invocation, keyed by name.
    commands = {}
    for index, name in enumerate(self.invocation_names):
        invocation = RoseBunchCmd(name, self.command_format, index)
        for key, vals in sorted(multi_args.items()):
            invocation.argsdict[key] = shlex.split(vals.value)[index]
        if instances:
            invocation.argsdict["command-instances"] = instances[index]
        commands[name] = invocation

    procs = {}
    if 'ROSE_TASK_LOG_DIR' in os.environ:
        log_format = os.path.join(os.environ['ROSE_TASK_LOG_DIR'], "%s")
    else:
        log_format = os.path.join(os.getcwd(), "%s")

    failed = {}
    abort = False

    # Main scheduling loop: reap finished processes, then top up the
    # pool, until everything has run (or an abort is requested).
    while procs or (commands and not abort):
        for key, proc in procs.items():
            if proc.poll() is not None:
                procs.pop(key)
                if proc.returncode:
                    failed[key] = proc.returncode
                    run_fail += 1
                    app_runner.handle_event(
                        RosePopenError(str(key), proc.returncode,
                                       None, None))
                    if self.dao:
                        self.dao.update_command_state(key, self.dao.S_FAIL)
                    if self.fail_mode == self.TYPE_ABORT_ON_FAIL:
                        abort = True
                        app_runner.handle_event(AbortEvent())
                else:
                    run_ok += 1
                    app_runner.handle_event(SucceededEvent(key),
                                            prefix=self.PREFIX_OK)
                    if self.dao:
                        self.dao.update_command_state(key, self.dao.S_PASS)

        while len(procs) < self.MAX_PROCS and commands and not abort:
            key = self.invocation_names[0]
            command = commands.pop(key)
            self.invocation_names.pop(0)
            cmd = command.get_command()
            cmd_stdout = log_format % command.get_out_file()
            cmd_stderr = log_format % command.get_err_file()
            prefix = command.get_log_prefix()
            # NOTE(review): this aliases (and so mutates) os.environ
            # rather than copying it; each launch overwrites
            # ROSE_BUNCH_LOG_PREFIX in the parent environment too.
            bunch_environ = os.environ
            bunch_environ['ROSE_BUNCH_LOG_PREFIX'] = prefix

            if self.dao:
                # Incremental mode: skip anything already recorded as
                # a success on a previous run.
                if self.dao.check_has_succeeded(key):
                    run_skip += 1
                    app_runner.handle_event(PreviousSuccessEvent(key),
                                            prefix=self.PREFIX_PASS)
                    continue
                else:
                    self.dao.add_command(key)

            app_runner.handle_event(LaunchEvent(key, cmd))
            procs[key] = \
                app_runner.popen.run_bg(cmd,
                                        shell=True,
                                        stdout=open(cmd_stdout, 'w'),
                                        stderr=open(cmd_stderr, 'w'),
                                        env=bunch_environ)
        sleep(self.SLEEP_DURATION)

    # Report any invocations never launched because of an abort.
    if abort and commands:
        for key in self.invocation_names:
            notrun += 1
            cmd = commands.pop(key).get_command()
            app_runner.handle_event(NotRunEvent(key, cmd),
                                    prefix=self.PREFIX_NOTRUN)

    if self.dao:
        self.dao.close()

    # Report summary data in job.out file
    app_runner.handle_event(
        SummaryEvent(run_ok, run_fail, run_skip, notrun))

    if failed:
        return 1
    else:
        return 0
def _run_target_update(cls, dao, app_runner, compress_manager, target):
    """Helper for _run. Update a target.

    Transforms (rename/edit/compress) the target's sources in a
    temporary work directory, runs the archive command, records the
    outcome in the DAO, and reports a RoseArchEvent.
    """
    if target.status == target.ST_OLD:
        # Unchanged since last run: report and do nothing.
        app_runner.handle_event(RoseArchEvent(target))
        return
    if target.status in (target.ST_BAD, target.ST_NULL):
        # boolean to int
        target.command_rc = int(target.status == target.ST_BAD)
        if target.status == target.ST_BAD:
            level = Event.FAIL
        else:
            level = Event.DEFAULT
        event = RoseArchEvent(target)
        app_runner.handle_event(event)
        # Also report on the error stream at the appropriate level.
        app_runner.handle_event(event, kind=Event.KIND_ERR, level=level)
        return
    # Assume failure (rc=1) until the archive command succeeds.
    target.command_rc = 1
    dao.insert(target)
    work_dir = mkdtemp()
    times = [time()] * 3  # init, transformed, archived
    ret_code = None
    try:
        # Rename/edit sources
        # Status is reset to ST_BAD here and only flipped to ST_NEW
        # after the archive command returns 0.
        target.status = target.ST_BAD
        rename_required = False
        for source in target.sources.values():
            if source.name != source.orig_name:
                rename_required = True
                break
        if rename_required or target.source_edit_format:
            # Materialise each source under work_dir, either via the
            # user-supplied edit command or a plain symlink.
            for source in target.sources.values():
                source.path = os.path.join(work_dir, source.name)
                app_runner.fs_util.makedirs(os.path.dirname(source.path))
                if target.source_edit_format:
                    command = target.source_edit_format % {
                        "in": source.orig_path,
                        "out": source.path
                    }
                    app_runner.popen.run_ok(command, shell=True)
                else:
                    app_runner.fs_util.symlink(source.orig_path,
                                               source.path)
        # Compress sources
        if target.compress_scheme:
            handler = compress_manager.get_handler(target.compress_scheme)
            handler.compress_sources(target, work_dir)
        times[1] = time()  # transformed time
        # Run archive command
        sources = []
        if target.work_source_path:
            # A compression handler may have combined everything into
            # a single work source path.
            sources = [target.work_source_path]
        else:
            for source in target.sources.values():
                sources.append(source.path)
        command = target.command_format % {
            "sources": app_runner.popen.list_to_shell_str(sources),
            "target": app_runner.popen.list_to_shell_str([target.name])
        }
        ret_code, out, err = app_runner.popen.run(command, shell=True)
        times[2] = time()  # archived time
        if ret_code:
            app_runner.handle_event(
                RosePopenError([command], ret_code, out, err))
        else:
            target.status = target.ST_NEW
            app_runner.handle_event(err, kind=Event.KIND_ERR)
        app_runner.handle_event(out)
        target.command_rc = ret_code
        dao.update_command_rc(target)
    finally:
        # Always remove the temporary work directory.
        app_runner.fs_util.delete(work_dir)
    event = RoseArchEvent(target, times, ret_code)
    app_runner.handle_event(event)
    if target.status in (target.ST_BAD, target.ST_NULL):
        app_runner.handle_event(event, kind=Event.KIND_ERR,
                                level=Event.FAIL)
def run_impl(self, opts, args, uuid, work_files):
    """Install the suite locally and on remote task hosts, then start it.

    Side effects: changes the working directory (conf dir -> suite run
    dir -> its log dir), writes log/config/version files under the run
    directory, and launches remote "rose suite-run" / rsync jobs.
    Returns early (None) for --remote, install-only and
    local-install-only modes, and for an unchanged reload; otherwise
    returns 0 after asking the suite engine to start the suite.
    """
    # Log file, temporary
    if hasattr(self.event_handler, "contexts"):
        t_file = TemporaryFile()
        log_context = ReporterContext(None, self.event_handler.VV, t_file)
        self.event_handler.contexts[uuid] = log_context

    # Check suite engine specific compatibility
    self.suite_engine_proc.check_global_conf_compat()

    # Suite name from the current working directory
    if opts.conf_dir:
        self.fs_util.chdir(opts.conf_dir)
    opts.conf_dir = os.getcwd()

    # Suite-level defines are injected as jinja2 section defines.
    if opts.defines_suite:
        suite_section = "jinja2:" + self.suite_engine_proc.SUITE_CONF
        if not opts.defines:
            opts.defines = []
        for define in opts.defines_suite:
            opts.defines.append("[" + suite_section + "]" + define)

    # --remote=KEY=VALUE,...
    if opts.remote:
        # opts.name always set for remote.
        return self._run_remote(opts, opts.name)

    conf_tree = self.config_load(opts)
    self.fs_util.chdir(conf_tree.conf_dirs[0])

    suite_name = opts.name
    if not opts.name:
        suite_name = os.path.basename(os.getcwd())

    # Automatic Rose constants
    # ROSE_ORIG_HOST: originating host
    # ROSE_VERSION: Rose version (not retained in run_mode=="reload")
    # Suite engine version
    jinja2_section = "jinja2:" + self.suite_engine_proc.SUITE_CONF
    my_rose_version = ResourceLocator.default().get_version()
    suite_engine_key = self.suite_engine_proc.get_version_env_name()
    if opts.run_mode in ["reload", "restart"]:
        # Reuse the engine version recorded by the original run.
        prev_config_path = self.suite_engine_proc.get_suite_dir(
            suite_name, "log", "rose-suite-run.conf")
        prev_config = ConfigLoader()(prev_config_path)
        suite_engine_version = prev_config.get_value(
            ["env", suite_engine_key])
    else:
        suite_engine_version = self.suite_engine_proc.get_version()
    auto_items = {
        "ROSE_ORIG_HOST": self.host_selector.get_local_host(),
        "ROSE_VERSION": ResourceLocator.default().get_version(),
        suite_engine_key: suite_engine_version
    }
    for key, val in auto_items.items():
        requested_value = conf_tree.node.get_value(["env", key])
        if requested_value:
            # A user-requested ROSE_VERSION must match the running one.
            if key == "ROSE_VERSION" and val != requested_value:
                exc = VersionMismatchError(requested_value, val)
                raise ConfigValueError(["env", key], requested_value, exc)
            val = requested_value
        else:
            conf_tree.node.set(["env", key], val,
                               state=conf_tree.node.STATE_NORMAL)
        conf_tree.node.set([jinja2_section, key], '"' + val + '"')

    # See if suite is running or not
    hosts = []
    if opts.host:
        hosts.append(opts.host)
    if opts.run_mode == "reload":
        suite_run_hosts = self.suite_engine_proc.get_suite_run_hosts(
            None, suite_name, hosts)
        if not suite_run_hosts:
            raise SuiteNotRunningError(suite_name)
        hosts = suite_run_hosts
    else:
        self.suite_engine_proc.check_suite_not_running(suite_name, hosts)

    # Install the suite to its run location
    suite_dir_rel = self._suite_dir_rel(suite_name)
    suite_dir = os.path.join(os.path.expanduser("~"), suite_dir_rel)
    suite_conf_dir = os.getcwd()
    locs_conf = ConfigNode()
    if opts.new_mode:
        # --new is refused when run from inside the run directory, and
        # for reload/restart (which must keep existing state).
        if os.getcwd() == suite_dir:
            raise NewModeError("PWD", os.getcwd())
        elif opts.run_mode in ["reload", "restart"]:
            raise NewModeError("--run", opts.run_mode)
        self.suite_run_cleaner.clean(suite_name)
    if os.getcwd() != suite_dir:
        if opts.run_mode == "run":
            self._run_init_dir(opts, suite_name, conf_tree,
                               locs_conf=locs_conf)
        os.chdir(suite_dir)

    # Housekeep log files
    if not opts.install_only_mode and not opts.local_install_only_mode:
        self._run_init_dir_log(opts)
    self.fs_util.makedirs("log/suite")

    # Rose configuration and version logs
    self.fs_util.makedirs("log/rose-conf")
    run_mode = opts.run_mode
    if run_mode not in ["reload", "restart", "run"]:
        run_mode = "run"
    mode = run_mode
    if opts.install_only_mode:
        mode = "install-only"
    elif opts.local_install_only_mode:
        mode = "local-install-only"
    prefix = "rose-conf/%s-%s" % (strftime("%Y%m%dT%H%M%S"), mode)

    # Dump the actual configuration as rose-suite-run.conf
    ConfigDumper()(conf_tree.node, "log/" + prefix + ".conf")

    # Install version information file
    write_source_vc_info(suite_conf_dir, "log/" + prefix + ".version",
                         self.popen)

    # If run through rose-stem, install version information files for
    # each source tree if they're a working copy
    if hasattr(opts, 'source') and hasattr(opts, 'project'):
        for i, url in enumerate(opts.source):
            if os.path.isdir(url):
                write_source_vc_info(
                    url, "log/" + opts.project[i] + "-" + str(i) +
                    ".version", self.popen)

    for ext in [".conf", ".version"]:
        self.fs_util.symlink(prefix + ext, "log/rose-suite-run" + ext)

    # Move temporary log to permanent log
    if hasattr(self.event_handler, "contexts"):
        log_file_path = os.path.abspath(
            os.path.join("log", "rose-suite-run.log"))
        log_file = open(log_file_path, "ab")
        temp_log_file = self.event_handler.contexts[uuid].handle
        temp_log_file.seek(0)
        log_file.write(temp_log_file.read())
        self.event_handler.contexts[uuid].handle = log_file
        temp_log_file.close()

    # Install share/work directories (local)
    for name in ["share", "share/cycle", "work"]:
        self._run_init_dir_work(opts, suite_name, name, conf_tree,
                                locs_conf=locs_conf)

    # Process Environment Variables
    environ = self.config_pm(conf_tree, "env")

    # Process Files
    cwd = os.getcwd()
    for rel_path, conf_dir in conf_tree.files.items():
        if (conf_dir == cwd or
                any([fnmatchcase(os.sep + rel_path, exclude)
                     for exclude in self.SYNC_EXCLUDES]) or
                conf_tree.node.get(["jinja2:" + rel_path]) is not None):
            continue
        # No sub-directories, very slow otherwise
        if os.sep in rel_path:
            rel_path = rel_path.split(os.sep, 1)[0]
        target_key = self.config_pm.get_handler("file").PREFIX + rel_path
        target_node = conf_tree.node.get([target_key])
        if target_node is None:
            conf_tree.node.set([target_key])
            target_node = conf_tree.node.get([target_key])
        elif target_node.is_ignored():
            continue
        source_node = target_node.get("source")
        if source_node is None:
            target_node.set(["source"],
                            os.path.join(conf_dir, rel_path))
        elif source_node.is_ignored():
            continue
    self.config_pm(conf_tree, "file",
                   no_overwrite_mode=opts.no_overwrite_mode)

    # Process Jinja2 configuration
    self.config_pm(conf_tree, "jinja2")

    # Ask suite engine to parse suite configuration
    # and determine if it is up to date (unchanged)
    suite_conf_unchanged = self.suite_engine_proc.cmp_suite_conf(
        suite_name, opts.run_mode, opts.strict_mode, opts.debug_mode)

    if opts.local_install_only_mode:
        return

    # Install suite files to each remote [user@]host
    # Marker files named after "uuid" let the remote end detect whether
    # the run directory is shared with this host (no rsync needed).
    for name in ["", "log/", "share/", "share/cycle/", "work/"]:
        uuid_file = os.path.abspath(name + uuid)
        open(uuid_file, "w").close()
        work_files.append(uuid_file)

    # Install items to user@host
    conf = ResourceLocator.default().get_conf()
    auths = self.suite_engine_proc.get_tasks_auths(suite_name)
    proc_queue = []  # [[proc, command, "ssh"|"rsync", auth], ...]
    for auth in sorted(auths):
        host = auth
        if "@" in auth:
            host = auth.split("@", 1)[1]
        # Remote shell
        command = self.popen.get_cmd("ssh", "-n", auth)
        # Provide ROSE_VERSION and CYLC_VERSION in the environment
        shcommand = "env ROSE_VERSION=%s %s=%s" % (
            my_rose_version, suite_engine_key, suite_engine_version)
        # Use login shell?
        no_login_shell = self._run_conf("remote-no-login-shell",
                                        host=host, conf_tree=conf_tree)
        if not no_login_shell or no_login_shell.lower() != "true":
            shcommand += r""" bash -l -c '"$0" "$@"'"""
        # Path to "rose" command, if applicable
        rose_bin = self._run_conf("remote-rose-bin", host=host,
                                  conf_tree=conf_tree, default="rose")
        # Build remote "rose suite-run" command
        shcommand += " %s suite-run -vv -n %s" % (rose_bin, suite_name)
        for key in ["new", "debug", "install-only"]:
            attr = key.replace("-", "_") + "_mode"
            if getattr(opts, attr, None) is not None:
                shcommand += " --%s" % key
        if opts.log_keep:
            shcommand += " --log-keep=%s" % opts.log_keep
        if opts.log_name:
            shcommand += " --log-name=%s" % opts.log_name
        if not opts.log_archive_mode:
            shcommand += " --no-log-archive"
        shcommand += " --run=%s" % opts.run_mode
        # Build --remote= option
        shcommand += " --remote=uuid=%s" % uuid
        host_confs = [
            "root-dir",
            "root-dir{share}",
            "root-dir{share/cycle}",
            "root-dir{work}"
        ]
        locs_conf.set([auth])
        for key in host_confs:
            value = self._run_conf(key, host=host, conf_tree=conf_tree)
            if value is not None:
                val = self.popen.list_to_shell_str([str(value)])
                shcommand += ",%s=%s" % (key, val)
                locs_conf.set([auth, key], value)
        command.append(shcommand)
        proc = self.popen.run_bg(*command)
        proc_queue.append([proc, command, "ssh", auth])

    # Drain the queue: poll each ssh/rsync job; a finished ssh job may
    # enqueue a follow-up rsync for hosts without a shared filesystem.
    while proc_queue:
        sleep(self.SLEEP_PIPE)
        proc, command, command_name, auth = proc_queue.pop(0)
        if proc.poll() is None:
            # put it back in proc_queue
            proc_queue.append([proc, command, command_name, auth])
            continue
        ret_code = proc.wait()
        out, err = proc.communicate()
        if ret_code:
            raise RosePopenError(command, ret_code, out, err)
        if command_name == "rsync":
            self.handle_event(out, level=Event.VV)
            continue
        else:
            self.handle_event(out, level=Event.VV, prefix="[%s] " % auth)
        # If the remote end echoed the uuid marker path, the run
        # directory is shared — drop the host from locs_conf.
        for line in out.split("\n"):
            if "/" + uuid == line.strip():
                locs_conf.unset([auth])
                break
        else:
            # for-else: marker not seen, so sync files over rsync.
            filters = {"excludes": [], "includes": []}
            for name in ["", "log/", "share/", "share/cycle/", "work/"]:
                filters["excludes"].append(name + uuid)
            target = auth + ":" + suite_dir_rel
            cmd = self._get_cmd_rsync(target, **filters)
            proc_queue.append(
                [self.popen.run_bg(*cmd), cmd, "rsync", auth])

    # Install ends
    ConfigDumper()(locs_conf, os.path.join("log", "rose-suite-run.locs"))
    if opts.install_only_mode:
        return
    elif opts.run_mode == "reload" and suite_conf_unchanged:
        conf_name = self.suite_engine_proc.SUITE_CONF
        self.handle_event(SkipReloadEvent(suite_name, conf_name))
        return

    # Start the suite
    self.fs_util.chdir("log")
    ret = 0

    # FIXME: should sync files to suite host?
    if opts.run_mode != "reload":
        if opts.host:
            hosts = [opts.host]
        else:
            names = shlex.split(
                conf.get_value(["rose-suite-run", "hosts"], ""))
            if names:
                hosts += self.host_selector.expand(names)[0]

    if (hosts and len(hosts) == 1 and
            self.host_selector.is_local_host(hosts[0])):
        host = "localhost"
    elif hosts:
        host = self.host_selector(hosts)[0][0]
    else:
        host = "localhost"
    self.handle_event(SuiteHostSelectEvent(suite_name, run_mode, host))
    # FIXME: values in environ were expanded in the localhost
    self.suite_engine_proc.run(
        suite_name, host, environ, opts.run_mode, args)
    open("rose-suite-run.host", "w").write(host + "\n")

    # Disconnect log file handle, so monitoring tool command will no longer
    # be associated with the log file.
    self.event_handler.contexts[uuid].handle.close()
    self.event_handler.contexts.pop(uuid)

    # Launch the monitoring tool
    # Note: maybe use os.ttyname(sys.stdout.fileno())?
    if os.getenv("DISPLAY") and host and opts.gcontrol_mode:
        self.suite_engine_proc.gcontrol(suite_name, host)
    return ret
def _run(self, dao, app_runner, config):
    """Transform and archive suite files.

    This application is designed to work under "rose task-run" in a
    suite.

    Builds a RoseArchTarget per configured section, diffs each against
    the DAO record from the previous run, then archives new/changed
    targets. Returns the number of targets that ended in ST_BAD.
    """
    path = os.path.dirname(os.path.dirname(sys.modules["rose"].__file__))
    compress_manager = SchemeHandlersManager(
        [path], "rose.apps.rose_arch_compressions",
        ["compress_sources"], None, app_runner)
    # Set up the targets
    cycle = os.getenv("ROSE_TASK_CYCLE_TIME")
    targets = []
    for t_key, t_node in sorted(config.value.items()):
        # Only process non-ignored "SECTION:tail" sections.
        if t_node.is_ignored() or ":" not in t_key:
            continue
        s_key_head, s_key_tail = t_key.split(":", 1)
        if s_key_head != self.SECTION or not s_key_tail:
            continue
        target_prefix = self._get_conf(
            config, t_node, "target-prefix", default="")
        try:
            s_key_tail = env_var_process(s_key_tail)
        except UnboundEnvironmentVariableError as exc:
            raise ConfigValueError([t_key, ""], "", exc)
        target_name = target_prefix + s_key_tail
        target = RoseArchTarget(target_name)
        target.command_format = self._get_conf(
            config, t_node, "command-format", compulsory=True)
        # Validate the %(sources)s / %(target)s placeholders early.
        try:
            target.command_format % {"sources": "", "target": ""}
        except KeyError as exc:
            target.status = target.ST_BAD
            app_runner.handle_event(
                RoseArchValueError(target.name, "command-format",
                                   target.command_format,
                                   type(exc).__name__, exc))
        source_str = self._get_conf(
            config, t_node, "source", compulsory=True)
        source_prefix = self._get_conf(
            config, t_node, "source-prefix", default="")
        target.source_edit_format = self._get_conf(
            config, t_node, "source-edit-format", default="")
        # Validate the %(in)s / %(out)s placeholders early.
        try:
            target.source_edit_format % {"in": "", "out": ""}
        except KeyError as exc:
            target.status = target.ST_BAD
            app_runner.handle_event(
                RoseArchValueError(target.name, "source-edit-format",
                                   target.source_edit_format,
                                   type(exc).__name__, exc))
        update_check_str = self._get_conf(
            config, t_node, "update-check", default="md5sum")
        try:
            checksum_func = get_checksum_func(update_check_str)
        except KeyError as exc:
            raise RoseArchValueError(target.name, "update-check",
                                     update_check_str,
                                     type(exc).__name__, exc)
        # Expand each source glob and checksum every file found, so a
        # changed source changes the target's identity.
        for source_glob in shlex.split(source_str):
            paths = glob(source_prefix + source_glob)
            if not paths:
                exc = OSError(errno.ENOENT, os.strerror(errno.ENOENT),
                              source_glob)
                app_runner.handle_event(
                    ConfigValueError([t_key, "source"], source_glob, exc))
                target.status = target.ST_BAD
                continue
            for path in paths:
                # N.B. source_prefix may not be a directory
                name = path[len(source_prefix):]
                for path_, checksum, _ in get_checksum(
                        path, checksum_func):
                    if checksum is None:  # is directory
                        continue
                    if path_:
                        target.sources[checksum] = RoseArchSource(
                            checksum,
                            os.path.join(name, path_),
                            os.path.join(path, path_))
                    else:  # path is a file
                        target.sources[checksum] = RoseArchSource(
                            checksum, name, path)
        target.compress_scheme = self._get_conf(config, t_node, "compress")
        if target.compress_scheme:
            if (compress_manager.get_handler(target.compress_scheme)
                    is None):
                app_runner.handle_event(
                    ConfigValueError([t_key, "compress"],
                                     target.compress_scheme,
                                     KeyError(target.compress_scheme)))
                target.status = target.ST_BAD
        else:
            # No explicit scheme: infer one from the target name's
            # file extension, if a handler exists for it.
            target_base = target.name
            if "/" in target.name:
                target_base = target.name.rsplit("/", 1)[1]
            if "." in target_base:
                tail = target_base.split(".", 1)[1]
                if compress_manager.get_handler(tail):
                    target.compress_scheme = tail
        rename_format = self._get_conf(config, t_node, "rename-format")
        if rename_format:
            rename_parser_str = self._get_conf(
                config, t_node, "rename-parser")
            if rename_parser_str:
                try:
                    rename_parser = re.compile(rename_parser_str)
                except re.error as exc:
                    raise RoseArchValueError(target.name, "rename-parser",
                                             rename_parser_str,
                                             type(exc).__name__, exc)
            else:
                rename_parser = None
            for source in target.sources.values():
                dict_ = {"cycle": cycle, "name": source.name}
                if rename_parser:
                    match = rename_parser.match(source.name)
                    if match:
                        dict_.update(match.groupdict())
                try:
                    source.name = rename_format % dict_
                except (KeyError, ValueError) as exc:
                    raise RoseArchValueError(target.name, "rename-format",
                                             rename_format,
                                             type(exc).__name__, exc)
        # Compare with the previous run: equal record => ST_OLD (skip);
        # otherwise drop the stale record so it can be re-archived.
        old_target = dao.select(target.name)
        if old_target is None or old_target != target:
            dao.delete(target)
        else:
            target.status = target.ST_OLD
        targets.append(target)
    # Delete from database items that are no longer relevant
    dao.delete_all(filter_targets=targets)
    # Update the targets
    for target in targets:
        if target.status == target.ST_OLD:
            app_runner.handle_event(RoseArchEvent(target))
            continue
        # Assume failure (rc=1) until the archive command succeeds.
        target.command_rc = 1
        dao.insert(target)
        if target.status == target.ST_BAD:
            app_runner.handle_event(RoseArchEvent(target))
            continue
        work_dir = mkdtemp()
        t_init = time()
        t_tran, t_arch = t_init, t_init
        ret_code = None
        try:
            # Rename/edit sources
            # Status reset to ST_BAD; flipped to ST_NEW only on success.
            target.status = target.ST_BAD
            rename_required = False
            for source in target.sources.values():
                if source.name != source.orig_name:
                    rename_required = True
                    break
            if rename_required or target.source_edit_format:
                for source in target.sources.values():
                    source.path = os.path.join(work_dir, source.name)
                    source_path_d = os.path.dirname(source.path)
                    app_runner.fs_util.makedirs(source_path_d)
                    if target.source_edit_format:
                        fmt_args = {
                            "in": source.orig_path,
                            "out": source.path
                        }
                        command = target.source_edit_format % fmt_args
                        app_runner.popen.run_ok(command, shell=True)
                    else:
                        app_runner.fs_util.symlink(source.orig_path,
                                                   source.path)
            # Compress sources
            if target.compress_scheme:
                handler = compress_manager.get_handler(
                    target.compress_scheme)
                handler.compress_sources(target, work_dir)
            t_tran = time()
            # Run archive command
            sources = []
            if target.work_source_path:
                # A compression handler may combine everything into a
                # single work source path.
                sources = [target.work_source_path]
            else:
                for source in target.sources.values():
                    sources.append(source.path)
            sources_str = app_runner.popen.list_to_shell_str(sources)
            target_str = app_runner.popen.list_to_shell_str([target.name])
            command = target.command_format % {
                "sources": sources_str,
                "target": target_str
            }
            ret_code, out, err = app_runner.popen.run(command, shell=True)
            t_arch = time()
            if ret_code:
                app_runner.handle_event(
                    RosePopenError([command], ret_code, out, err))
            else:
                target.status = target.ST_NEW
                app_runner.handle_event(err, kind=Event.KIND_ERR)
            app_runner.handle_event(out)
            target.command_rc = ret_code
            dao.update_command_rc(target)
        finally:
            # Always remove the temporary work directory.
            app_runner.fs_util.delete(work_dir)
        app_runner.handle_event(
            RoseArchEvent(target, [t_init, t_tran, t_arch], ret_code))
    return [target.status for target in targets].count(
        RoseArchTarget.ST_BAD)
def run(self, app_runner, conf_tree, opts, args, uuid, work_files):
    """ Run multiple instances of a command using sets of specified args

    Builds one invocation per run-list entry — either a formatted
    command ("command-format") or the app command with args exported as
    environment variables — and executes them as a pool of background
    processes (at most "pool-size" at once), with optional incremental
    skip of previously-successful invocations. Returns 1 if any
    invocation failed, else 0.
    """
    # Counts for reporting purposes
    run_ok = 0
    run_fail = 0
    run_skip = 0
    notrun = 0

    # Allow naming of individual calls
    self.invocation_names = conf_tree.node.get_value(
        [self.BUNCH_SECTION, "names"])
    if self.invocation_names:
        self.invocation_names = shlex.split(
            rose.env.env_var_process(self.invocation_names))
        if len(set(self.invocation_names)) != len(self.invocation_names):
            raise ConfigValueError([self.BUNCH_SECTION, "names"],
                                   self.invocation_names,
                                   "names must be unique")

    self.fail_mode = rose.env.env_var_process(
        conf_tree.node.get_value([self.BUNCH_SECTION, "fail-mode"],
                                 self.TYPE_CONTINUE_ON_FAIL))
    if self.fail_mode not in self.FAIL_MODE_TYPES:
        raise ConfigValueError([self.BUNCH_SECTION, "fail-mode"],
                               self.fail_mode, "not a valid setting")

    self.incremental = conf_tree.node.get_value(
        [self.BUNCH_SECTION, "incremental"], "true")
    if self.incremental:
        self.incremental = rose.env.env_var_process(self.incremental)

    # Prefer "command-format"; otherwise fall back to the app command,
    # in which case args are passed via environment variables instead.
    self.isformatted = True
    self.command = rose.env.env_var_process(
        conf_tree.node.get_value([self.BUNCH_SECTION, "command-format"]))

    if not self.command:
        self.isformatted = False
        self.command = app_runner.get_command(conf_tree, opts, args)

    if not self.command:
        raise CommandNotDefinedError()

    # Set up command-instances if needed
    instances = conf_tree.node.get_value(
        [self.BUNCH_SECTION, "command-instances"])

    if instances:
        try:
            instances = range(int(rose.env.env_var_process(instances)))
        except ValueError:
            raise ConfigValueError(
                [self.BUNCH_SECTION, "command-instances"],
                instances, "not an integer value")

    # Argument lists
    multi_args = conf_tree.node.get_value([self.ARGS_SECTION], {})
    bunch_args_names = []
    bunch_args_values = []
    for key, val in multi_args.items():
        bunch_args_names.append(key)
        bunch_args_values.append(
            shlex.split(rose.env.env_var_process(val.value)))

    # Update the argument values based on the argument-mode
    argument_mode = conf_tree.node.get_value(
        [self.BUNCH_SECTION, "argument-mode"], self.DEFAULT_ARGUMENT_MODE)
    if argument_mode == self.DEFAULT_ARGUMENT_MODE:
        pass
    elif argument_mode in self.ACCEPTED_ARGUMENT_MODES:
        # Combine the arg lists with the named itertools function
        # (e.g. product, izip_longest); "izip_longest" pads short
        # lists with empty strings. (izip_longest => Python 2.)
        _itertools_cmd = getattr(itertools, argument_mode)
        if argument_mode == "izip_longest":
            _permutations = _itertools_cmd(*bunch_args_values,
                                           fillvalue="")
        else:
            _permutations = _itertools_cmd(*bunch_args_values)
        # Reconstruct the bunch_args_values
        _permutations = list(_permutations)
        for index, _ in enumerate(bunch_args_values):
            bunch_args_values[index] = [v[index] for v in _permutations]
    else:
        raise ConfigValueError([self.BUNCH_SECTION, "argument-mode"],
                               argument_mode, "must be one of %s" %
                               self.ACCEPTED_ARGUMENT_MODES)

    # Validate runlists: all arg lists (and command-instances) must
    # agree on a single invocation count, "arglength".
    if not self.invocation_names:
        if instances:
            arglength = len(instances)
        else:
            arglength = len(bunch_args_values[0])
        self.invocation_names = range(0, arglength)
    else:
        arglength = len(self.invocation_names)

    for item, vals in zip(bunch_args_names, bunch_args_values):
        if len(vals) != arglength:
            raise ConfigValueError([self.ARGS_SECTION, item],
                                   conf_tree.node.get_value(
                                       [self.ARGS_SECTION, item]),
                                   "inconsistent arg lengths")

    # Both spellings are reserved for the auto-generated instance arg
    # ("command-instances" for formatted commands, "COMMAND_INSTANCES"
    # for the environment-variable form).
    if conf_tree.node.get_value([self.ARGS_SECTION, "command-instances"]):
        raise ConfigValueError([self.ARGS_SECTION, "command-instances"],
                               conf_tree.node.get_value([
                                   self.ARGS_SECTION, "command-instances"
                               ]), "reserved keyword")

    if conf_tree.node.get_value([self.ARGS_SECTION, "COMMAND_INSTANCES"]):
        raise ConfigValueError([self.ARGS_SECTION, "COMMAND_INSTANCES"],
                               conf_tree.node.get_value([
                                   self.ARGS_SECTION, "COMMAND_INSTANCES"
                               ]), "reserved keyword")

    if instances and arglength != len(instances):
        raise ConfigValueError([self.BUNCH_SECTION, "command-instances"],
                               instances, "inconsistent arg lengths")

    # Set max number of processes to run at once
    max_procs = conf_tree.node.get_value([self.BUNCH_SECTION, "pool-size"])

    if max_procs:
        max_procs = int(rose.env.env_var_process(max_procs))
    else:
        max_procs = arglength

    if self.incremental == "true":
        self.dao = RoseBunchDAO(conf_tree)
    else:
        self.dao = None

    # Build one RoseBunchCmd per invocation, keyed by name.
    commands = {}
    for vals in zip(range(arglength), self.invocation_names,
                    *bunch_args_values):
        index, name, bunch_args_vals = vals[0], vals[1], vals[2:]
        argsdict = dict(zip(bunch_args_names, bunch_args_vals))
        if instances:
            if self.isformatted:
                argsdict["command-instances"] = instances[index]
            else:
                argsdict["COMMAND_INSTANCES"] = str(instances[index])
        commands[name] = RoseBunchCmd(name, self.command, argsdict,
                                      self.isformatted)

    procs = {}
    if 'ROSE_TASK_LOG_DIR' in os.environ:
        log_format = os.path.join(os.environ['ROSE_TASK_LOG_DIR'], "%s")
    else:
        log_format = os.path.join(os.getcwd(), "%s")

    failed = {}
    abort = False

    # Main scheduling loop: reap finished processes, then top up the
    # pool, until everything has run (or an abort is requested).
    while procs or (commands and not abort):
        for key, proc in procs.items():
            if proc.poll() is not None:
                procs.pop(key)
                if proc.returncode:
                    failed[key] = proc.returncode
                    run_fail += 1
                    app_runner.handle_event(
                        RosePopenError(str(key), proc.returncode,
                                       None, None))
                    if self.dao:
                        self.dao.update_command_state(key,
                                                      self.dao.S_FAIL)
                    if self.fail_mode == self.TYPE_ABORT_ON_FAIL:
                        abort = True
                        app_runner.handle_event(AbortEvent())
                else:
                    run_ok += 1
                    app_runner.handle_event(SucceededEvent(key),
                                            prefix=self.PREFIX_OK)
                    if self.dao:
                        self.dao.update_command_state(key,
                                                      self.dao.S_PASS)

        while len(procs) < max_procs and commands and not abort:
            key = self.invocation_names[0]
            command = commands.pop(key)
            self.invocation_names.pop(0)
            cmd = command.get_command()
            cmd_stdout = log_format % command.get_out_file()
            cmd_stderr = log_format % command.get_err_file()
            prefix = command.get_log_prefix()
            # NOTE(review): this aliases (and so mutates) os.environ
            # rather than copying it; each launch's args/prefix leak
            # into the parent environment.
            bunch_environ = os.environ
            if not command.isformatted:
                # Pass args to the command as environment variables.
                bunch_environ.update(command.argsdict)
            bunch_environ['ROSE_BUNCH_LOG_PREFIX'] = prefix

            if self.dao:
                # Incremental mode: skip anything recorded as a
                # success by a previous run.
                if self.dao.check_has_succeeded(key):
                    run_skip += 1
                    app_runner.handle_event(PreviousSuccessEvent(key),
                                            prefix=self.PREFIX_PASS)
                    continue
                else:
                    self.dao.add_command(key)

            app_runner.handle_event(LaunchEvent(key, cmd))
            procs[key] = app_runner.popen.run_bg(
                cmd, shell=True, stdout=open(cmd_stdout, 'w'),
                stderr=open(cmd_stderr, 'w'), env=bunch_environ)
        sleep(self.SLEEP_DURATION)

    # Report any invocations never launched because of an abort.
    if abort and commands:
        for key in self.invocation_names:
            notrun += 1
            cmd = commands.pop(key).get_command()
            app_runner.handle_event(NotRunEvent(key, cmd),
                                    prefix=self.PREFIX_NOTRUN)

    if self.dao:
        self.dao.close()

    # Report summary data in job.out file
    app_runner.handle_event(
        SummaryEvent(run_ok, run_fail, run_skip, notrun))

    if failed:
        return 1
    else:
        return 0