def _get_conf(self, r_node, t_node, key, compulsory=False, default=None):
    """Look up the setting `key` and expand environment variables in it.

    The value is read from `t_node` under `[key]`. If `t_node` has no
    such value, the lookup falls back to `r_node` under
    `[self.SECTION, key]`, and finally to `default`.

    Arguments:
        r_node: node providing the fallback value (presumably the
            section-level/root node -- confirm against callers).
        t_node: node consulted first (presumably the more specific
            node -- confirm against callers).
        key: name of the setting.
        compulsory: if true, a missing or empty value is an error.
        default: value used when neither node defines the setting.

    Returns:
        The setting with environment variables substituted, or the
        falsy value found (e.g. None or "") when the setting is unset
        and not compulsory.

    Raises:
        CompulsoryConfigValueError: `compulsory` is set and the value
            is missing or empty.
        ConfigValueError: the value refers to an unbound environment
            variable.
    """
    # The fallback is computed up front because it is handed to the
    # t_node lookup as its default.
    fallback = r_node.get_value([self.SECTION, key], default=default)
    setting = t_node.get_value([key], fallback)

    if not setting:
        if compulsory:
            raise CompulsoryConfigValueError([key], None, KeyError(key))
        # Nothing to substitute in an empty/missing value.
        return setting

    try:
        return env_var_process(setting)
    except UnboundEnvironmentVariableError as exc:
        # Report the raw, unexpanded setting alongside the cause.
        raise ConfigValueError([key], setting, exc)
def run(self, app_runner, conf_tree, opts, args, uuid, work_files):
    """Run multiple instances of a command using sets of specified args.

    Settings are read from `conf_tree.node`. One invocation is built per
    name (or per index when no names are configured) and the invocations
    are launched in the background with at most `self.MAX_PROCS` running
    at once. Running processes are polled every `self.SLEEP_DURATION`
    until all have finished, or until a failure triggers an abort when
    the fail mode is `self.TYPE_ABORT_ON_FAIL`.

    Arguments:
        app_runner: supplies `handle_event` for reporting and
            `popen.run_bg` for launching commands.
        conf_tree: configuration tree holding the bunch settings.
        opts, args, uuid, work_files: not used by this method.

    Returns:
        1 if any launched command exited with a non-zero return code,
        otherwise 0.

    Raises:
        CompulsoryConfigValueError: "command-format" is missing or empty.
        ConfigValueError: a setting is invalid (duplicate names, unknown
            fail-mode, non-integer command-instances, non-positive
            pool-size, inconsistent argument lengths, or use of the
            reserved "command-instances" key in the args section).
    """
    # Counts for reporting purposes
    run_ok = 0
    run_fail = 0
    run_skip = 0
    notrun = 0

    # Allow naming of individual calls
    self.invocation_names = conf_tree.node.get_value(
        [self.BUNCH_SECTION, "names"])
    if self.invocation_names:
        self.invocation_names = shlex.split(
            rose.env.env_var_process(self.invocation_names))
        if len(set(self.invocation_names)) != len(self.invocation_names):
            raise ConfigValueError([self.BUNCH_SECTION, "names"],
                                   self.invocation_names,
                                   "names must be unique")

    self.fail_mode = rose.env.env_var_process(
        conf_tree.node.get_value([self.BUNCH_SECTION, "fail-mode"],
                                 self.TYPE_CONTINUE_ON_FAIL))
    if self.fail_mode not in self.FAIL_MODE_TYPES:
        # Report the offending value held on the instance; there is no
        # local called "fail_mode".
        raise ConfigValueError([self.BUNCH_SECTION, "fail-mode"],
                               self.fail_mode,
                               "not a valid setting")

    self.incremental = conf_tree.node.get_value(
        [self.BUNCH_SECTION, "incremental"], "true")
    if self.incremental:
        self.incremental = rose.env.env_var_process(self.incremental)

    multi_args = conf_tree.node.get_value([self.ARGS_SECTION], {})
    for key, val in multi_args.items():
        multi_args[key].value = rose.env.env_var_process(val.value)

    # Check for a missing command-format *before* substitution so that
    # an unset value is reported as a compulsory-setting error rather
    # than being handed to env_var_process as None.
    self.command_format = conf_tree.node.get_value(
        [self.BUNCH_SECTION, "command-format"])
    if self.command_format:
        self.command_format = rose.env.env_var_process(
            self.command_format)
    if not self.command_format:
        raise CompulsoryConfigValueError(
            [self.BUNCH_SECTION, "command-format"],
            None,
            KeyError("command-format"))

    # Set up command-instances if needed
    instances = conf_tree.node.get_value(
        [self.BUNCH_SECTION, "command-instances"])
    if instances:
        try:
            instances = range(int(rose.env.env_var_process(instances)))
        except ValueError:
            raise ConfigValueError(
                [self.BUNCH_SECTION, "command-instances"],
                instances,
                "not an integer value")

    # Validate runlists
    if not self.invocation_names:
        if instances:
            arglength = len(instances)
        else:
            # NOTE(review): raises IndexError when there are no names,
            # no instances and no args -- confirm whether that
            # configuration should get a dedicated error.
            first_val = sorted(multi_args.items())[0][1]
            arglength = len(shlex.split(first_val.value))
        # A real list is needed: names are popped from the front later.
        self.invocation_names = list(range(arglength))
    else:
        arglength = len(self.invocation_names)

    # Split every argument list once; reused when building invocations.
    split_args = {}
    for item, val in sorted(multi_args.items()):
        split_args[item] = shlex.split(val.value)
        if len(split_args[item]) != arglength:
            raise ConfigValueError([self.ARGS_SECTION, item],
                                   conf_tree.node.get_value(
                                       [self.ARGS_SECTION, item]),
                                   "inconsistent arg lengths")

    if conf_tree.node.get_value([self.ARGS_SECTION, "command-instances"]):
        raise ConfigValueError([self.ARGS_SECTION, "command-instances"],
                               conf_tree.node.get_value(
                                   [self.ARGS_SECTION,
                                    "command-instances"]),
                               "reserved keyword")

    if instances and arglength != len(instances):
        raise ConfigValueError([self.BUNCH_SECTION, "command-instances"],
                               instances,
                               "inconsistent arg lengths")

    # Set max number of processes to run at once
    max_procs = conf_tree.node.get_value([self.BUNCH_SECTION, "pool-size"])
    if max_procs:
        self.MAX_PROCS = int(rose.env.env_var_process(max_procs))
        if self.MAX_PROCS < 1:
            # A pool that can never accept a process would leave the
            # scheduling loop below spinning forever.
            raise ConfigValueError([self.BUNCH_SECTION, "pool-size"],
                                   max_procs,
                                   "must be a positive integer")
    else:
        self.MAX_PROCS = arglength

    if self.incremental == "true":
        self.dao = RoseBunchDAO(conf_tree)
    else:
        self.dao = None

    failed = {}
    try:
        commands = {}
        for index, name in enumerate(self.invocation_names):
            invocation = RoseBunchCmd(name, self.command_format, index)
            for key, values in sorted(split_args.items()):
                invocation.argsdict[key] = values[index]
            if instances:
                invocation.argsdict["command-instances"] = instances[index]
            commands[name] = invocation

        procs = {}
        if 'ROSE_TASK_LOG_DIR' in os.environ:
            log_format = os.path.join(os.environ['ROSE_TASK_LOG_DIR'], "%s")
        else:
            log_format = os.path.join(os.getcwd(), "%s")

        abort = False

        while procs or (commands and not abort):
            # Reap finished processes. Iterate over a snapshot because
            # entries are removed from procs inside the loop.
            for key, proc in list(procs.items()):
                if proc.poll() is not None:
                    procs.pop(key)
                    if proc.returncode:
                        failed[key] = proc.returncode
                        run_fail += 1
                        app_runner.handle_event(
                            RosePopenError(str(key), proc.returncode,
                                           None, None))
                        if self.dao:
                            self.dao.update_command_state(
                                key, self.dao.S_FAIL)
                        if self.fail_mode == self.TYPE_ABORT_ON_FAIL:
                            abort = True
                            app_runner.handle_event(AbortEvent())
                    else:
                        run_ok += 1
                        app_runner.handle_event(SucceededEvent(key),
                                                prefix=self.PREFIX_OK)
                        if self.dao:
                            self.dao.update_command_state(
                                key, self.dao.S_PASS)

            # Top the pool back up with pending invocations.
            while len(procs) < self.MAX_PROCS and commands and not abort:
                key = self.invocation_names[0]
                command = commands.pop(key)
                self.invocation_names.pop(0)
                cmd = command.get_command()
                cmd_stdout = log_format % command.get_out_file()
                cmd_stderr = log_format % command.get_err_file()
                prefix = command.get_log_prefix()

                if self.dao:
                    if self.dao.check_has_succeeded(key):
                        run_skip += 1
                        app_runner.handle_event(
                            PreviousSuccessEvent(key),
                            prefix=self.PREFIX_PASS)
                        continue
                    else:
                        self.dao.add_command(key)

                # Give each child its own environment mapping so the
                # per-command prefix does not leak into this process.
                bunch_environ = os.environ.copy()
                bunch_environ['ROSE_BUNCH_LOG_PREFIX'] = prefix

                app_runner.handle_event(LaunchEvent(key, cmd))
                # The parent's handles are only needed while spawning;
                # assumes run_bg starts the process before returning, so
                # the child already holds its own descriptors.
                with open(cmd_stdout, 'w') as out_handle, \
                        open(cmd_stderr, 'w') as err_handle:
                    procs[key] = app_runner.popen.run_bg(
                        cmd,
                        shell=True,
                        stdout=out_handle,
                        stderr=err_handle,
                        env=bunch_environ)

            sleep(self.SLEEP_DURATION)

        if abort and commands:
            # Report everything that was never launched because of abort.
            for key in self.invocation_names:
                notrun += 1
                cmd = commands.pop(key).get_command()
                app_runner.handle_event(NotRunEvent(key, cmd),
                                        prefix=self.PREFIX_NOTRUN)
    finally:
        # Close the DAO even if launching or polling raised.
        if self.dao:
            self.dao.close()

    # Report summary data in job.out file
    app_runner.handle_event(
        SummaryEvent(run_ok, run_fail, run_skip, notrun))

    if failed:
        return 1
    return 0