def test_resubmit_successful(cleanup):
    cmd = f"{SUBMIT_JOBS} {CONFIG_FILE} --output={OUTPUT}"
    check_run_command(cmd)
    check_run_command(f"{WAIT} --output={OUTPUT} -p 0.01 -t2")
    summary = ResultsSummary(OUTPUT)
    assert len(summary.get_failed_results()) == 0
    assert len(summary.get_successful_results()) == NUM_COMMANDS

    check_run_command(f"jade config save-submission-groups {OUTPUT} -c {SG_FILE}")
    groups = load_data(SG_FILE)
    assert groups[0]["submitter_params"]["per_node_batch_size"] > NUM_COMMANDS
    groups[0]["submitter_params"]["per_node_batch_size"] = NUM_COMMANDS
    dump_data(groups, SG_FILE)

    check_run_command(f"{RESUBMIT_JOBS} {OUTPUT} -s {SG_FILE} --successful")
    check_run_command(f"{WAIT} --output={OUTPUT} -p 0.01")
    summary = ResultsSummary(OUTPUT)
    assert len(summary.get_failed_results()) == 0
    assert len(summary.get_successful_results()) == NUM_COMMANDS

    check_run_command(f"jade config save-submission-groups {OUTPUT} --force -c {SG_FILE}")
    groups = load_data(SG_FILE)
    assert groups[0]["submitter_params"]["per_node_batch_size"] == NUM_COMMANDS

def test_config__assign_blocked_by(cleanup):
    config = GenericCommandConfiguration()
    base_cmd = "bash my_script.sh"
    regular_job_names = []
    for i in range(1, 4):
        cmd = base_cmd + " " + str(i)
        name = f"job_{i}"
        job = GenericCommandParameters(
            command=cmd,
            name=name,
            append_job_name=True,
            append_output_dir=True,
        )
        config.add_job(job)
        regular_job_names.append(name)

    pp_name = "post_process"
    post_process_job = GenericCommandParameters(
        command="bash run_post_process.sh",
        name=pp_name,
        append_job_name=True,
        append_output_dir=True,
    )
    config.add_job(post_process_job)
    config_file = CONFIG1
    config.dump(config_file, indent=2)

    ret = run_command(f"jade config assign-blocked-by {CONFIG1} 3 -o {CONFIG2}")
    assert ret == 0
    assert os.path.exists(CONFIG2)
    config = load_data(CONFIG2)
    assert sorted(config["jobs"][3]["blocked_by"]) == sorted(regular_job_names)
    os.remove(CONFIG2)

    ret = run_command(f"jade config assign-blocked-by {CONFIG1} 3 1 2 -o {CONFIG2}")
    assert ret == 0
    assert os.path.exists(CONFIG2)
    config = load_data(CONFIG2)
    expected = [regular_job_names[1], regular_job_names[2]]
    assert sorted(config["jobs"][3]["blocked_by"]) == sorted(expected)

    # Include the pp job in blocking-job-indexes.
    ret = run_command(f"jade config assign-blocked-by {CONFIG1} 3 1 2 3 -o {CONFIG2}")
    assert ret != 0

    # Invalid job index
    ret = run_command(f"jade config assign-blocked-by {CONFIG1} 47 1 2 -o {CONFIG2}")
    assert ret != 0

def test_config__filter_copy(cleanup):
    ret = run_command(f"jade auto-config demo tests/data/demo -c {CONFIG1}")
    assert ret == 0
    assert os.path.exists(CONFIG1)
    ret = run_command(f"jade config filter {CONFIG1} -o {CONFIG2}")
    assert ret == 0
    assert os.path.exists(CONFIG2)
    config1 = load_data(CONFIG1)
    config2 = load_data(CONFIG2)
    assert config1 == config2

def test_config__filter_range(cleanup):
    ret = run_command(f"jade auto-config demo tests/data/demo -c {CONFIG1}")
    assert ret == 0
    assert os.path.exists(CONFIG1)
    ret = run_command(f"jade config filter {CONFIG1} -o {CONFIG2} 0 1")
    assert ret == 0
    assert os.path.exists(CONFIG2)
    config1 = load_data(CONFIG1)
    config2 = load_data(CONFIG2)
    assert config2["jobs"] == [config1["jobs"][0], config1["jobs"][1]]

def create_config():
    num_commands = 5
    commands = ['echo "hello world"'] * num_commands
    with open(TEST_FILENAME, "w") as f_out:
        for command in commands:
            f_out.write(command + "\n")

    inputs = GenericCommandInputs(TEST_FILENAME)
    config = GenericCommandConfiguration(job_inputs=inputs)
    jobs = list(inputs.iter_jobs())
    for i, job_param in enumerate(jobs):
        if i < 3:
            job_param.submission_group = "group1"
        else:
            job_param.submission_group = "group2"
        config.add_job(job_param)

    hpc_config1 = load_data(FAKE_HPC_CONFIG)
    hpc_config2 = copy.deepcopy(hpc_config1)
    hpc_config1["hpc"]["walltime"] = "1:00:00"
    hpc_config2["hpc"]["walltime"] = "5:00:00"
    params1 = SubmitterParams(hpc_config=hpc_config1, per_node_batch_size=3)
    params2 = SubmitterParams(hpc_config=hpc_config2, per_node_batch_size=1)
    group1 = SubmissionGroup(name="group1", submitter_params=params1)
    group2 = SubmissionGroup(name="group2", submitter_params=params2)
    config.append_submission_group(group1)
    config.append_submission_group(group2)
    return config

def test_resubmit_failed(cleanup):
    cmd = f"{SUBMIT_JOBS} {CONFIG_FILE} --output={OUTPUT}"
    ret = run_command(cmd)
    assert ret == 0
    ret = run_command(f"{WAIT} --output={OUTPUT} -p 0.01")
    assert ret == 0

    agg = ResultsAggregator.load(OUTPUT)
    results = agg.get_results_unsafe()
    assert results
    for result in results:
        assert result.return_code == 0

    # Overwrite the first result with a non-zero return code to fake a failure.
    x = results[0]
    results[0] = Result(x.name, 1, x.status, x.exec_time_s, x.completion_time)
    agg._write_results(results)

    results_filename = os.path.join(OUTPUT, RESULTS_FILE)
    final_results = load_data(results_filename)
    final_results["results"][0]["return_code"] = 1
    final_results["results_summary"]["num_failed"] = 1
    final_results["results_summary"]["num_successful"] -= 1
    dump_data(final_results, results_filename)

    summary = ResultsSummary(OUTPUT)
    assert summary.get_failed_results()[0].name == "1"

    ret = run_command(f"{RESUBMIT_JOBS} {OUTPUT}")
    assert ret == 0
    ret = run_command(f"{WAIT} --output={OUTPUT} -p 0.01")
    assert ret == 0
    summary = ResultsSummary(OUTPUT)
    assert len(summary.get_successful_results()) == NUM_COMMANDS

@classmethod
def deserialize(cls, filename_or_data, do_not_deserialize_jobs=False):
    """Create a class instance from a saved configuration file.

    Parameters
    ----------
    filename_or_data : str | dict
        Path to configuration file or that file loaded as a dict.
    do_not_deserialize_jobs : bool
        Set to True to avoid the overhead of loading all jobs from disk.
        Job names will be stored instead of jobs.

    Returns
    -------
    class

    Raises
    ------
    InvalidParameter
        Raised if the config file has invalid parameters.

    """
    if isinstance(filename_or_data, str):
        data = load_data(filename_or_data)
    else:
        data = filename_or_data

    # Don't create an inputs object. It can be very expensive and we don't
    # need it unless the user wants to change the config.
    # TODO: implement user-friendly error messages when they try to access
    # inputs.
    inputs = None

    data["do_not_deserialize_jobs"] = do_not_deserialize_jobs
    return cls(inputs, **data)

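# A minimal usage sketch for deserialize(), not part of the original source.
# It assumes a concrete JobConfiguration subclass such as the
# GenericCommandConfiguration used elsewhere here; "config.json" is a
# hypothetical path.
def example_deserialize():
    # Load from a path...
    config = GenericCommandConfiguration.deserialize("config.json")
    # ...or from an already-loaded dict, skipping job deserialization.
    data = load_data("config.json")
    config = GenericCommandConfiguration.deserialize(data, do_not_deserialize_jobs=True)
    return config
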
def test_resubmit_missing(cleanup):
    cmd = f"{SUBMIT_JOBS} {CONFIG_FILE} --output={OUTPUT}"
    ret = run_command(cmd)
    assert ret == 0
    ret = run_command(f"{WAIT} --output={OUTPUT} -p 0.01")
    assert ret == 0

    agg = ResultsAggregator.load(OUTPUT)
    results = agg.get_results_unsafe()
    assert results
    for result in results:
        assert result.return_code == 0

    # Drop the last result to simulate a job that never reported back.
    results.pop()
    agg._write_results(results)

    results_filename = os.path.join(OUTPUT, RESULTS_FILE)
    final_results = load_data(results_filename)
    missing = final_results["results"].pop()
    final_results["results_summary"]["num_missing"] = 1
    final_results["results_summary"]["num_successful"] -= 1
    final_results["missing_jobs"] = [missing["name"]]
    dump_data(final_results, results_filename)

    summary = ResultsSummary(OUTPUT)
    assert len(summary.get_failed_results()) == 0
    assert len(summary.get_successful_results()) == NUM_COMMANDS - 1

    ret = run_command(f"{RESUBMIT_JOBS} {OUTPUT}")
    assert ret == 0
    ret = run_command(f"{WAIT} --output={OUTPUT} -p 0.01")
    assert ret == 0
    summary = ResultsSummary(OUTPUT)
    assert len(summary.get_successful_results()) == NUM_COMMANDS

def test_custom_interval(config_file):
    check_run_command(f"{CMD} -R aggregation -r 3 -c {config_file}")
    assert config_file.exists()
    data = load_data(config_file)
    assert data["resource_monitor_interval"] == 3
    assert data["resource_monitor_type"] == ResourceMonitorType.AGGREGATION.value

def _show(config_file, fields):
    cfg = load_data(config_file)
    jobs = cfg["jobs"]
    print(f"Extension: {cfg['extension']}")
    print(f"Num jobs: {len(jobs)}")
    if not jobs:
        return

    for field in fields:
        if field not in jobs[0]:
            print(f"field={field} is not a job field in {cfg['extension']}")
            sys.exit(1)

    field_names = ["index"]
    if "name" in jobs[0]:
        field_names.append("name")
    else:
        field_names.append(list(jobs[0].keys())[0])
    if "blocked_by" in jobs[0]:
        field_names.append("blocked_by")

    table = PrettyTable()
    table.field_names = field_names + list(fields)
    for i, job in enumerate(jobs):
        # Each row must match field_names, including the user-requested fields.
        row = [i] + [job[x] for x in table.field_names[1:]]
        table.add_row(row)
    print(table)

@classmethod
def load_config_from_file(cls, config_file):
    """Load the post-process config from the given TOML file.

    Parameters
    ----------
    config_file : str

    Returns
    -------
    module_name : str
        Module that contains the class to run.
    class_name : str
        Class implementing the post-process to run.
    data : dict
        Optional dictionary of additional data to send to the post-process.

    """
    config = load_data(config_file)
    module_name = config.get("module")
    class_name = config.get("class")
    data = {}
    if "data" in config:
        for key in config["data"]:
            data[key] = config["data"][key]

    return module_name, class_name, data

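# A usage sketch for load_config_from_file(), not from the original source.
# The owning class name (JobPostProcess), the file contents, and the module
# and class values below are hypothetical; the loader only reads the optional
# keys "module", "class", and "data" from the TOML file.
def example_load_post_process_config(tmp_path):
    config_file = tmp_path / "post_process.toml"
    config_file.write_text(
        'module = "my_package.post_process"\n'
        'class = "MyPostProcess"\n'
        "[data]\n"
        "threshold = 0.5\n"
    )
    module_name, class_name, data = JobPostProcess.load_config_from_file(str(config_file))
    assert module_name == "my_package.post_process"
    assert class_name == "MyPostProcess"
    assert data == {"threshold": 0.5}
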
def create_config(self, config_file):
    """Create a configuration from a config file.

    Parameters
    ----------
    config_file : str | dict
        HPC config

    Returns
    -------
    dict

    """
    if isinstance(config_file, dict):
        config = config_file
    else:
        if not os.path.exists(config_file):
            raise FileNotFoundError(f"HPC config file {config_file} does not exist")
        config = load_data(config_file)

    for param in self.get_required_config_params():
        if param not in config["hpc"]:
            raise InvalidParameter(f"missing HPC config parameter {param}")

    for param, val in self.get_optional_config_params().items():
        if param not in config["hpc"]:
            config["hpc"][param] = val

    return config

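# A usage sketch for create_config(), not from the original source. The keys
# below are illustrative only; the actual required and optional parameters
# come from get_required_config_params() and get_optional_config_params() on
# the concrete HPC manager passed in.
def example_create_hpc_config(manager):
    # A dict can be passed directly instead of a file path.
    config = manager.create_config({"hpc": {"account": "my_account", "walltime": "4:00:00"}})
    # Optional parameters omitted by the caller are filled with defaults.
    return config
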
def _get_job_by_name(self, name):
    assert self._jobs_directory is not None
    filename = os.path.join(self._jobs_directory, name) + ".json"
    assert os.path.exists(filename), filename
    job = load_data(filename)
    param_class = self.job_parameters_class(job["extension"])
    return param_class.deserialize(job)

@classmethod
def deserialize(cls, filename_or_data, do_not_deserialize_jobs=False):
    """Create a class instance from a saved configuration file.

    Parameters
    ----------
    filename_or_data : str | dict
        Path to configuration file or that file loaded as a dict.
    do_not_deserialize_jobs : bool
        Set to True to avoid the overhead of loading all jobs from disk.
        Job names will be stored instead of jobs.

    Returns
    -------
    class

    Raises
    ------
    InvalidParameter
        Raised if the config file has invalid parameters.

    """
    if isinstance(filename_or_data, str):
        data = load_data(filename_or_data)
    else:
        data = filename_or_data
    data["do_not_deserialize_jobs"] = do_not_deserialize_jobs
    return cls(**data)

def _handle_legacy_file(self, legacy_file):
    for raw_event in load_data(legacy_file):
        event = deserialize_event(raw_event)
        self._events[event.name].append(event)
    self._save_events_summary()
    os.remove(legacy_file)
    logger.info("Converted events to new format")

def test_singularity_params(config_file):
    # Not a real container. It needs to be a path that exists.
    container = "jade"
    check_run_command(f"{CMD} -S -C {container} -c {config_file}")
    assert config_file.exists()
    data = load_data(config_file)
    assert data["singularity_params"] is not None
    assert data["singularity_params"]["enabled"]
    assert data["singularity_params"]["container"] == "jade"

def test_estimated_run_time(cleanup):
    # The walltime is 240 minutes and each job takes 10 minutes, so each of
    # the 4 cores can complete 24 jobs: 4 * 24 = 96 jobs per node. 100 jobs
    # will therefore take two batches.
    cmd = f"{SUBMIT_JOBS} {CONFIG_FILE} --output={OUTPUT} -t -n2 -q4"
    check_run_command(cmd)
    check_run_command(f"{WAIT} --output={OUTPUT} -p 0.01")

    batch_config_1 = Path(OUTPUT) / "config_batch_1.json"
    assert os.path.exists(batch_config_1)
    batch_config_2 = Path(OUTPUT) / "config_batch_2.json"
    assert os.path.exists(batch_config_2)
    config1 = load_data(batch_config_1)
    assert len(config1["jobs"]) == 96
    config2 = load_data(batch_config_2)
    assert len(config2["jobs"]) == 4

def test_submission_groups(cleanup):
    config = create_config()
    config.dump(CONFIG_FILE)
    cmd = f"{SUBMIT_JOBS} {CONFIG_FILE} --output={OUTPUT} -h {FAKE_HPC_CONFIG} -p 0.1"
    check_run_command(cmd)

    output_path = Path(OUTPUT)
    config_batch_files = list(output_path.glob("config_batch*.json"))
    assert len(config_batch_files) == 3
    batch1 = load_data(output_path / "config_batch_1.json")
    assert len(batch1["jobs"]) == 3
    batch2 = load_data(output_path / "config_batch_2.json")
    assert len(batch2["jobs"]) == 1
    assert batch2["jobs"][0]["job_id"] == 4
    batch3 = load_data(output_path / "config_batch_3.json")
    assert len(batch3["jobs"]) == 1
    assert batch3["jobs"][0]["job_id"] == 5

def create_config_from_file(filename, **kwargs):
    """Create an instance of a JobConfiguration from a config file.

    Returns
    -------
    JobConfiguration

    """
    data = load_data(filename)
    return deserialize_config(data, **kwargs)

def add_submission_group(params_file, name, config_file):
    """Add a submission group with parameters defined in params_file to config_file."""
    config = load_data(config_file)
    for group in config["submission_groups"]:
        if name == group["name"]:
            print(f"Error: {name} is already stored in {config_file}", file=sys.stderr)
            sys.exit(1)

    params = load_data(params_file)
    group = {
        "name": name,
        "submitter_params": params,
    }
    # Make sure it parses.
    SubmissionGroup(**group)
    config["submission_groups"].append(group)
    dump_data(config, config_file, indent=2)
    print(f"Updated {config_file} with submission group {name}.")

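# A usage sketch for add_submission_group(), not from the original source;
# both file names are hypothetical. params_file holds the SubmitterParams
# fields for the new group, and config_file must already contain a
# "submission_groups" list. The group is validated via SubmissionGroup before
# the config file is rewritten.
def example_add_submission_group():
    add_submission_group("submitter_params.json", "group3", "config.json")
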
def get_available_parameters(self):
    """Collect all available auto-regression jobs."""
    inputs_file = os.path.join(self._base_directory, self.INPUTS_FILE)
    inputs = load_data(inputs_file)
    for param in inputs:
        job = AutoRegressionParameters(
            country=param["country"],
            data=os.path.join(self._base_directory, param["data"]),
        )
        self._parameters[job.name] = job

def test_job_order(generic_command_fixture):
    num_jobs = 50
    commands = ["echo hello world"] * num_jobs
    with open(TEST_FILENAME, "w") as f_out:
        for command in commands:
            f_out.write(command + "\n")

    inputs = GenericCommandInputs(TEST_FILENAME)
    config = GenericCommandConfiguration()
    for job_param in inputs.iter_jobs():
        config.add_job(job_param)
    assert config.get_num_jobs() == num_jobs

    job = config.get_job("1")
    for i in range(10, 15):
        job.blocked_by.add(i)
    config.get_job("2").blocked_by.add("1")
    config.get_job("21").blocked_by.add("30")
    config.get_job("41").blocked_by.add("50")
    config.dump(CONFIG_FILE)

    cmd = (
        f"{SUBMIT_JOBS} {CONFIG_FILE} --output={OUTPUT} "
        "--per-node-batch-size=10 "
        "--max-nodes=4 "
        "--poll-interval=0.1 "
        f"--hpc-config {FAKE_HPC_CONFIG} "
        "--num-processes=10"
    )
    check_run_command(cmd)
    check_run_command(f"{WAIT} --output={OUTPUT} --poll-interval=0.01")

    result_summary = ResultsSummary(OUTPUT)
    results = result_summary.list_results()
    assert len(results) == num_jobs
    tracker = {x.name: x for x in results}
    for i in range(10, 15):
        assert tracker["1"].completion_time > tracker[str(i)].completion_time
    assert tracker["2"].completion_time > tracker["1"].completion_time
    assert tracker["21"].completion_time > tracker["30"].completion_time
    assert tracker["41"].completion_time > tracker["50"].completion_time

    # Verify that stats are summarized correctly with aggregation mode.
    stats_text = Path(OUTPUT) / "stats.txt"
    assert stats_text.exists()
    assert "Average" in stats_text.read_text()
    stats_json = Path(OUTPUT) / "stats_summary.json"
    assert stats_json.exists()
    stats = load_data(stats_json)
    assert stats
    assert "batch" in stats[0]

def _check_registry_config(self, filename):
    data = load_data(filename)
    if isinstance(data, list):
        # Workaround to support the old registry format. 03/06/2020
        # It can be removed eventually.
        new_data = {
            "extensions": data,
            "logging": DEFAULT_REGISTRY["logging"],
        }
        dump_data(new_data, self.registry_filename, indent=4)
        print(
            "\nReformatted registry. Refer to `jade extensions --help` "
            "for instructions on adding logging for external packages.\n"
        )
        data = new_data

    format_version = data.get("format_version", "v0.1.0")
    if format_version == "v0.1.0":
        self.reset_defaults()
        data = load_data(filename)
        print(
            "\nWARNING: Reformatted registry. You will need to "
            "re-register any external extensions.\n"
        )

    return data

def main():
    status = load_data(os.environ["JADE_PIPELINE_STATUS_FILE"])
    cur_stage = status["stages"][-1]
    cur_stage_output = cur_stage["output_directory"]
    previous_stage = status["stages"][-2]
    previous_stage_output = previous_stage["output_directory"]

    script = "jade/extensions/demo/merge_pred_gdp.py"
    with open(PRED_GDP_COMMANDS_FILE, "w") as f_out:
        cmd = f"python {script} run {previous_stage_output} {cur_stage_output}"
        f_out.write(cmd + "\n")

    cmd = "jade config create pred_gdp_commands.txt -c config-stage2.json"
    sys.exit(run_command(cmd))

def create_config_from_file(filename, **kwargs):
    """Create an instance of a JobConfiguration from a config file.

    Returns
    -------
    JobConfiguration

    """
    data = load_data(filename)
    format_version = data.get("format_version")
    if format_version is None:
        upgrade_config_file(data, filename)
    return deserialize_config(data, **kwargs)

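# A sketch of the upgrade path above, under the assumption (taken from the
# code, not verified against the original source) that upgrade_config_file()
# rewrites both `data` and the file in place: configs saved before
# format_version existed are upgraded transparently on load. The path below
# is hypothetical.
def example_load_legacy_config():
    return create_config_from_file("legacy_config.json")
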
def _remove_demo_extension(self):
    registry_file = pathlib.Path.home() / self._REGISTRY_FILENAME
    if not registry_file.exists():
        return

    data = load_data(registry_file)
    found = False
    for i, ext in enumerate(data["extensions"]):
        if ext["name"] == "demo":
            data["extensions"].pop(i)
            found = True
            break

    if found:
        dump_data(data, registry_file, indent=2)

def test_job_configuration__check_job_dependencies_estimate(job_fixture):
    with open(TEST_FILENAME, "w") as f_out:
        f_out.write("echo hello world\n")

    inputs = GenericCommandInputs(TEST_FILENAME)
    config = GenericCommandConfiguration(job_inputs=inputs)
    for job_param in inputs.iter_jobs():
        config.add_job(job_param)
    assert config.get_num_jobs() == 1

    hpc_config = HpcConfig(**load_data(FAKE_HPC_CONFIG))
    params = SubmitterParams(hpc_config=hpc_config, per_node_batch_size=0)
    with pytest.raises(InvalidConfiguration):
        config.check_job_dependencies(params)

def create_config_from_previous_run(config_file, output, result_type="successful", **kwargs):
    """Create an instance of a JobConfiguration from a previous config file,
    returning only the jobs with results of the given type.

    Parameters
    ----------
    config_file : str
        Location of the config file.
    output : str
        Location of the previous results.
    result_type : str
        Type of results: "successful", "failed", or "missing".

    Returns
    -------
    JobConfiguration

    Raises
    ------
    InvalidParameter
        Raised if result_type is not one of the allowed types.

    """
    allowed_types = ["successful", "failed", "missing"]
    if result_type not in allowed_types:
        raise InvalidParameter(f"given result type is invalid: {result_type}")

    config = deserialize_config(load_data(config_file))
    summary = ResultsSummary(output)
    if result_type == "successful":
        results_of_type = summary.get_successful_results()
    elif result_type == "failed":
        results_of_type = summary.get_failed_results()
    else:  # missing
        results_of_type = summary.get_missing_jobs(config.iter_jobs())

    # Note that both jobs and results have `.name`.
    parameters = [config.get_job(result.name) for result in results_of_type]
    config.reconfigure_jobs(parameters)
    return deserialize_config(config.serialize(), **kwargs)

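# A usage sketch for create_config_from_previous_run(), not from the original
# source; the paths are hypothetical. This mirrors what the resubmit tests
# above exercise through the CLI: build a new config containing only the jobs
# that failed in a previous run.
def example_resubmit_failed_jobs():
    config = create_config_from_previous_run("config.json", "output", result_type="failed")
    config.dump("config_failed_jobs.json")
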
def __init__(self, config_file, output):
    self._output = output
    os.makedirs(self._output, exist_ok=True)
    self._config_file = os.path.join(self._output, self.CONFIG_FILENAME)
    shutil.copyfile(config_file, self._config_file)
    self._config = load_data(self._config_file)
    self._stages = self._config["stages"]
    self._status_file = os.path.join(self._output, self.STATUS_FILENAME)
    self._cur_stage_id = 1
    self._status_info = {
        "current_stage_id": self._cur_stage_id,
        "stages": [],
        "output_directory": self._output,
        "config_file": self._config_file,
    }

def main():
    config = PipelineConfig(**load_data(os.environ["JADE_PIPELINE_STATUS_FILE"]))
    cur_stage = config.stages[-1]
    cur_stage_output = cur_stage.path
    previous_stage = config.stages[-2]
    previous_stage_output = previous_stage.path

    script = "jade/extensions/demo/merge_pred_gdp.py"
    with open(PRED_GDP_COMMANDS_FILE, "w") as f_out:
        cmd = f"python {script} run {previous_stage_output} {cur_stage_output}"
        f_out.write(cmd + "\n")

    cmd = "jade config create pred_gdp_commands.txt -c config-stage2.json"
    sys.exit(run_command(cmd))