def assign_blocked_by(config_file, job_index, blocking_job_indexes, output_file):
    """Assign the blocked_by attribute for the job at job_index with the jobs
    specified by blocking_job_indexes (0-based). If no blocking jobs are
    specified then make all other jobs blocking.

    \b
    Examples:
    1. Make the last job blocked by all other jobs.
       jade config assign-blocked-by config.json post-process-job -o new-config.json
    2. Select first 10 indexes through shell expansion.
       jade config assign-blocked-by config.json post-process-job {0..9} -o new-config.json
    3. Specify blocking indexes.
       jade config assign-blocked-by config.json post-process-job 0 1 2 3 -o new-config.json
    """
    # A job cannot block itself.
    if blocking_job_indexes and job_index in blocking_job_indexes:
        print(f"job_index={job_index} is included in blocking_job_indexes", file=sys.stderr)
        sys.exit(1)

    config = create_config_from_file(config_file)
    if job_index > config.get_num_jobs() - 1:
        print(f"Invalid job_index={job_index}. Max={config.get_num_jobs() - 1}", file=sys.stderr)
        sys.exit(1)

    blocking_jobs = set()
    main_job = None
    for i, job in enumerate(config.iter_jobs()):
        if i == job_index:
            main_job = job
        # Empty blocking_job_indexes means every other job blocks this one.
        elif not blocking_job_indexes or i in blocking_job_indexes:
            blocking_jobs.add(job.name)

    # The range check above guarantees job_index was seen in the loop.
    assert main_job is not None
    main_job.set_blocking_jobs(blocking_jobs)
    config.dump(output_file, indent=2)
    print(f"Added {len(blocking_jobs)} blocking jobs to {main_job.name} in {output_file}")
def test_submission_groups_per_node_setup(cleanup):
    """Verify a submission group's node_setup_script survives a dry-run submit."""
    config = create_config()
    config.submission_groups[1].submitter_params.node_setup_script = "node.sh"
    config.dump(CONFIG_FILE)

    check_run_command(
        f"{SUBMIT_JOBS} {CONFIG_FILE} --output={OUTPUT} -h {FAKE_HPC_CONFIG} --dry-run"
    )

    batch_config = create_config_from_file(Path(OUTPUT) / "config_batch_2.json")
    params = batch_config.get_default_submission_group().submitter_params
    assert params.node_setup_script == "node.sh"
def run(config_file, name, output, output_format, verbose):
    """Run auto regression analysis through command line"""
    os.makedirs(output, exist_ok=True)
    # Look up the requested job and execute it with the given output settings.
    job = create_config_from_file(config_file).get_job(name)
    execution = AutoRegressionExecution(
        job=job,
        output=output,
        output_format=output_format,
    )
    execution.run()
def __init__(self, config_file, output_dir):
    """Load the job configuration and create the output directory layout.

    Parameters
    ----------
    config_file : str
        Path to a job configuration file.
    output_dir : str
        Base directory for all run outputs.
    """
    self._config_file = config_file
    self._config = create_config_from_file(config_file)
    self._output = output_dir
    self._jobs_output = os.path.join(output_dir, JOBS_OUTPUT_DIR)
    self._results_dir = os.path.join(output_dir, RESULTS_DIR)
    self._results = []  # accumulates Result objects

    for directory in (self._output, self._jobs_output, self._results_dir):
        os.makedirs(directory, exist_ok=True)
def test_submission_groups_per_node_setup(cleanup):
    # TODO: this test is no longer in the right place. Belongs in file testing job_config.
    """Verify node setup/teardown commands survive a dry-run submit."""
    config = create_config()
    config.node_setup_command = "node_setup.sh"
    config.node_teardown_command = "node_teardown.sh"
    config.dump(CONFIG_FILE)

    check_run_command(
        f"{SUBMIT_JOBS} {CONFIG_FILE} --output={OUTPUT} -h {FAKE_HPC_CONFIG} --dry-run"
    )

    batch_config = create_config_from_file(Path(OUTPUT) / "config_batch_2.json")
    assert batch_config.node_setup_command == "node_setup.sh"
    assert batch_config.node_teardown_command == "node_teardown.sh"
def _show(config_file, fields=None, blocked_by=True):
    """Print a summary table of the jobs in config_file.

    Parameters
    ----------
    fields : list | None
        Extra serialized-job field names to include as columns.
    blocked_by : bool
        If True, include a column listing each job's blocking jobs.
    """
    config = create_config_from_file(config_file)
    num_jobs = config.get_num_jobs()
    print(f"Num jobs: {num_jobs}")
    if num_jobs == 0:
        return

    if config.setup_command is not None:
        print(f"Setup command: {config.setup_command}")
    if config.teardown_command is not None:
        print(f"Teardown command: {config.teardown_command}")
    print()

    # generic_command jobs have a command field which is very useful.
    # Other extensions do not.
    has_command = any(
        isinstance(job, GenericCommandParameters) for job in config.iter_jobs()
    )

    field_names = ["index", "name"]
    if has_command:
        field_names.append("command")
    if blocked_by:
        field_names.append("blocked_by (job names)")
    if fields is not None:
        field_names += fields

    table = PrettyTable()
    table.field_names = field_names
    for i, job in enumerate(config.iter_jobs()):
        job_dict = job.serialize()
        row = [i, job.name]
        if has_command:
            row.append(job_dict.get("command", ""))
        if blocked_by:
            blocking_jobs = sorted(job.get_blocking_jobs())
            text = ", ".join(blocking_jobs)
            if len(text) > 50:
                # A long list would blow out the column width; show a count instead.
                text = f"truncated...blocked by {len(blocking_jobs)} jobs"
            row.append(text)
        if fields is not None:
            row += [job_dict.get(field, "") for field in fields]
        table.add_row(row)
    print(table)
def run_spark_cluster(job_name, jade_runtime_output, verbose, manager_script_and_args):
    """Create a Spark cluster across multiple nodes. The manager node will invoke the script."""
    config = create_config_from_file(Path(jade_runtime_output) / CONFIG_FILE)
    job = config.get_job(job_name)
    _set_hostnames(jade_runtime_output)

    # Ask the cluster whether this node is the manager; answer arrives on stdout.
    cmd_output = {}
    check_run_command(f"jade cluster am-i-manager {jade_runtime_output}", cmd_output)
    is_manager = cmd_output["stdout"].strip()
    manager_node = _get_manager_node_name(jade_runtime_output)

    if is_manager == "true":
        return run_cluster_master(
            job, manager_node, jade_runtime_output, verbose, manager_script_and_args
        )

    assert is_manager == "false", is_manager
    return run_worker(job, manager_node, jade_runtime_output, verbose)
def _update_with_blocking_jobs(jobs_to_resubmit, output): config = create_config_from_file(Path(output) / CONFIG_FILE) # Any job that was blocked by any of these jobs must also be resubmitted. # Same for any job blocked by one of those. # Account for abnormal ordering where a lower-ID'd job is blocked by a later one. updated_blocking_jobs_by_name = {} max_iter = config.get_num_jobs() for i in range(max_iter): first = len(jobs_to_resubmit) for job in config.iter_jobs(): blocking_jobs = job.get_blocking_jobs() if not blocking_jobs: continue intersecting_jobs = blocking_jobs.intersection(jobs_to_resubmit) if intersecting_jobs: updated_blocking_jobs_by_name[job.name] = intersecting_jobs jobs_to_resubmit.add(job.name) num_added = len(jobs_to_resubmit) - first if num_added == 0: break assert i < max_iter - 1, f"max_iter={max_iter} num_added={num_added} first={first}" return updated_blocking_jobs_by_name
def test_create_config_from_file_missing_file(config_file):
    """Create should throw FileNotFoundError"""
    nonexistent_path = "a" + config_file
    with pytest.raises(FileNotFoundError):
        create_config_from_file(nonexistent_path)
def test_create_config_from_file(config_file):
    """Create should successfully return config"""
    loaded = create_config_from_file(config_file)
    jobs = loaded.list_jobs()
    assert len(jobs) == 3