def queue_meets_spec(self, queue, num_nodes, num_tasks, walltime=None, job=None):
    """
    Return True when a job of the given size is acceptable for `queue`.

    queue     -- queue identifier passed to self.get_queue_specs
    num_nodes -- node count, checked against the queue's nodemin/nodemax
    num_tasks -- task count, checked against the queue's jobmin/jobmax
    walltime  -- optional requested walltime; compared against the queue's
                 walltimemax only when the queue is flagged strict
    job       -- optional job name; when both it and the queue's jobname are
                 set, the name comparison alone decides the result

    A queue for which get_queue_specs returns None is reported with a
    warning and accepted, rather than failing on the tuple unpacking.
    """
    specs = self.get_queue_specs(queue)
    if specs is None:
        # Unknown queue: nothing to check against, so do not reject the job.
        logger.warning("WARNING: queue '{}' is unknown to this system".format(queue))
        return True

    nodemin, nodemax, jobname, walltimemax, jobmin, jobmax, strict = specs

    # A job name match automatically meets spec
    if job is not None and jobname is not None:
        return jobname == job

    # Size limits; a limit of None means "unbounded" on that side.
    if nodemin is not None and num_nodes < nodemin:
        return False
    if nodemax is not None and num_nodes > nodemax:
        return False
    if jobmin is not None and num_tasks < jobmin:
        return False
    if jobmax is not None and num_tasks > jobmax:
        return False

    # The walltime ceiling is only enforced for strict queues.
    if walltime is not None and walltimemax is not None and strict:
        walltime_s = convert_to_seconds(walltime)
        walltimemax_s = convert_to_seconds(walltimemax)
        if walltime_s > walltimemax_s:
            return False

    return True
def get_recommended_test_time(test_full_name):
###############################################################################
    """
    Return the shortest recommended time among the suites listing this test.

    The machine and compiler are parsed out of test_full_name and used to
    expand each suite (without inherited suites). Returns None when no suite
    that contains the test carries a recommended time.

    >>> get_recommended_test_time("ERS.f19_g16_rx1.A.melvin_gnu")
    '0:10:00'

    >>> get_recommended_test_time("TESTRUNPASS_P69.f19_g16_rx1.A.melvin_gnu.testmod")
    '0:13:00'

    >>> get_recommended_test_time("PET_Ln20.ne30_ne30.FC5.sandiatoss3_intel.cam-outfrq9s")
    >>>
    """
    _, _, _, _, machine, compiler, _ = CIME.utils.parse_test_name(test_full_name)
    expect(machine is not None, "{} is not a full test name".format(test_full_name))

    shortest = None
    for suite in get_test_suites():
        suite_tests = get_test_suite(
            suite, machine=machine, compiler=compiler, skip_inherit=True
        )
        suite_time = get_test_data(suite)[1]

        # Only suites that contain the test and declare a time are candidates.
        if test_full_name not in suite_tests or suite_time is None:
            continue

        if shortest is None or convert_to_seconds(suite_time) < convert_to_seconds(shortest):
            shortest = suite_time

    return shortest
def get_recommended_test_time(test_full_name):
###############################################################################
    """
    Look up the smallest recommended time of any suite that lists the test.

    Each suite is expanded for the machine/compiler parsed from
    test_full_name, ignoring inherited suites. The result is the suite time
    string itself, or None if no matching suite declares one.

    >>> get_recommended_test_time("ERS.f19_g16_rx1.A.melvin_gnu")
    '0:10:00'

    >>> get_recommended_test_time("ERS.f19_f19.ICLM45.melvin_gnu")
    '0:45:00'

    >>> get_recommended_test_time("SMS_D_Ld1.ne30_oECv3_ICG.A_WCYCL1850S_CMIP6.sandiatoss3_intel.allactive-v1cmip6")
    '03:00:00'

    >>> get_recommended_test_time("PET_Ln20.ne30_ne30.FC5.sandiatoss3_intel.cam-outfrq9s")
    >>>
    """
    _, _, _, _, machine, compiler, _ = CIME.utils.parse_test_name(test_full_name)
    expect(machine is not None, "{} is not a full test name".format(test_full_name))

    winner = None
    for suite_name in get_test_suites():
        members = get_test_suite(
            suite_name, machine=machine, compiler=compiler, skip_inherit=True
        )
        candidate = get_test_data(suite_name)[1]

        if test_full_name in members and candidate is not None:
            # Keep the candidate if it is the first one or strictly shorter.
            if winner is None or convert_to_seconds(candidate) < convert_to_seconds(winner):
                winner = candidate

    return winner
def queue_meets_spec(self, queue, num_nodes, num_tasks, walltime=None, job=None):
    """
    Decide whether a job of the given size may run in `queue`.

    An unknown queue (get_queue_specs returns None) is logged and accepted.
    If both `job` and the queue's jobname are set, the answer is simply
    whether they are equal. Otherwise the node and task counts must fall
    inside the queue's limits and, for strict queues only, the requested
    walltime must not exceed walltimemax.
    """
    specs = self.get_queue_specs(queue)
    if specs is None:
        logger.warning("WARNING: queue '{}' is unknown to this system".format(queue))
        return True

    nodemin, nodemax, jobname, walltimemax, jobmin, jobmax, strict = specs

    # A job name match automatically meets spec
    if job is not None and jobname is not None:
        return jobname == job

    # Each limit is optional; None leaves that side unbounded.
    if nodemin is not None and num_nodes < nodemin:
        return False
    if nodemax is not None and num_nodes > nodemax:
        return False
    if jobmin is not None and num_tasks < jobmin:
        return False
    if jobmax is not None and num_tasks > jobmax:
        return False

    if walltime is not None and walltimemax is not None and strict:
        return not convert_to_seconds(walltime) > convert_to_seconds(walltimemax)

    return True
def queue_meets_spec(self, queue, num_nodes, num_tasks, walltime=None, job=None):
    """
    Check a job request against the limits configured for `queue`.

    Returns True for a queue whose specs are unknown (after logging a
    warning). When `job` and the queue's jobname are both given, only the
    name comparison counts. Otherwise num_nodes is checked against
    nodemin/nodemax, num_tasks against jobmin/jobmax, and walltime against
    walltimemax when the queue is strict.
    """
    specs = self.get_queue_specs(queue)
    if specs is None:
        logger.warning("WARNING: queue '{}' is unknown to this system".format(queue))
        return True

    nodemin, nodemax, jobname, walltimemax, jobmin, jobmax, strict = specs

    # A job name match automatically meets spec
    if job is not None and jobname is not None:
        return jobname == job

    # (requested value, lower bound, upper bound); a bound of None is open.
    bounded = (
        (num_nodes, nodemin, nodemax),
        (num_tasks, jobmin, jobmax),
    )
    for requested, lower, upper in bounded:
        if lower is not None and requested < lower:
            return False
        if upper is not None and requested > upper:
            return False

    if walltime is not None and walltimemax is not None and strict:
        requested_s = convert_to_seconds(walltime)
        ceiling_s = convert_to_seconds(walltimemax)
        if requested_s > ceiling_s:
            return False

    return True
def queue_meets_spec(self, queue, num_pes, walltime=None, job=None):
    """
    Decide whether a job using `num_pes` processing elements fits `queue`.

    An unknown queue (get_queue_specs returns None) is logged and accepted.
    If both `job` and the queue's jobname are set, the result is whether
    they are equal. Otherwise num_pes must lie within jobmin/jobmax (which
    are converted with int()), and for strict queues the requested walltime
    must not exceed walltimemax.
    """
    specs = self.get_queue_specs(queue)
    if specs is None:
        logger.warning("WARNING: queue '%s' is unknown to this system" % queue)
        return True

    jobmin, jobmax, jobname, walltimemax, strict = specs

    # A job name match automatically meets spec
    if job is not None and jobname is not None:
        return jobname == job

    below_minimum = jobmin is not None and num_pes < int(jobmin)
    if below_minimum:
        return False

    above_maximum = jobmax is not None and num_pes > int(jobmax)
    if above_maximum:
        return False

    if walltime is not None and walltimemax is not None and strict:
        return not convert_to_seconds(walltime) > convert_to_seconds(walltimemax)

    return True
def get_recommended_test_time(test_full_name):
###############################################################################
    """
    Return the shortest recommended time among suites that contain the test.

    Membership is decided by suite_has_test with skip_inherit=True. Returns
    None when no suite containing the test declares a time.

    >>> get_recommended_test_time("ERS.f19_g16_rx1.A.melvin_gnu")
    '0:10:00'

    >>> get_recommended_test_time("TESTRUNPASS_P69.f19_g16_rx1.A.melvin_gnu.testmod")
    '0:13:00'

    >>> get_recommended_test_time("PET_Ln20.ne30_ne30.FC5.sandiatoss3_intel.cam-outfrq9s")
    >>>
    """
    shortest = None

    for suite in get_test_suites():
        suite_time = get_test_data(suite)[1]

        if not suite_has_test(suite, test_full_name, skip_inherit=True):
            continue
        if suite_time is None:
            continue

        # First candidate wins outright; later ones must be strictly shorter.
        if shortest is None or convert_to_seconds(suite_time) < convert_to_seconds(shortest):
            shortest = suite_time

    return shortest
def get_recommended_test_time(test_full_name):
###############################################################################
    """
    Find the minimum recommended time over all suites that include the test.

    Suites are expanded for the machine and compiler embedded in
    test_full_name, with inherited suites skipped. The returned value is
    the suite's own time string; None means no matching suite had one.

    >>> get_recommended_test_time("ERS.f19_g16_rx1.A.melvin_gnu")
    '0:10:00'

    >>> get_recommended_test_time("TESTRUNPASS_P69.f19_g16_rx1.A.melvin_gnu.testmod")
    '0:13:00'

    >>> get_recommended_test_time("PET_Ln20.ne30_ne30.FC5.sandiatoss3_intel.cam-outfrq9s")
    >>>
    """
    _, _, _, _, machine, compiler, _ = CIME.utils.parse_test_name(test_full_name)
    expect(machine is not None, "{} is not a full test name".format(test_full_name))

    minimum = None
    for suite in get_test_suites():
        expanded = get_test_suite(
            suite, machine=machine, compiler=compiler, skip_inherit=True
        )
        declared = get_test_data(suite)[1]

        relevant = test_full_name in expanded and declared is not None
        if relevant and (
            minimum is None
            or convert_to_seconds(declared) < convert_to_seconds(minimum)
        ):
            minimum = declared

    return minimum
def get_recommended_test_time(test_full_name):
###############################################################################
    """
    Return the shortest recommended time of any suite that lists the test.

    Every raw entry of every suite in _ALL_TESTS is expanded to a full test
    name for the machine/compiler parsed from test_full_name and compared
    with it. An entry is either a bare test name or a sequence of
    (name, testmod) or (name, testmod, machines); in the three-element form
    the testmod only applies when the machine is listed. Returns None when
    nothing matches or no matching suite declares a time.

    >>> get_recommended_test_time("ERS.f19_g16_rx1.A.melvin_gnu")
    '0:10:00'

    >>> get_recommended_test_time("ERP_Ln9.ne30_ne30.FC5.melvin_gun.cam-outfrq9s")
    '0:45:00'

    >>> get_recommended_test_time("PET_Ln9.ne30_ne30.FC5.sandiatoss3_intel.cam-outfrq9s")
    '03:00:00'

    >>> get_recommended_test_time("PET_Ln20.ne30_ne30.FC5.sandiatoss3_intel.cam-outfrq9s")
    >>>
    """
    _, _, _, _, machine, compiler, _ = CIME.utils.parse_test_name(test_full_name)
    expect(machine is not None, "{} is not a full test name".format(test_full_name))

    shortest = None
    for suite in get_test_suites():
        _, suite_time, raw_entries = _ALL_TESTS[suite]

        for entry in raw_entries:
            modifier = None
            if isinstance(entry, six.string_types):
                base_name = entry
            else:
                base_name = entry[0]
                if len(entry) == 2:
                    modifier = entry[1]
                else:
                    # Third element restricts the testmod to certain machines.
                    mod_machines = entry[2]
                    if isinstance(mod_machines, six.string_types):
                        mod_machines = [mod_machines]
                    if machine in mod_machines:
                        modifier = entry[1]

            expanded = CIME.utils.get_full_test_name(
                base_name, machine=machine, compiler=compiler, testmod=modifier
            )
            if expanded != test_full_name or suite_time is None:
                continue

            if shortest is None or convert_to_seconds(suite_time) < convert_to_seconds(shortest):
                shortest = suite_time

    return shortest
def get_recommended_test_time(test_full_name):
###############################################################################
    """
    Find the minimum recommended time over the suites that contain the test.

    Walks the raw test entries of each suite in _ALL_TESTS, builds the full
    test name for the machine and compiler taken from test_full_name, and
    keeps the smallest suite time whose entry matches. Entries may be a
    plain name, (name, testmod), or (name, testmod, machine-or-machines);
    for the last form the testmod is used only on the listed machines.
    Returns None if there is no match with a declared time.

    >>> get_recommended_test_time("ERS.f19_g16_rx1.A.melvin_gnu")
    '0:10:00'

    >>> get_recommended_test_time("ERP_Ln9.ne30_ne30.FC5.melvin_gun.cam-outfrq9s")
    '0:45:00'

    >>> get_recommended_test_time("PET_Ln9.ne30_ne30.FC5.sandiatoss3_intel.cam-outfrq9s")
    '03:00:00'

    >>> get_recommended_test_time("PET_Ln20.ne30_ne30.FC5.sandiatoss3_intel.cam-outfrq9s")
    >>>
    """
    _, _, _, _, machine, compiler, _ = CIME.utils.parse_test_name(test_full_name)
    expect(machine is not None, "{} is not a full test name".format(test_full_name))

    minimum = None
    for suite_name in get_test_suites():
        _, declared, entries = _ALL_TESTS[suite_name]

        for entry in entries:
            if isinstance(entry, six.string_types):
                short_name, testmod = entry, None
            else:
                short_name, testmod = entry[0], None
                if len(entry) == 2:
                    testmod = entry[1]
                else:
                    # A single machine name is treated as a one-item list.
                    machines = entry[2]
                    if isinstance(machines, six.string_types):
                        machines = [machines]
                    if machine in machines:
                        testmod = entry[1]

            full_name = CIME.utils.get_full_test_name(
                short_name, machine=machine, compiler=compiler, testmod=testmod
            )

            if full_name == test_full_name and declared is not None:
                is_better = (
                    minimum is None
                    or convert_to_seconds(declared) < convert_to_seconds(minimum)
                )
                if is_better:
                    minimum = declared

    return minimum
def sort_by_time(test_one, test_two):
###############################################################################
    """
    cmp-style comparator ordering tests by recommended time, longest first.

    Returns a negative, zero or positive integer. Tests whose recommended
    times compare equal (including both being None) are ordered by name;
    a test with no recommended time sorts after one that has a time.

    The comparison is written without the cmp() builtin, which does not
    exist in Python 3; use functools.cmp_to_key to pass this to sort().

    >>> import functools
    >>> tests = get_full_test_names(["cime_tiny"], "melvin", "gnu")
    >>> tests.extend(get_full_test_names(["cime_developer"], "melvin", "gnu"))
    >>> tests.append("A.f19_f19.A.melvin_gnu")
    >>> tests.sort(key=functools.cmp_to_key(sort_by_time))
    >>> tests
    ['DAE.f19_f19.A.melvin_gnu', 'ERI.f09_g16.X.melvin_gnu', 'ERIO.f09_g16.X.melvin_gnu', 'ERP.f45_g37_rx1.A.melvin_gnu', 'ERR.f45_g37_rx1.A.melvin_gnu', 'ERS.ne30_g16_rx1.A.melvin_gnu', 'IRT_N2.f19_g16_rx1.A.melvin_gnu', 'NCK_Ld3.f45_g37_rx1.A.melvin_gnu', 'PET_P32.f19_f19.A.melvin_gnu', 'PRE.f19_f19.ADESP.melvin_gnu', 'PRE.f19_f19.ADESP_TEST.melvin_gnu', 'SEQ_Ln9.f19_g16_rx1.A.melvin_gnu', 'SMS.T42_T42.S.melvin_gnu', 'SMS_D_Ln9.f19_g16_rx1.A.melvin_gnu', 'ERS.f19_g16_rx1.A.melvin_gnu', 'NCK.f19_g16_rx1.A.melvin_gnu', 'A.f19_f19.A.melvin_gnu']
    """
    rec1 = get_recommended_test_time(test_one)
    rec2 = get_recommended_test_time(test_two)

    if rec1 == rec2:
        # Same time (or both unknown): fall back to ordering by name.
        return (test_one > test_two) - (test_one < test_two)

    if rec2 is None:
        return -1
    if rec1 is None:
        return 1

    # Operands are swapped on purpose so that the longer test sorts first.
    secs1 = convert_to_seconds(rec1)
    secs2 = convert_to_seconds(rec2)
    return (secs2 > secs1) - (secs2 < secs1)
def queue_meets_spec(self, queue, num_pes, walltime=None, job=None):
    """
    Return True when a job using `num_pes` processing elements fits `queue`.

    queue    -- queue identifier passed to self.get_queue_specs
    num_pes  -- requested size, checked against the queue's jobmin/jobmax
                (both converted with int())
    walltime -- optional requested walltime; compared against the queue's
                walltimemax only when the queue is flagged strict
    job      -- optional job name; when both it and the queue's jobname are
                set, the name comparison alone decides the result

    A queue for which get_queue_specs returns None is reported with a
    warning and accepted, rather than failing on the tuple unpacking.
    """
    specs = self.get_queue_specs(queue)
    if specs is None:
        # Unknown queue: nothing to check against, so do not reject the job.
        logger.warning("WARNING: queue '%s' is unknown to this system" % queue)
        return True

    jobmin, jobmax, jobname, walltimemax, strict = specs

    # A job name match automatically meets spec
    if job is not None and jobname is not None:
        return jobname == job

    if jobmin is not None and num_pes < int(jobmin):
        return False

    if jobmax is not None and num_pes > int(jobmax):
        return False

    # The walltime ceiling is only enforced for strict queues.
    if walltime is not None and walltimemax is not None and strict:
        walltime_s = convert_to_seconds(walltime)
        walltimemax_s = convert_to_seconds(walltimemax)
        if walltime_s > walltimemax_s:
            return False

    return True
def sort_by_time(test_one, test_two):
###############################################################################
    """
    cmp-style comparator ordering tests by recommended time, longest first.

    Returns a negative, zero or positive integer. Tests whose recommended
    times compare equal (including both being None) are ordered by name;
    a test with no recommended time sorts after one that has a time.

    Pass it to sort() through functools.cmp_to_key; list.sort() has no cmp
    argument in Python 3.

    >>> import functools
    >>> tests = get_full_test_names(["cime_tiny"], "melvin", "gnu")
    >>> tests.extend(get_full_test_names(["cime_developer"], "melvin", "gnu"))
    >>> tests.append("A.f19_f19.A.melvin_gnu")
    >>> tests.sort(key=functools.cmp_to_key(sort_by_time))
    >>> tests
    ['DAE.f19_f19.A.melvin_gnu', 'ERI.f09_g16.X.melvin_gnu', 'ERIO.f09_g16.X.melvin_gnu', 'ERP.f45_g37_rx1.A.melvin_gnu', 'ERR.f45_g37_rx1.A.melvin_gnu', 'ERS.ne30_g16_rx1.A.melvin_gnu', 'IRT_N2.f19_g16_rx1.A.melvin_gnu', 'NCK_Ld3.f45_g37_rx1.A.melvin_gnu', 'PET_P32.f19_f19.A.melvin_gnu', 'PRE.f19_f19.ADESP.melvin_gnu', 'PRE.f19_f19.ADESP_TEST.melvin_gnu', 'SEQ_Ln9.f19_g16_rx1.A.melvin_gnu', 'SMS.T42_T42.S.melvin_gnu', 'SMS_D_Ln9.f19_g16_rx1.A.melvin_gnu', 'ERS.f19_g16_rx1.A.melvin_gnu', 'NCK.f19_g16_rx1.A.melvin_gnu', 'A.f19_f19.A.melvin_gnu']
    """
    rec1, rec2 = get_recommended_test_time(test_one), get_recommended_test_time(test_two)

    if rec1 == rec2:
        # Name tie-break. Both terms must compare test_one against test_two;
        # "(one > two) - (two < one)" subtracts a value from itself.
        return (test_one > test_two) - (test_one < test_two)
    else:
        if rec2 is None:
            return -1
        elif rec1 is None:
            return 1
        else:
            # Three-way compare of (rec2, rec1): swapped so longer tests
            # come first.
            a = convert_to_seconds(rec2)
            b = convert_to_seconds(rec1)
            return (a > b) - (a < b)
def _get_time_est(test, baseline_root, as_int=False, use_cache=False, raw=False):
###############################################################################
    """
    Estimate the time for `test`, preferring past results over suite defaults.

    The estimate comes from get_recommended_test_time_based_on_past (given
    baseline_root and raw) and falls back to get_recommended_test_time when
    that yields None. With as_int the estimate is converted to seconds, and
    a missing estimate becomes 9999999999. With use_cache the result is
    read from and stored in _TIME_CACHE.

    NOTE(review): the cache is keyed by test name only, so a cached value
    reflects the as_int/raw settings of whichever call stored it.
    """
    if test in _TIME_CACHE and use_cache:
        return _TIME_CACHE[test]

    estimate = get_recommended_test_time_based_on_past(baseline_root, test, raw=raw)
    if estimate is None:
        estimate = get_recommended_test_time(test)

    if as_int:
        # An unknown time is given a huge sentinel value.
        estimate = 9999999999 if estimate is None else convert_to_seconds(estimate)

    if use_cache:
        _TIME_CACHE[test] = estimate

    return estimate
def key_test_time(test_full_name):
###############################################################################
    """
    Sort key for a test: its recommended time in seconds.

    Tests without a recommended time get the sentinel 99999999.
    """
    rec_time = get_recommended_test_time(test_full_name)
    if rec_time is None:
        return 99999999
    return convert_to_seconds(rec_time)
def single_submit_impl(
    machine_name, test_id, proc_pool, _, args, job_cost_map, wall_time, test_root
):
###############################################################################
    """
    Submit one batch job that re-invokes this script over existing tests.

    machine_name -- machine to look up; replaced by the resolved machine name
    test_id      -- test id used to find the test case dirs and name the job
    proc_pool    -- processor count used to size the job (nodes, totaltasks)
    _            -- unused
    args         -- original command-line arguments; must contain
                    "--single-submit", which is replaced by "--no-batch" and
                    "--use-existing" for the second invocation
    job_cost_map -- passed to compute_total_time when wall_time is None
    wall_time    -- requested walltime string, or None to compute one
    test_root    -- directory searched for "*<test_id>*/TestStatus"

    The walltime is capped at the selected queue's maximum when one is set.
    Fails via expect() on a non-batch machine or when no test case
    directory is found.
    """
    mach = Machines(machine=machine_name)
    expect(
        mach.has_batch_system(),
        "Single submit does not make sense on non-batch machine '%s'"
        % mach.get_machine_name(),
    )

    machine_name = mach.get_machine_name()

    #
    # Compute arg list for second call to create_test
    #
    new_args = list(args)
    new_args.remove("--single-submit")
    new_args.append("--no-batch")
    new_args.append("--use-existing")

    no_arg_is_a_test_id_arg = True
    no_arg_is_a_proc_pool_arg = True
    no_arg_is_a_machine_arg = True
    for arg in new_args:
        if arg == "-t" or arg.startswith("--test-id"):
            no_arg_is_a_test_id_arg = False
        elif arg.startswith("--proc-pool"):
            no_arg_is_a_proc_pool_arg = False
        elif arg == "-m" or arg.startswith("--machine"):
            # The caller already chose a machine; do not append a second -m.
            no_arg_is_a_machine_arg = False

    if no_arg_is_a_test_id_arg:
        new_args.append("-t %s" % test_id)
    if no_arg_is_a_proc_pool_arg:
        new_args.append("--proc-pool %d" % proc_pool)
    if no_arg_is_a_machine_arg:
        new_args.append("-m %s" % machine_name)

    #
    # Resolve batch directives manually. There is currently no other way
    # to do this without making a Case object. Make a throwaway case object
    # to help us here.
    #
    testcase_dirs = glob.glob("%s/*%s*/TestStatus" % (test_root, test_id))
    expect(testcase_dirs, "No test case dirs found!?")
    first_case = os.path.abspath(os.path.dirname(testcase_dirs[0]))
    with Case(first_case, read_only=False) as case:
        env_batch = case.get_env("batch")

        submit_cmd = env_batch.get_value("batch_submit", subgroup=None)
        submit_args = env_batch.get_submit_args(case, "case.test")

        tasks_per_node = mach.get_value("MAX_MPITASKS_PER_NODE")
        num_nodes = int(math.ceil(float(proc_pool) / tasks_per_node))
        if wall_time is None:
            wall_time = compute_total_time(job_cost_map, proc_pool)
            wall_time_bab = convert_to_babylonian_time(int(wall_time))
        else:
            wall_time_bab = wall_time
            # Keep a numeric value as well: the cap below compares seconds,
            # and comparing an int with the walltime string is an error.
            wall_time = convert_to_seconds(wall_time_bab)

        queue = env_batch.select_best_queue(num_nodes, proc_pool, walltime=wall_time_bab)
        wall_time_max_bab = env_batch.get_queue_specs(queue)[3]
        if wall_time_max_bab is not None:
            wall_time_max = convert_to_seconds(wall_time_max_bab)
            if wall_time_max < wall_time:
                wall_time = wall_time_max
                wall_time_bab = convert_to_babylonian_time(wall_time)

        overrides = {
            "job_id": "create_test_single_submit_%s" % test_id,
            "num_nodes": num_nodes,
            "tasks_per_node": tasks_per_node,
            "totaltasks": tasks_per_node * num_nodes,
            "job_wallclock_time": wall_time_bab,
            "job_queue": env_batch.text(queue),
        }

        directives = env_batch.get_batch_directives(case, "case.test", overrides=overrides)

    #
    # Make simple submit script and submit
    #
    script = "#! /bin/bash\n"
    script += "\n%s" % directives
    script += "\n"
    script += "cd %s\n" % os.getcwd()
    script += "%s %s\n" % (__file__, " ".join(new_args))

    submit_cmd = "%s %s" % (submit_cmd, submit_args)
    logger.info("Script:\n%s" % script)

    run_cmd_no_fail(
        submit_cmd, input_str=script, arg_stdout=None, arg_stderr=None, verbose=True
    )
def create_test(
    test_names,
    test_data,
    compiler,
    machine_name,
    no_run,
    no_build,
    no_setup,
    no_batch,
    test_root,
    baseline_root,
    clean,
    baseline_cmp_name,
    baseline_gen_name,
    namelists_only,
    project,
    test_id,
    parallel_jobs,
    walltime,
    single_submit,
    proc_pool,
    use_existing,
    save_timing,
    queue,
    allow_baseline_overwrite,
    output_root,
    wait,
    force_procs,
    force_threads,
    mpilib,
    input_dir,
    pesfile,
    run_count,
    mail_user,
    mail_type,
    check_throughput,
    check_memory,
    ignore_namelists,
    ignore_memleak,
    allow_pnl,
    non_local,
    single_exe,
    workflow,
    chksum,
):
###############################################################################
    """
    Run the requested tests through a TestScheduler and return its result.

    Most arguments are forwarded unchanged to the TestScheduler constructor;
    wait and the check_*/ignore_* flags go to run_tests. When the run
    succeeds and single_submit is set, the per-test processor count and
    JOB_WALLCLOCK_TIME are collected and handed to single_submit_impl; if
    proc_pool is None it defaults to twice the largest processor count.
    """
    scheduler = TestScheduler(
        test_names,
        test_data=test_data,
        no_run=no_run,
        no_build=no_build,
        no_setup=no_setup,
        no_batch=no_batch,
        test_root=test_root,
        test_id=test_id,
        baseline_root=baseline_root,
        baseline_cmp_name=baseline_cmp_name,
        baseline_gen_name=baseline_gen_name,
        clean=clean,
        machine_name=machine_name,
        compiler=compiler,
        namelists_only=namelists_only,
        project=project,
        parallel_jobs=parallel_jobs,
        walltime=walltime,
        proc_pool=proc_pool,
        use_existing=use_existing,
        save_timing=save_timing,
        queue=queue,
        allow_baseline_overwrite=allow_baseline_overwrite,
        output_root=output_root,
        force_procs=force_procs,
        force_threads=force_threads,
        mpilib=mpilib,
        input_dir=input_dir,
        pesfile=pesfile,
        run_count=run_count,
        mail_user=mail_user,
        mail_type=mail_type,
        allow_pnl=allow_pnl,
        non_local=non_local,
        single_exe=single_exe,
        workflow=workflow,
        chksum=chksum,
    )

    success = scheduler.run_tests(
        wait=wait,
        check_throughput=check_throughput,
        check_memory=check_memory,
        ignore_namelists=ignore_namelists,
        ignore_memleak=ignore_memleak,
    )

    if not (success and single_submit):
        return success

    # Get real test root
    test_root = scheduler._test_root

    job_cost_map = {}
    largest_case = 0
    for test in scheduler._tests:
        test_dir = scheduler._get_test_dir(test)
        procs_needed = scheduler._get_procs_needed(test, RUN_PHASE)
        wallclock = run_cmd_no_fail(
            "./xmlquery JOB_WALLCLOCK_TIME -value -subgroup case.test",
            from_dir=test_dir,
        )
        time_needed = convert_to_seconds(wallclock)

        job_cost_map[test] = (procs_needed, time_needed)
        largest_case = max(largest_case, procs_needed)

    if proc_pool is None:
        # Based on size of created jobs, choose a reasonable proc_pool. May need to put
        # more thought into this.
        proc_pool = 2 * largest_case

    # Create submit script
    single_submit_impl(
        machine_name,
        test_id,
        proc_pool,
        project,
        sys.argv[1:],
        job_cost_map,
        walltime,
        test_root,
    )

    return success
def key_test_time(test_full_name):
###############################################################################
    """
    Return the recommended time of a test in seconds, for use as a sort key.

    A test with no recommended time maps to 99999999.
    """
    recommended = get_recommended_test_time(test_full_name)
    has_time = recommended is not None
    return convert_to_seconds(recommended) if has_time else 99999999
logger.debug("Using walltimemax {!r} from queue " "{!r}".format(walltime, self.text(queue))) if walltime is None: walltime = self._default_walltime logger.debug("Last resort using default walltime " "{!r}".format(walltime)) # only enforce when not running a test if not case.get_value("TEST"): walltimemin = specs[3] walltimemax = specs[4] walltime_seconds = convert_to_seconds(walltime) # walltime must not be less than walltimemin if walltimemin is not None: walltimemin_seconds = convert_to_seconds(walltimemin) if walltime_seconds < walltimemin_seconds: logger.warning("WARNING: Job {!r} walltime " "{!r} is less than queue " "{!r} minimum walltime " "{!r}, job might fail".format( job, walltime, self.text(queue), walltimemin)) # walltime must not be more than walltimemax if walltimemax is not None:
specs = self.get_queue_specs(queue) if walltime is None: # Figure out walltime if specs is None: # Queue is unknown, use specs from default queue walltime = self.get(self.get_default_queue(), "walltimemax") else: walltime = specs[3] walltime = self._default_walltime if walltime is None else walltime # last-chance fallback walltime_format = self.get_value("walltime_format") if walltime_format: seconds = convert_to_seconds(walltime) full_bab_time = convert_to_babylonian_time(seconds) walltime = format_time(walltime_format, "%H:%M:%S", full_bab_time) env_workflow.set_value("JOB_QUEUE", self.text(queue), subgroup=job, ignore_type=specs is None) env_workflow.set_value("JOB_WALLCLOCK_TIME", walltime, subgroup=job) logger.debug("Job {} queue {} walltime {}".format( job, self.text(queue), walltime)) def _match_attribs(self, attribs, case, queue):