def get_recommended_test_time_based_on_past(baseline_root, test, raw=False): if baseline_root is not None: try: the_path = os.path.join(baseline_root, _WALLTIME_BASELINE_NAME, test, _WALLTIME_FILE_NAME) if os.path.exists(the_path): last_line = int(open(the_path, "r").readlines()[-1].split()[0]) if raw: best_walltime = last_line else: best_walltime = None for cutoff, tolerance in _WALLTIME_TOLERANCE: if last_line <= cutoff: best_walltime = int(float(last_line) * tolerance) break if best_walltime < _GLOBAL_MINUMUM_TIME: best_walltime = _GLOBAL_MINUMUM_TIME best_walltime += _GLOBAL_WIGGLE return convert_to_babylonian_time(best_walltime) except Exception: # We NEVER want a failure here to kill the run logger.warning("Failed to read test time: {}".format(sys.exc_info()[1])) return None
def get_recommended_test_time_based_on_past(baseline_root, test, raw=False): if baseline_root is not None: try: the_path = os.path.join(baseline_root, _WALLTIME_BASELINE_NAME, test, _WALLTIME_FILE_NAME) if os.path.exists(the_path): last_line = int(open(the_path, "r").readlines()[-1]) if raw: best_walltime = last_line else: best_walltime = None for cutoff, tolerance in _WALLTIME_TOLERANCE: if last_line <= cutoff: best_walltime = int(float(last_line) * tolerance) break if best_walltime < _GLOBAL_MINUMUM_TIME: best_walltime = _GLOBAL_MINUMUM_TIME best_walltime += _GLOBAL_WIGGLE return convert_to_babylonian_time(best_walltime) except: # We NEVER want a failure here to kill the run logger.warning("Failed to read test time: {}".format(sys.exc_info()[0])) return None
    # NOTE(review): this chunk is the tail of a method whose `def` line is
    # outside the visible source (presumably a batch-environment class that
    # resolves queue/walltime for a job -- confirm against the full file).
    # Code tokens are unchanged; only comments were added.
    specs = self.get_queue_specs(queue)

    if walltime is None:
        # Figure out walltime
        if specs is None:
            # Queue is unknown, use specs from default queue
            walltime = self.get(self.get_default_queue(), "walltimemax")
        else:
            # Fourth element of the queue-specs tuple is the max walltime
            # for this queue (see get_queue_specs -- TODO confirm).
            walltime = specs[3]

        # last-chance fallback
        walltime = self._default_walltime if walltime is None else walltime

    # Optionally reformat the walltime string according to the
    # machine-configured "walltime_format" (round-trips through seconds).
    walltime_format = self.get_value("walltime_format")
    if walltime_format:
        seconds = convert_to_seconds(walltime)
        full_bab_time = convert_to_babylonian_time(seconds)
        walltime = format_time(walltime_format, "%H:%M:%S", full_bab_time)

    # Persist the resolved queue and walltime into the workflow XML for this
    # job. ignore_type is set when the queue was unknown (specs is None).
    env_workflow.set_value(
        "JOB_QUEUE", self.text(queue), subgroup=job, ignore_type=specs is None
    )
    env_workflow.set_value("JOB_WALLCLOCK_TIME", walltime, subgroup=job)
    logger.debug(
        "Job {} queue {} walltime {}".format(job, self.text(queue), walltime)
    )

def _match_attribs(self, attribs, case, queue):
    # check for matches with case-vars
    # NOTE(review): method body continues beyond this chunk of the file.
def single_submit_impl(
    machine_name, test_id, proc_pool, _, args, job_cost_map, wall_time, test_root
):
    ###############################################################################
    """
    Submit an entire create_test run as one batch job: re-invoke this script
    inside the allocation with --no-batch/--use-existing, sized to *proc_pool*.

    machine_name: machine to submit on (must have a batch system).
    test_id:      id shared by the already-created test cases under test_root.
    proc_pool:    total MPI task pool to request.
    _:            unused placeholder (kept for caller compatibility).
    args:         original create_test argv, rewritten for the inner invocation.
    job_cost_map: per-job cost data used to estimate total time when
                  wall_time is None.
    wall_time:    requested walltime, or None to compute one.
    test_root:    root directory containing the created test cases.
    """
    mach = Machines(machine=machine_name)
    expect(
        mach.has_batch_system(),
        "Single submit does not make sense on non-batch machine '%s'"
        % mach.get_machine_name(),
    )

    machine_name = mach.get_machine_name()

    #
    # Compute arg list for second call to create_test
    #
    new_args = list(args)
    new_args.remove("--single-submit")
    new_args.append("--no-batch")
    new_args.append("--use-existing")
    no_arg_is_a_test_id_arg = True
    no_arg_is_a_proc_pool_arg = True
    no_arg_is_a_machine_arg = True
    for arg in new_args:
        if arg == "-t" or arg.startswith("--test-id"):
            no_arg_is_a_test_id_arg = False
        elif arg.startswith("--proc-pool"):
            no_arg_is_a_proc_pool_arg = False
        elif arg == "-m" or arg.startswith("--machine"):
            # BUG FIX: the original assigned True here, leaving the flag in its
            # initial state, so "-m <machine>" was appended below even when the
            # caller had already passed a machine argument. Must be False, in
            # line with the two sibling branches above.
            no_arg_is_a_machine_arg = False

    if no_arg_is_a_test_id_arg:
        new_args.append("-t %s" % test_id)
    if no_arg_is_a_proc_pool_arg:
        new_args.append("--proc-pool %d" % proc_pool)
    if no_arg_is_a_machine_arg:
        new_args.append("-m %s" % machine_name)

    #
    # Resolve batch directives manually. There is currently no other way
    # to do this without making a Case object. Make a throwaway case object
    # to help us here.
    #
    testcase_dirs = glob.glob("%s/*%s*/TestStatus" % (test_root, test_id))
    expect(testcase_dirs, "No test case dirs found!?")
    first_case = os.path.abspath(os.path.dirname(testcase_dirs[0]))
    with Case(first_case, read_only=False) as case:
        env_batch = case.get_env("batch")

        submit_cmd = env_batch.get_value("batch_submit", subgroup=None)
        submit_args = env_batch.get_submit_args(case, "case.test")

        tasks_per_node = mach.get_value("MAX_MPITASKS_PER_NODE")
        num_nodes = int(math.ceil(float(proc_pool) / tasks_per_node))
        if wall_time is None:
            wall_time = compute_total_time(job_cost_map, proc_pool)
            wall_time_bab = convert_to_babylonian_time(int(wall_time))
        else:
            wall_time_bab = wall_time

        queue = env_batch.select_best_queue(num_nodes, proc_pool, walltime=wall_time_bab)
        wall_time_max_bab = env_batch.get_queue_specs(queue)[3]
        if wall_time_max_bab is not None:
            # Clamp the request to the queue's maximum walltime.
            wall_time_max = convert_to_seconds(wall_time_max_bab)
            if wall_time_max < wall_time:
                wall_time = wall_time_max
                wall_time_bab = convert_to_babylonian_time(wall_time)

        overrides = {
            "job_id": "create_test_single_submit_%s" % test_id,
            "num_nodes": num_nodes,
            "tasks_per_node": tasks_per_node,
            "totaltasks": tasks_per_node * num_nodes,
            "job_wallclock_time": wall_time_bab,
            "job_queue": env_batch.text(queue),
        }

        directives = env_batch.get_batch_directives(case, "case.test", overrides=overrides)

    #
    # Make simple submit script and submit
    #
    script = "#! /bin/bash\n"
    script += "\n%s" % directives
    script += "\n"
    script += "cd %s\n" % os.getcwd()
    script += "%s %s\n" % (__file__, " ".join(new_args))

    submit_cmd = "%s %s" % (submit_cmd, submit_args)
    logger.info("Script:\n%s" % script)

    run_cmd_no_fail(
        submit_cmd, input_str=script, arg_stdout=None, arg_stderr=None, verbose=True
    )