def restore_from_archive(self, rest_dir=None, dout_s_root=None, rundir=None):
###############################################################################
    """
    Take archived restart files and load them into current case.  Use rest_dir if provided
    otherwise use most recent

    restore_from_archive is a member of Class Case
    """
    dout_s_root = self.get_value("DOUT_S_ROOT") if dout_s_root is None else dout_s_root
    rundir = self.get_value("RUNDIR") if rundir is None else rundir

    rest_root = os.path.join(dout_s_root, "rest")
    if rest_dir is None:
        # No directory given: pick the most recently modified archive set.
        rest_dir = os.path.join(rest_root, ls_sorted_by_mtime(rest_root)[-1])
    elif not os.path.isabs(rest_dir):
        # Relative names are resolved under DOUT_S_ROOT/rest.
        rest_dir = os.path.join(rest_root, rest_dir)

    logger.info("Restoring restart from {}".format(rest_dir))

    for archived in glob.glob("{}/*".format(rest_dir)):
        target = os.path.join(rundir, os.path.basename(archived))
        # Replace any stale copy already sitting in the run directory.
        if os.path.exists(target):
            os.remove(target)
        logger.info("Restoring {} from {} to {}".format(archived, rest_dir, rundir))
        safe_copy(archived, rundir)
def _do_full_nl_gen(case, test, generate_name, baseline_root=None):
    """Generate namelist baselines: copy CaseDocs and user_nl* files into the
    baseline area for *test* under *generate_name*.

    baseline_root defaults to the case's BASELINE_ROOT value.
    """
    test_dir = case.get_value("CASEROOT")
    casedoc_dir = os.path.join(test_dir, "CaseDocs")
    if baseline_root is None:
        baseline_root = case.get_value("BASELINE_ROOT")

    baseline_dir = os.path.join(baseline_root, generate_name, test)
    baseline_casedocs = os.path.join(baseline_dir, "CaseDocs")

    # Directory perms: full owner/group, read+search for others.
    dir_perms = stat.S_IRWXU | stat.S_IRWXG | stat.S_IXOTH | stat.S_IROTH
    # File perms: owner/group read+write.
    file_perms = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP

    if not os.path.isdir(baseline_dir):
        os.makedirs(baseline_dir, dir_perms)

    # Replace any previous CaseDocs baseline wholesale.
    if os.path.isdir(baseline_casedocs):
        shutil.rmtree(baseline_casedocs)

    shutil.copytree(casedoc_dir, baseline_casedocs)
    os.chmod(baseline_casedocs, dir_perms)
    for copied in glob.glob("{}/*".format(baseline_casedocs)):
        os.chmod(copied, file_perms)

    for user_nl in glob.glob(os.path.join(test_dir, "user_nl*")):
        preexisting_baseline = os.path.join(baseline_dir, os.path.basename(user_nl))
        if os.path.exists(preexisting_baseline):
            os.remove(preexisting_baseline)
        safe_copy(user_nl, baseline_dir)
        os.chmod(preexisting_baseline, file_perms)
def _build_model_thread(
    config_dir,
    compclass,
    compname,
    caseroot,
    libroot,
    bldroot,
    incroot,
    file_build,
    thread_bad_results,
    smp,
    compiler,
):
###############################################################################
    """Build one component library, logging build output to file_build.

    On failure, appends a message to thread_bad_results for the caller to
    inspect after all build threads finish.
    """
    logger.info("Building {} with output to {}".format(compclass, file_build))
    start = time.time()

    # A case-local buildlib under SourceMods overrides the component's stock one.
    cmd = os.path.join(caseroot, "SourceMods", "src." + compname, "buildlib")
    if os.path.isfile(cmd):
        logger.warning("WARNING: using local buildlib script for {}".format(compname))
    else:
        cmd = os.path.join(config_dir, "buildlib")
        expect(os.path.isfile(cmd), "Could not find buildlib for {}".format(compname))

    compile_cmd = "COMP_CLASS={compclass} COMP_NAME={compname} {cmd} {caseroot} {libroot} {bldroot} ".format(
        compclass=compclass,
        compname=compname,
        cmd=cmd,
        caseroot=caseroot,
        libroot=libroot,
        bldroot=bldroot,
    )
    # ufs does not use the SMP env var prefix.
    if get_model() != "ufs":
        compile_cmd = "SMP={} {}".format(stringify_bool(smp), compile_cmd)

    # Python buildlib scripts accept logging flags on their command line.
    if is_python_executable(cmd):
        logging_options = get_logging_options()
        if logging_options != "":
            compile_cmd = compile_cmd + logging_options

    with open(file_build, "w") as fd:
        rc = run_cmd(compile_cmd, from_dir=bldroot,
                     arg_stdout=fd, arg_stderr=subprocess.STDOUT)[0]

    if rc != 0:
        thread_bad_results.append(
            "BUILD FAIL: {}.buildlib failed, cat {}".format(compname, file_build))

    analyze_build_log(compclass, file_build, compiler)

    # Publish generated Fortran module files for dependent component builds.
    for mod_file in glob.glob(os.path.join(bldroot, "*_[Cc][Oo][Mm][Pp]_*.mod")):
        safe_copy(mod_file, incroot)

    logger.info("{} built in {:f} seconds".format(compname, (time.time() - start)))
def set_ref_case(self, case):
    """Point *case* at the restart output of its spinup reference case.

    A ".postad" case references the ".ad" spinup case; a ".transient" case
    references the ".postad" case. Symlinks the reference restart files and
    copies rpointer files into the run directory, then sets RUN_REFDIR,
    RUN_REFCASE, RUN_REFDATE (and RUN_STARTDATE for ".postad" cases).

    Raises (via expect) if the reference case directory does not exist.
    """
    rundir = case.get_value("RUNDIR")
    case_root = case.get_value("CASEROOT")
    if case_root.endswith(".postad"):
        ref_case_root = case_root.replace(".postad", ".ad")
        root = ".ad"
    else:
        ref_case_root = case_root.replace(".transient", ".postad")
        root = ".postad"
    expect(
        os.path.isdir(ref_case_root),
        "ERROR: spinup must be completed first, could not find directory {}"
        .format(ref_case_root))

    with Case(ref_case_root) as refcase:
        refrundir = refcase.get_value("RUNDIR")
    case.set_value("RUN_REFDIR", refrundir)
    case.set_value("RUN_REFCASE", os.path.basename(ref_case_root))

    for reffile in glob.iglob(refrundir +
                              "/{}{}.*.nc".format(self.name, root)):
        # BUGFIX: use a raw string so "\d" is a regex digit class rather than
        # an invalid string escape (SyntaxWarning/DeprecationWarning on
        # modern Python).
        m = re.search(r"(\d\d\d\d-\d\d-\d\d)-\d\d\d\d\d.nc", reffile)
        if m:
            refdate = m.group(1)
        symlink_force(reffile, os.path.join(rundir,
                                            os.path.basename(reffile)))

    for rpfile in glob.iglob(refrundir + "/rpointer*"):
        safe_copy(rpfile, rundir)

    # Share the reference case's inputdata via symlink if we have none.
    if not os.path.isdir(os.path.join(
            rundir, "inputdata")) and os.path.isdir(
                os.path.join(refrundir, "inputdata")):
        symlink_force(os.path.join(refrundir, "inputdata"),
                      os.path.join(rundir, "inputdata"))

    # NOTE(review): refdate is unbound if no restart file matched the date
    # pattern above — behavior preserved from the original; confirm upstream
    # guarantees at least one dated restart file exists.
    case.set_value("RUN_REFDATE", refdate)
    if case_root.endswith(".postad"):
        case.set_value("RUN_STARTDATE", refdate)
def _build_model_thread(config_dir, compclass, compname, caseroot, libroot, bldroot,
                        incroot, file_build, thread_bad_results, smp, compiler):
###############################################################################
    """Build one component library, writing build output to file_build.

    Failures are reported by appending to thread_bad_results.
    """
    logger.info("Building {} with output to {}".format(compclass, file_build))
    start = time.time()

    # Prefer a case-local buildlib from SourceMods over the stock script.
    cmd = os.path.join(caseroot, "SourceMods", "src." + compname, "buildlib")
    if os.path.isfile(cmd):
        logger.warning("WARNING: using local buildlib script for {}".format(compname))
    else:
        cmd = os.path.join(config_dir, "buildlib")
        expect(os.path.isfile(cmd), "Could not find buildlib for {}".format(compname))

    build_cmd = "MODEL={} SMP={} {} {} {} {} ".format(
        compclass, stringify_bool(smp), cmd, caseroot, libroot, bldroot)
    with open(file_build, "w") as fd:
        rc = run_cmd(build_cmd, from_dir=bldroot,
                     arg_stdout=fd, arg_stderr=subprocess.STDOUT)[0]

    analyze_build_log(compclass, file_build, compiler)

    if rc != 0:
        thread_bad_results.append(
            "BUILD FAIL: {}.buildlib failed, cat {}".format(compname, file_build))

    # Make generated Fortran module files available to dependent builds.
    for mod_file in glob.glob(os.path.join(bldroot, "*_[Cc][Oo][Mm][Pp]_*.mod")):
        safe_copy(mod_file, incroot)

    logger.info("{} built in {:f} seconds".format(compname, (time.time() - start)))
def _build_model_thread(config_dir, compclass, compname, caseroot, libroot,
                        bldroot, incroot, file_build, thread_bad_results,
                        smp, compiler):
###############################################################################
    """Build a single component library and log its output to file_build."""
    logger.info("Building {} with output to {}".format(compclass, file_build))
    t_begin = time.time()

    # SourceMods may carry a case-specific buildlib that takes precedence.
    local_buildlib = os.path.join(caseroot, "SourceMods", "src." + compname, "buildlib")
    if os.path.isfile(local_buildlib):
        buildlib = local_buildlib
        logger.warning(
            "WARNING: using local buildlib script for {}".format(compname))
    else:
        buildlib = os.path.join(config_dir, "buildlib")
        expect(os.path.isfile(buildlib),
               "Could not find buildlib for {}".format(compname))

    with open(file_build, "w") as log_fd:
        errcode = run_cmd(
            "MODEL={} SMP={} {} {} {} {} ".format(
                compclass, stringify_bool(smp), buildlib,
                caseroot, libroot, bldroot),
            from_dir=bldroot, arg_stdout=log_fd,
            arg_stderr=subprocess.STDOUT)[0]

    analyze_build_log(compclass, file_build, compiler)

    if errcode != 0:
        thread_bad_results.append(
            "BUILD FAIL: {}.buildlib failed, cat {}".format(
                compname, file_build))

    # Copy generated *_comp_*.mod files where dependent builds can see them.
    for modfile in glob.glob(os.path.join(bldroot, "*_[Cc][Oo][Mm][Pp]_*.mod")):
        safe_copy(modfile, incroot)

    logger.info("{} built in {:f} seconds".format(compname, (time.time() - t_begin)))
def restore_from_archive(self, rest_dir=None, dout_s_root=None, rundir=None):
###############################################################################
    """
    Take archived restart files and load them into current case.  Use rest_dir if provided
    otherwise use most recent

    restore_from_archive is a member of Class Case
    """
    if dout_s_root is None:
        dout_s_root = self.get_value("DOUT_S_ROOT")
    if rundir is None:
        rundir = self.get_value("RUNDIR")

    if rest_dir is not None:
        # Relative paths are taken under DOUT_S_ROOT/rest.
        if not os.path.isabs(rest_dir):
            rest_dir = os.path.join(dout_s_root, "rest", rest_dir)
    else:
        # Default: the newest archived restart set by modification time.
        newest = ls_sorted_by_mtime(os.path.join(dout_s_root, "rest"))[-1]
        rest_dir = os.path.join(dout_s_root, "rest", newest)

    logger.info("Restoring restart from {}".format(rest_dir))

    for src in glob.glob("{}/*".format(rest_dir)):
        dst = os.path.join(rundir, os.path.basename(src))
        # Remove any stale file of the same name before copying.
        if os.path.exists(dst):
            os.remove(dst)
        logger.info("Restoring {} from {} to {}".format(src, rest_dir, rundir))
        safe_copy(src, rundir)
def _record_git_provenance(srcroot, exeroot, lid):
    """
    Records git provenance

    Records git status, diff and logs for main repo and all submodules.
    """
    def prov_path(tag):
        # Provenance artifacts are written as <TAG>.<lid> under exeroot.
        return os.path.join(exeroot, "{}.{}".format(tag, lid))

    # Status, diff, recent log and remotes for the repo and its submodules.
    _run_git_cmd_recursively("status", srcroot, prov_path("GIT_STATUS"))
    _run_git_cmd_recursively("diff", srcroot, prov_path("GIT_DIFF"))
    _run_git_cmd_recursively("log --first-parent --pretty=oneline -n 5",
                             srcroot, prov_path("GIT_LOG"))
    _run_git_cmd_recursively("remote -v", srcroot, prov_path("GIT_REMOTE"))

    # Snapshot the repository's git config as well.
    gitroot = _parse_dot_git_path(_find_git_root(srcroot))
    safe_copy(os.path.join(gitroot, "config"), prov_path("GIT_CONFIG"),
              preserve_meta=False)
def _archive_rpointer_files(casename, ninst_strings, rundir,
                            save_interim_restart_files, archive,
                            archive_entry, archive_restdir, datename,
                            datename_is_last):
###############################################################################
    """Create rpointer files in archive_restdir for one restart date.

    For the latest restart date the run-directory rpointer files are copied
    verbatim; for interim dates (when save_interim_restart_files is set) the
    file names and contents are generated from the env_archive.xml entry,
    substituting $CASE, $DATENAME, $MPAS_DATENAME and $NINST_STRING.
    """
    if datename_is_last:
        # Copy of all rpointer files for latest restart date
        rpointers = glob.glob(os.path.join(rundir, 'rpointer.*'))
        for rpointer in rpointers:
            safe_copy(
                rpointer,
                os.path.join(archive_restdir, os.path.basename(rpointer)))
    else:
        # Generate rpointer file(s) for interim restarts for the one datename and each
        # possible value of ninst_strings
        if save_interim_restart_files:
            # parse env_archive.xml to determine the rpointer files
            # and contents for the given archive_entry tag
            rpointer_items = archive.get_rpointer_contents(archive_entry)

            # loop through the possible rpointer files and contents
            for rpointer_file, rpointer_content in rpointer_items:
                temp_rpointer_file = rpointer_file
                temp_rpointer_content = rpointer_content

                # put in a temporary setting for ninst_strings if they are empty
                # in order to have just one loop over ninst_strings below
                if rpointer_content != 'unset':
                    if not ninst_strings:
                        ninst_strings = ["empty"]
                    for ninst_string in ninst_strings:
                        rpointer_file = temp_rpointer_file
                        rpointer_content = temp_rpointer_content
                        if ninst_string == 'empty':
                            ninst_string = ""
                        for key, value in [
                                ('$CASE', casename),
                                ('$DATENAME', _datetime_str(datename)),
                                ('$MPAS_DATENAME', _datetime_str_mpas(datename)),
                                ('$NINST_STRING', ninst_string)
                        ]:
                            rpointer_file = rpointer_file.replace(key, value)
                            rpointer_content = rpointer_content.replace(
                                key, value)

                        # write out the respective files with the correct contents
                        rpointer_file = os.path.join(archive_restdir,
                                                     rpointer_file)
                        logger.info(
                            "writing rpointer_file {}".format(rpointer_file))
                        # BUGFIX: use a context manager so the file handle is
                        # closed even if a write raises, instead of bare
                        # open()/close().
                        with open(rpointer_file, 'w') as fd:
                            for output in rpointer_content.split(','):
                                fd.write("{} \n".format(output))
                else:
                    logger.info(
                        "rpointer_content unset, not creating rpointer file {}"
                        .format(rpointer_file))
def _archive_history_files(archive, archive_entry, compclass, compname,
                           histfiles_savein_rundir, last_date,
                           archive_file_fn, dout_s_root, casename, rundir):
###############################################################################
    """
    perform short term archiving on history files in rundir

    Not doc-testable due to case and file system dependence
    """
    # determine history archive directory (create if it does not exist)
    archive_histdir = os.path.join(dout_s_root, compclass, 'hist')
    if not os.path.exists(archive_histdir):
        os.makedirs(archive_histdir)
        logger.debug("created directory {}".format(archive_histdir))

    # the compname is drv but the files are named cpl
    if compname == 'drv':
        compname = 'cpl'
    # clm history files may be named clm2
    if compname == 'clm':
        compname = r'clm2?'

    # archive history files - the only history files kept in the run
    # directory are those needed for restarts
    for suffix in archive.get_hist_file_extensions(archive_entry):
        # Build a regex matching this component's history file names.
        if compname.find('mpas') == 0:
            newsuffix = compname + r'\d*'
        else:
            newsuffix = casename + r'\.' + compname + r'_?' + r'\d*'
        newsuffix += r'\.' + suffix
        if not suffix.endswith('$'):
            newsuffix += r'\.'
        logger.debug("short term archiving suffix is {} ".format(newsuffix))

        pfile = re.compile(newsuffix)
        histfiles = [f for f in os.listdir(rundir) if pfile.search(f)]
        logger.debug("histfiles = {} ".format(histfiles))

        for histfile in histfiles:
            file_date = get_file_date(os.path.basename(histfile))
            # Skip files newer than the requested archive window.
            if last_date is not None and file_date is not None and file_date > last_date:
                continue
            srcfile = join(rundir, histfile)
            expect(os.path.isfile(srcfile),
                   "history file {} does not exist ".format(srcfile))
            destfile = join(archive_histdir, histfile)
            if histfile in histfiles_savein_rundir:
                # Needed for restarts: keep a copy in the run directory.
                logger.info("copying {} to {} ".format(srcfile, destfile))
                safe_copy(srcfile, destfile)
            else:
                logger.info("moving {} to {} ".format(srcfile, destfile))
                archive_file_fn(srcfile, destfile)
def _archive_history_files(archive, archive_entry, compclass, compname,
                           histfiles_savein_rundir, last_date, archive_file_fn,
                           dout_s_root, casename, rundir):
###############################################################################
    """
    perform short term archiving on history files in rundir

    Not doc-testable due to case and file system dependence
    """
    # determine history archive directory (create if it does not exist)
    hist_archive = os.path.join(dout_s_root, compclass, 'hist')
    if not os.path.exists(hist_archive):
        os.makedirs(hist_archive)
        logger.debug("created directory {}".format(hist_archive))

    # the compname is drv but the files are named cpl
    if compname == 'drv':
        compname = 'cpl'
    # clm history files may be named clm2
    if compname == 'clm':
        compname = r'clm2?'

    # archive history files - the only history files kept in the run
    # directory are those needed for restarts
    for suffix in archive.get_hist_file_extensions(archive_entry):
        # mpas components (and mali) name files by component, others by case.
        if compname.find('mpas') == 0 or compname == 'mali':
            pattern = compname + r'\d*'
        else:
            pattern = casename + r'\.' + compname + r'_?' + r'\d*'
        pattern += r'\.' + suffix
        if not suffix.endswith('$'):
            pattern += r'\.'
        logger.debug("short term archiving suffix is {} ".format(pattern))

        matcher = re.compile(pattern)
        histfiles = [f for f in os.listdir(rundir) if matcher.search(f)]
        logger.debug("histfiles = {} ".format(histfiles))

        for histfile in histfiles:
            file_date = get_file_date(os.path.basename(histfile))
            # Archive only files at or before last_date (or undated files).
            if last_date is None or file_date is None or file_date <= last_date:
                srcfile = join(rundir, histfile)
                expect(os.path.isfile(srcfile),
                       "history file {} does not exist ".format(srcfile))
                destfile = join(hist_archive, histfile)
                if histfile in histfiles_savein_rundir:
                    # Keep a copy in rundir; it is needed for restarts.
                    logger.info("copying {} to {} ".format(srcfile, destfile))
                    safe_copy(srcfile, destfile)
                else:
                    logger.info("moving {} to {} ".format(srcfile, destfile))
                    archive_file_fn(srcfile, destfile)
def change_file(self, newfile, copy=False):
    """Re-point this object at *newfile*, optionally copying the current
    file there first (creating the destination directory if needed)."""
    if copy:
        dest_dir = os.path.dirname(newfile)
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        safe_copy(self.filename, newfile)
    # Drop the cached parse tree and re-read from the new location.
    self.tree = None
    self.filename = newfile
    self.read(newfile)
def _create_macros_cmake(caseroot, cmake_macros_dir, mach_obj, compiler,
                         case_cmake_path):
###############################################################################
    """Seed the case with Macros.cmake and the cmake_macros tree, then copy
    any machine/compiler Depends files into the case root."""
    macros_file = os.path.join(caseroot, "Macros.cmake")
    if not os.path.isfile(macros_file):
        safe_copy(os.path.join(cmake_macros_dir, "Macros.cmake"), caseroot)

    if not os.path.exists(os.path.join(caseroot, "cmake_macros")):
        shutil.copytree(cmake_macros_dir, case_cmake_path)

    copy_depends_files(mach_obj.get_machine_name(), mach_obj.machines_dir,
                       caseroot, compiler)
def _case_two_custom_postrun_action(self):
    # Link back to original case1 name
    # This is needed so that the necessary files are present for
    # baseline comparison and generation,
    # since some of them may have been moved to the archive directory
    pattern = os.path.join(self._case1.get_value("RUNDIR"),
                           "*.nc.{}".format(self._run_one_suffix))
    # Strip ".<suffix>" (suffix plus the preceding dot) from each name.
    trim = 1 + len(self._run_one_suffix)
    for suffixed in glob.iglob(pattern):
        unsuffixed = suffixed[:-trim]
        if not os.path.isfile(unsuffixed):
            safe_copy(suffixed, unsuffixed)
def _helper(dout_sr, refdate, refsec, rundir):
    """Symlink the archived restart files for refdate-refsec into rundir and
    copy the matching rpointer files."""
    rest_path = os.path.join(dout_sr, "rest", "{}-{}".format(refdate, refsec))

    for restfile in glob.glob("{}/*{}*".format(rest_path, refdate)):
        link_name = os.path.join(rundir, os.path.basename(restfile))
        # Replace any existing file/link of the same name.
        if os.path.exists(link_name):
            os.remove(link_name)
        os.symlink(restfile, link_name)

    for rpointer in glob.glob("{}/*rpointer*".format(rest_path)):
        safe_copy(rpointer, rundir)
def _case_two_custom_postrun_action(self):
    # Link back to original case1 name
    # This is needed so that the necessary files are present for
    # baseline comparison and generation,
    # since some of them may have been moved to the archive directory
    run_dir = self._case1.get_value("RUNDIR")
    suffix = self._run_one_suffix
    for case_file in glob.iglob(
            os.path.join(run_dir, "*.nc.{}".format(suffix))):
        # Drop the trailing ".<suffix>" to recover the original name.
        orig_file = case_file[:-(1 + len(suffix))]
        if not os.path.isfile(orig_file):
            safe_copy(case_file, orig_file)
def test_xml_caching(self):
    """Exercise Case's XML caching and re-sync behavior.

    Flips RUN_TYPE in env_run.xml both through the Case API and by restoring
    a file backup behind the API's back, asserting after each step that
    caches refresh (or that out-of-band tampering is detected).
    """
    casedir = self._create_test(
        ["--no-build", "TESTRUNPASS.f19_g16_rx1.A"], test_id=self._baseline_name)

    active = os.path.join(casedir, "env_run.xml")
    backup = os.path.join(casedir, "env_run.xml.bak")

    # Keep a pristine copy of env_run.xml so it can be restored repeatedly.
    utils.safe_copy(active, backup)

    # A write through the Case API is visible to a separate reader (EnvRun).
    with Case(casedir, read_only=False) as case:
        env_run = EnvRun(casedir, read_only=True)
        self.assertEqual(case.get_value("RUN_TYPE"), "startup")
        case.set_value("RUN_TYPE", "branch")
        self.assertEqual(case.get_value("RUN_TYPE"), "branch")
        self.assertEqual(env_run.get_value("RUN_TYPE"), "branch")

    # A fresh Case picks up the value persisted above.
    with Case(casedir) as case:
        self.assertEqual(case.get_value("RUN_TYPE"), "branch")

    # sleep so the restored file gets a strictly newer mtime
    time.sleep(0.2)
    utils.safe_copy(backup, active)

    # A new Case sees the restored file; write through it again.
    with Case(casedir, read_only=False) as case:
        self.assertEqual(case.get_value("RUN_TYPE"), "startup")
        case.set_value("RUN_TYPE", "branch")

    # Restoring the file while a Case is open requires a manual read_xml().
    with Case(casedir, read_only=False) as case:
        self.assertEqual(case.get_value("RUN_TYPE"), "branch")
        time.sleep(0.2)
        utils.safe_copy(backup, active)
        case.read_xml() # Manual re-sync
        self.assertEqual(case.get_value("RUN_TYPE"), "startup")
        case.set_value("RUN_TYPE", "branch")
        self.assertEqual(case.get_value("RUN_TYPE"), "branch")

    with Case(casedir) as case:
        self.assertEqual(case.get_value("RUN_TYPE"), "branch")

    time.sleep(0.2)
    utils.safe_copy(backup, active)
    env_run = EnvRun(casedir, read_only=True)
    self.assertEqual(env_run.get_value("RUN_TYPE"), "startup")

    with Case(casedir, read_only=False) as case:
        self.assertEqual(case.get_value("RUN_TYPE"), "startup")
        case.set_value("RUN_TYPE", "branch")

    # behind the back detection.
    with self.assertRaises(utils.CIMEError):
        with Case(casedir, read_only=False) as case:
            case.set_value("RUN_TYPE", "startup")
            time.sleep(0.2)
            utils.safe_copy(backup, active)
def _copy_user_modified_to_clone(self, origpath, newpath):
    """
    If file_ exists and is a link in newpath, and exists but is not a link in origpath,
    copy origpath file to newpath
    """
    for fname in os.listdir(newpath):
        clone_entry = os.path.join(newpath, fname)
        orig_entry = os.path.join(origpath, fname)
        # Only replace symlinks in the clone whose original counterpart is a
        # real (user-modified) file.
        if not os.path.islink(clone_entry):
            continue
        if not os.path.isfile(orig_entry) or os.path.islink(orig_entry):
            continue
        logger.info("Copying user modified file {} to clone".format(fname))
        os.unlink(clone_entry)
        safe_copy(orig_entry, newpath)
def _pre_run_check(case, lid, skip_pnl=False, da_cycle=0):
###############################################################################
    """Validate and prepare a case immediately before a model run.

    Checks locked files and BUILD_COMPLETE, reloads the module environment,
    resets the timing directories, and (re)generates namelists. For data
    assimilation cycles beyond the first (da_cycle > 0) only the coupler
    namelists are regenerated. skip_pnl limits namelist generation to the
    coupler on the first cycle as well.
    """
    # Pre run initialization code..
    if da_cycle > 0:
        # Later DA cycles: only the coupler namelists need refreshing.
        case.create_namelists(component='cpl')
        return

    caseroot = case.get_value("CASEROOT")
    din_loc_root = case.get_value("DIN_LOC_ROOT")
    rundir = case.get_value("RUNDIR")
    build_complete = case.get_value("BUILD_COMPLETE")

    # PFS (performance) tests snapshot the PE layout for this run's lid.
    if case.get_value("TESTCASE") == "PFS":
        env_mach_pes = os.path.join(caseroot,"env_mach_pes.xml")
        safe_copy(env_mach_pes,"{}.{}".format(env_mach_pes, lid))

    # check for locked files.
    case.check_lockedfiles()
    logger.debug("check_lockedfiles OK")

    # check that build is done
    expect(build_complete,
           "BUILD_COMPLETE is not true\nPlease rebuild the model interactively")
    logger.debug("build complete is {} ".format(build_complete))

    # load the module environment...
    case.load_env(reset=True)

    # create the timing directories, optionally cleaning them if needed.
    if os.path.isdir(os.path.join(rundir, "timing")):
        shutil.rmtree(os.path.join(rundir, "timing"))
    os.makedirs(os.path.join(rundir, "timing", "checkpoints"))

    # This needs to be done everytime the LID changes in order for log files to be set up correctly
    # The following also needs to be called in case a user changes a user_nl_xxx file OR an env_run.xml
    # variable while the job is in the queue
    logger.debug("{} NAMELIST CREATION BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
    if skip_pnl:
        case.create_namelists(component='cpl')
    else:
        logger.info("Generating namelists for {}".format(caseroot))
        case.create_namelists()
    logger.debug("{} NAMELIST CREATION HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))

    logger.info("-------------------------------------------------------------------------")
    logger.info(" - Prestage required restarts into {}".format(rundir))
    logger.info(" - Case input data directory (DIN_LOC_ROOT) is {} ".format(din_loc_root))
    logger.info(" - Checking for required input datasets in DIN_LOC_ROOT")
    logger.info("-------------------------------------------------------------------------")
def generate_teststatus(testdir, baseline_dir):
    """
    CESM stores it's TestStatus file in baselines. Do not let exceptions
    escape from this function.
    """
    if get_model() != "cesm":
        return
    # Best-effort copy: per the contract above, failures are only logged.
    try:
        if not os.path.isdir(baseline_dir):
            os.makedirs(baseline_dir)
        safe_copy(os.path.join(testdir, TEST_STATUS_FILENAME), baseline_dir)
    except Exception as e:
        logger.warning("Could not copy {} to baselines, {}".format(
            os.path.join(testdir, TEST_STATUS_FILENAME), str(e)))
def create_perturbed_init_file(original, perturb_file, outfile, weight):
    """Blend *perturb_file* into *original* with ncflint (weight vs 1.0) and
    write the result, renaming the "-tmp.nc" output to "-00000.nc"."""
    ncflint = "ncflint"

    out_dir = os.path.dirname(outfile)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # Start from a copy of the original so ncflint can operate in place.
    safe_copy(original, outfile)

    if "BWHIST" in original:
        # History-style files: append mode over the state variables only.
        cmd = ncflint + " -A -v US,VS,T,Q,PS -w {},1.0 {} {} {}".format(
            weight, perturb_file, original, outfile)
    else:
        cmd = ncflint + " -O -C -v lat,lon,slat,slon,lev,ilev,hyai,hybi,hyam,hybm,US,VS,T,Q,PS -w {},1.0 {} {} {}".format(
            weight, perturb_file, original, outfile)
    run_cmd(cmd, verbose=True)

    os.rename(outfile, outfile.replace("-tmp.nc", "-00000.nc"))
def _copy_depends_files(machine_name, machines_dir, output_dir, compiler):
    """
    Copy any system or compiler Depends files if they do not exist in the output directory
    If there is a match for Depends.machine_name.compiler copy that and ignore the others
    """
    candidates = [machine_name, compiler,
                  "{}.{}".format(machine_name, compiler)]
    for variant in candidates:
        # Each variant may exist in plain and .cmake forms.
        for ext in ("", ".cmake"):
            basename = "Depends.{}{}".format(variant, ext)
            src = os.path.join(machines_dir, basename)
            dst = os.path.join(output_dir, basename)
            if os.path.isfile(src) and not os.path.exists(dst):
                safe_copy(src, dst)
def _archive_rpointer_files(casename, ninst_strings, rundir,
                            save_interim_restart_files, archive, archive_entry,
                            archive_restdir, datename, datename_is_last):
###############################################################################
    """Create rpointer files in archive_restdir for one restart date.

    The latest date's rpointer files are copied verbatim from rundir; for
    interim dates (when save_interim_restart_files is set) names and contents
    come from the env_archive.xml entry with $CASE, $DATENAME, $MPAS_DATENAME
    and $NINST_STRING substituted.
    """
    if datename_is_last:
        # Copy of all rpointer files for latest restart date
        rpointers = glob.glob(os.path.join(rundir, 'rpointer.*'))
        for rpointer in rpointers:
            safe_copy(rpointer,
                      os.path.join(archive_restdir, os.path.basename(rpointer)))
    else:
        # Generate rpointer file(s) for interim restarts for the one datename and each
        # possible value of ninst_strings
        if save_interim_restart_files:
            # parse env_archive.xml to determine the rpointer files
            # and contents for the given archive_entry tag
            rpointer_items = archive.get_rpointer_contents(archive_entry)

            # loop through the possible rpointer files and contents
            for rpointer_file, rpointer_content in rpointer_items:
                temp_rpointer_file = rpointer_file
                temp_rpointer_content = rpointer_content

                # put in a temporary setting for ninst_strings if they are empty
                # in order to have just one loop over ninst_strings below
                if rpointer_content != 'unset':
                    if not ninst_strings:
                        ninst_strings = ["empty"]
                    for ninst_string in ninst_strings:
                        rpointer_file = temp_rpointer_file
                        rpointer_content = temp_rpointer_content
                        if ninst_string == 'empty':
                            ninst_string = ""
                        for key, value in [('$CASE', casename),
                                           ('$DATENAME', _datetime_str(datename)),
                                           ('$MPAS_DATENAME', _datetime_str_mpas(datename)),
                                           ('$NINST_STRING', ninst_string)]:
                            rpointer_file = rpointer_file.replace(key, value)
                            rpointer_content = rpointer_content.replace(key, value)

                        # write out the respective files with the correct contents
                        rpointer_file = os.path.join(archive_restdir, rpointer_file)
                        logger.info("writing rpointer_file {}".format(rpointer_file))
                        # BUGFIX: context manager replaces bare open()/close()
                        # so the handle is closed even if a write raises.
                        with open(rpointer_file, 'w') as fd:
                            for output in rpointer_content.split(','):
                                fd.write("{} \n".format(output))
                else:
                    logger.info("rpointer_content unset, not creating rpointer file {}".format(rpointer_file))
def _record_git_provenance(srcroot, exeroot, lid):
    """Records git provenance

    Records git status, diff and logs for main repo and all submodules.
    """
    def _prov(tag):
        # Provenance artifacts are written as <TAG>.<lid> under exeroot.
        return os.path.join(exeroot, "{}.{}".format(tag, lid))

    # Save git describe
    desc = utils.get_current_commit(tag=True, repo=srcroot)
    with open(_prov("GIT_DESCRIBE"), "w") as fd:
        fd.write(desc)

    gitroot = _find_git_root(srcroot)

    # Save HEAD
    headfile = os.path.join(gitroot, "logs", "HEAD")
    headfile_prov = _prov("GIT_LOGS_HEAD")
    if os.path.exists(headfile_prov):
        os.remove(headfile_prov)
    if os.path.exists(headfile):
        utils.safe_copy(headfile, headfile_prov, preserve_meta=False)

    # Save git submodule status
    subm_status = utils.get_current_submodule_status(recursive=True, repo=srcroot)
    with open(_prov("GIT_SUBMODULE_STATUS"), "w") as fd:
        fd.write(subm_status)

    # Status, diff, recent log and remotes, recursively over submodules.
    _run_git_cmd_recursively("status", srcroot, _prov("GIT_STATUS"))
    _run_git_cmd_recursively("diff", srcroot, _prov("GIT_DIFF"))
    _run_git_cmd_recursively("log --first-parent --pretty=oneline -n 5",
                             srcroot, _prov("GIT_LOG"))
    _run_git_cmd_recursively("remote -v", srcroot, _prov("GIT_REMOTE"))

    # Git config
    utils.safe_copy(os.path.join(gitroot, "config"), _prov("GIT_CONFIG"),
                    preserve_meta=False)
def _build_model_thread(config_dir, compclass, compname, caseroot, libroot,
                        bldroot, incroot, file_build, thread_bad_results, smp,
                        compiler, case):
###############################################################################
    """Build one component library, logging output to file_build.

    Components converted to python/cmake (currently cam under e3sm) are built
    via run_sub_or_cmd; everything else shells out to buildlib. Failures are
    reported by appending to thread_bad_results.
    """
    logger.info("Building {} with output to {}".format(compclass, file_build))
    t1 = time.time()

    # A case-local buildlib under SourceMods overrides the stock one.
    cmd = os.path.join(caseroot, "SourceMods", "src." + compname, "buildlib")
    if os.path.isfile(cmd):
        logger.warning(
            "WARNING: using local buildlib script for {}".format(compname))
    else:
        cmd = os.path.join(config_dir, "buildlib")
        expect(os.path.isfile(cmd),
               "Could not find buildlib for {}".format(compname))

    # Add to this list as components are converted to python/cmake
    if compname in ["cam"] and get_model() == "e3sm":
        try:
            stat = 0
            run_sub_or_cmd(cmd, [caseroot, libroot, bldroot], "buildlib",
                           [bldroot, libroot, case], logfile=file_build)
        except Exception:
            stat = 1
    else:
        with open(file_build, "w") as fd:
            stat = run_cmd("MODEL={} SMP={} {} {} {} {} ".format(
                compclass, stringify_bool(smp), cmd, caseroot, libroot,
                bldroot),
                           from_dir=bldroot,
                           arg_stdout=fd,
                           arg_stderr=subprocess.STDOUT)[0]

    # BUGFIX: analyze_build_log was previously invoked twice (before and
    # after the failure check), re-parsing the log and duplicating warnings;
    # a single call suffices.
    analyze_build_log(compclass, file_build, compiler)

    if stat != 0:
        thread_bad_results.append(
            "BUILD FAIL: {}.buildlib failed, cat {}".format(
                compname, file_build))

    # Publish generated Fortran module files for dependent builds.
    for mod_file in glob.glob(os.path.join(bldroot, "*_[Cc][Oo][Mm][Pp]_*.mod")):
        safe_copy(mod_file, incroot)

    t2 = time.time()
    logger.info("{} built in {:f} seconds".format(compname, (t2 - t1)))
def __enter__(self):
    """Create the temporary cmake build dir, seeded with CMakeLists.txt from
    the cmake_macros directory."""
    macros_root = os.path.join(self._macroloc, "cmake_macros")
    expect(
        os.path.isdir(macros_root),
        "Cannot create cmake temp build dir, no {} macros found".format(
            macros_root),
    )
    tmpdir = self.get_full_tmpdir()
    # Parent must already exist; re-entering an existing tmpdir is fine.
    Path(tmpdir).mkdir(parents=False, exist_ok=True)
    safe_copy(os.path.join(macros_root, "CMakeLists.txt"), tmpdir)
    self._entered = True
    return self
def _copy_depends_files(machine_name, machines_dir, output_dir, compiler):
    """
    Copy any system or compiler Depends files if they do not exist in the output directory
    If there is a match for Depends.machine_name.compiler copy that and ignore the others
    """
    combined = "Depends.{}.{}".format(machine_name, compiler)
    src = os.path.join(machines_dir, combined)
    dst = os.path.join(output_dir, combined)
    if os.path.isfile(src):
        # The machine+compiler specific file wins over the generic ones.
        if not os.path.isfile(dst):
            safe_copy(src, dst)
    else:
        for dep in (machine_name, compiler):
            name = "Depends.{}".format(dep)
            src = os.path.join(machines_dir, name)
            dst = os.path.join(output_dir, name)
            if os.path.isfile(src) and not os.path.isfile(dst):
                safe_copy(src, dst)
def _archive_spio_stats(lid, rundir):
    # Copy Scorpio I/O performance stats in "spio_stats" to "spio_stats.[LID]" + tar + compress
    stats_dir = os.path.join(rundir, "spio_stats")
    if not os.path.exists(stats_dir):
        os.mkdir(stats_dir)

    for summary in glob.glob(os.path.join(rundir, "io_perf_summary*")):
        utils.safe_copy(summary, stats_dir)

    job_stats_dir = os.path.join(rundir, "spio_stats." + lid)
    shutil.copytree(stats_dir, job_stats_dir)

    with tarfile.open("%s.tar.gz" % job_stats_dir, "w:gz") as tfd:
        tfd.add(job_stats_dir, arcname=os.path.basename(job_stats_dir))

    # The tarball is the archive; drop the uncompressed per-job copy.
    shutil.rmtree(job_stats_dir)
def lock_file(filename, caseroot=None, newname=None):
    """Copy caseroot/filename into the LOCKED_DIR, optionally as *newname*.

    filename must be a bare basename; caseroot defaults to the cwd.
    """
    expect("/" not in filename, "Please just provide basename of locked file")
    if caseroot is None:
        caseroot = os.getcwd()
    if newname is None:
        newname = filename

    lockdir = os.path.join(caseroot, LOCKED_DIR)
    if not os.path.exists(lockdir):
        os.mkdir(lockdir)

    logging.debug("Locking file {}".format(filename))

    # JGF: It is extremely dangerous to alter our database (xml files) without
    # going through the standard API. The copy below invalidates all existing
    # GenericXML instances that represent this file and all caching that may
    # have involved this file. We should probably seek a safer way of locking
    # files.
    safe_copy(os.path.join(caseroot, filename), os.path.join(lockdir, newname))
def _save_build_provenance_e3sm(case, lid):
    """Capture git, SourceMods and build-environment provenance for an E3SM
    build under EXEROOT, then symlink generic names to the lid-stamped files."""
    cimeroot = case.get_value("CIMEROOT")
    exeroot = case.get_value("EXEROOT")
    caseroot = case.get_value("CASEROOT")

    def _fresh(path):
        # Each provenance artifact is rewritten from scratch for this lid.
        if os.path.exists(path):
            os.remove(path)

    # Save git describe
    describe_prov = os.path.join(exeroot, "GIT_DESCRIBE.{}".format(lid))
    desc = get_current_commit(tag=True, repo=cimeroot)
    with open(describe_prov, "w") as fd:
        fd.write(desc)

    # Save HEAD
    headfile = os.path.join(cimeroot, ".git", "logs", "HEAD")
    headfile_prov = os.path.join(exeroot, "GIT_LOGS_HEAD.{}".format(lid))
    _fresh(headfile_prov)
    if os.path.exists(headfile):
        safe_copy(headfile, headfile_prov, preserve_meta=False)

    # Save SourceMods
    sourcemods = os.path.join(caseroot, "SourceMods")
    sourcemods_prov = os.path.join(exeroot, "SourceMods.{}.tar.gz".format(lid))
    _fresh(sourcemods_prov)
    if os.path.isdir(sourcemods):
        with tarfile.open(sourcemods_prov, "w:gz") as tfd:
            tfd.add(sourcemods, arcname="SourceMods")

    # Save build env
    env_prov = os.path.join(exeroot, "build_environment.{}.txt".format(lid))
    _fresh(env_prov)
    env_module = case.get_env("mach_specific")
    env_module.save_all_env_info(env_prov)

    # For all the just-created post-build provenance files, symlink a generic name
    # to them to indicate that these are the most recent or active.
    for item in ["GIT_DESCRIBE", "GIT_LOGS_HEAD", "SourceMods", "build_environment"]:
        globstr = "{}/{}.{}*".format(exeroot, item, lid)
        matches = glob.glob(globstr)
        expect(len(matches) < 2,
               "Multiple matches for glob {} should not have happened".format(globstr))
        if matches:
            the_match = matches[0]
            generic_name = the_match.replace(".{}".format(lid), "")
            _fresh(generic_name)
            os.symlink(the_match, generic_name)
def copy(case, suffix):
    """Copy the most recent batch of hist files in a case, adding the given suffix.

    This can allow you to temporarily "save" these files so they won't be blown
    away if you re-run the case.

    case - The case containing the files you want to save
    suffix - The string suffix you want to add to saved files, this can be used to find them later.

    Returns a human-readable comments string describing what was copied.
    Raises (via expect) if no hist files were found at all.
    """
    rundir = case.get_value("RUNDIR")
    ref_case = case.get_value("RUN_REFCASE")
    # Loop over models
    archive = case.get_env("archive")
    comments = "Copying hist files to suffix '{}'\n".format(suffix)
    num_copied = 0
    for model in _iter_model_file_substrs(case):
        comments += " Copying hist files for model '{}'\n".format(model)
        # The coupler's hist extensions live under the 'drv' archive entry
        if model == 'cpl':
            file_extensions = archive.get_hist_file_extensions(archive.get_entry('drv'))
        else:
            file_extensions = archive.get_hist_file_extensions(archive.get_entry(model))
        test_hists = _get_latest_hist_files(model, rundir, file_extensions, ref_case=ref_case)
        num_copied += len(test_hists)
        for test_hist in test_hists:
            new_file = "{}.{}".format(test_hist, suffix)
            if os.path.exists(new_file):
                os.remove(new_file)
            comments += " Copying '{}' to '{}'\n".format(test_hist, new_file)

            # Need to copy rather than move in case there are some history files
            # that will need to continue to be filled on the next phase; this
            # can be the case for a restart run.
            #
            # (If it weren't for that possibility, a move/rename would be more
            # robust here: The problem with a copy is that there can be
            # confusion after the second run as to which files were created by
            # the first run and which by the second. For example, if the second
            # run fails to output any history files, the test will still pass,
            # because the test system will think that run1's files were output
            # by run2. But we live with that downside for the sake of the reason
            # noted above.)
            safe_copy(test_hist, new_file)

    expect(num_copied > 0, "copy failed: no hist files found in rundir '{}'".format(rundir))

    return comments
def _copy_depends_files(machine_name, machines_dir, output_dir, compiler): """ Copy any system or compiler Depends files if they do not exist in the output directory If there is a match for Depends.machine_name.compiler copy that and ignore the others """ dfile = os.path.join(machines_dir, "Depends.{}.{}".format(machine_name, compiler)) outputdfile = os.path.join(output_dir, "Depends.{}.{}".format(machine_name, compiler)) if os.path.isfile(dfile): if not os.path.isfile(outputdfile): safe_copy(dfile, outputdfile) else: for dep in (machine_name, compiler): dfile = os.path.join(machines_dir, "Depends.{}".format(dep)) outputdfile = os.path.join(output_dir, "Depends.{}".format(dep)) if os.path.isfile(dfile) and not os.path.isfile(outputdfile): safe_copy(dfile, outputdfile)
def _cmpgen_namelists(self):
    """Redoes the namelist comparison & generation with appropriate namelists

    The standard namelist comparison & generation is done with the CaseDocs
    directory from the test case. That isn't appropriate here, because those
    namelists aren't actually used in this test. Instead, we want to compare &
    generate the namelists used by the atm_driver-lilac-ctsm execution. Here,
    we do some file copies and then re-call the namelist comparison &
    generation script in order to accomplish this. This will overwrite the
    namelists generated earlier in the test, and will also replace the results
    of the NLCOMP phase.

    Note that we expect a failure in the NLCOMP phase that is run earlier in
    the test, because that one will have compared the test's standard CaseDocs
    with the files generated from here - and those two sets of namelists can
    be quite different.
    """
    caseroot = self._case.get_value('CASEROOT')
    casedocs = os.path.join(caseroot, 'CaseDocs')
    # Start from an empty CaseDocs so only the lilac-generated files below
    # participate in the comparison
    if os.path.exists(casedocs):
        shutil.rmtree(casedocs)
    os.makedirs(casedocs)

    # case_cmpgen_namelists uses the existence of drv_in to decide whether namelists
    # need to be regenerated. We do NOT want it to regenerate namelists, so we give it
    # the file it wants.
    with open(os.path.join(casedocs, 'drv_in'), 'a') as drv_in:
        pass

    for onefile in _LILAC_RUNTIME_FILES + ['atm_driver_in']:
        safe_copy(os.path.join(self._atm_driver_rundir(), onefile),
                  os.path.join(casedocs, onefile))

    success = self._case.case_cmpgen_namelists()

    # The setting of the NLCOMP phase status in case_cmpgen_namelists doesn't work
    # here (probably because the test object has a saved version of the test status
    # and so, when it goes to write the status of the build phase, it ends up
    # overwriting whatever was set by case_cmpgen_namelists). So we need to set it
    # here.
    with self._test_status:
        self._test_status.set_status(
            NAMELIST_PHASE,
            TEST_PASS_STATUS if success else TEST_FAIL_STATUS,
            comments="(used lilac namelists)")
def _build_usernl_files(case, model, comp):
###############################################################################
    """
    Create user_nl_xxx files, expects cwd is caseroot

    model - component class (e.g. "ATM", "DRV"); upper-cased internally
    comp  - component name used to form the user_nl_<comp> filename

    Existing user_nl files in the case directory are never overwritten; the
    template is seeded from the component's cime_config directory. When
    NINST > 1 (mct multi-instance), one user_nl_<comp>_NNNN file is created
    per instance.
    """
    model = model.upper()
    if model == "DRV":
        model_file = case.get_value("CONFIG_CPL_FILE")
    else:
        model_file = case.get_value("CONFIG_{}_FILE".format(model))
    expect(model_file is not None,
           "Could not locate CONFIG_{}_FILE in config_files.xml".format(model))
    model_dir = os.path.dirname(model_file)

    expect(os.path.isdir(model_dir),
           "cannot find cime_config directory {} for component {}".format(model_dir, comp))
    comp_interface = case.get_value("COMP_INTERFACE")
    multi_driver = case.get_value("MULTI_DRIVER")
    ninst = 1

    if multi_driver:
        ninst_max = case.get_value("NINST_MAX")
        # In MULTI_DRIVER (non-nuopc) mode every component must run with the
        # same instance count; DRV/CPL/ESP are exempt from the check
        if comp_interface != "nuopc" and model not in ("DRV","CPL","ESP"):
            ninst_model = case.get_value("NINST_{}".format(model))
            expect(ninst_model==ninst_max,"MULTI_DRIVER mode, all components must have same NINST value. NINST_{} != {}".format(model,ninst_max))
    if comp == "cpl":
        if not os.path.exists("user_nl_cpl"):
            safe_copy(os.path.join(model_dir, "user_nl_cpl"), ".")
    else:
        if comp_interface == "nuopc":
            ninst = case.get_value("NINST")
        elif ninst == 1:
            ninst = case.get_value("NINST_{}".format(model))
        nlfile = "user_nl_{}".format(comp)
        model_nl = os.path.join(model_dir, nlfile)
        if ninst > 1:
            for inst_counter in range(1, ninst+1):
                inst_nlfile = "{}_{:04d}".format(nlfile, inst_counter)
                if not os.path.exists(inst_nlfile):
                    # If there is a user_nl_foo in the case directory, copy it
                    # to user_nl_foo_INST; otherwise, copy the original
                    # user_nl_foo from model_dir
                    if os.path.exists(nlfile):
                        safe_copy(nlfile, inst_nlfile)
                    elif os.path.exists(model_nl):
                        safe_copy(model_nl, inst_nlfile)
        else:
            # ninst = 1
            if not os.path.exists(nlfile):
                if os.path.exists(model_nl):
                    safe_copy(model_nl, nlfile)
def _copy_depends_files(machine_name, machines_dir, output_dir, compiler): """ Copy any system or compiler Depends files if they do not exist in the output directory If there is a match for Depends.machine_name.compiler copy that and ignore the others """ # Note, the cmake build system does not stop if Depends.mach.compiler.cmake is found makefiles_done = False both = "{}.{}".format(machine_name, compiler) for suffix in [both, machine_name, compiler]: for extra_suffix in ["", ".cmake"]: if extra_suffix == "" and makefiles_done: continue basename = "Depends.{}{}".format(suffix, extra_suffix) dfile = os.path.join(machines_dir, basename) outputdfile = os.path.join(output_dir, basename) if os.path.isfile(dfile): if suffix == both and extra_suffix == "": makefiles_done = True if not os.path.exists(outputdfile): safe_copy(dfile, outputdfile)
def _generate_baseline(self):
    """
    generate a new baseline case based on the current test

    Runs the baseline generation, records the GENERATE phase status, and then
    copies the newest coupler log(s) into the baseline directory under a
    date-less name.
    """
    with self._test_status:
        # generate baseline
        success, comments = generate_baseline(self._case)
        append_testlog(comments)
        status = TEST_PASS_STATUS if success else TEST_FAIL_STATUS
        baseline_name = self._case.get_value("BASEGEN_CASE")
        self._test_status.set_status(GENERATE_PHASE, status, comments=os.path.dirname(baseline_name))
        basegen_dir = os.path.join(self._case.get_value("BASELINE_ROOT"),
                                   self._case.get_value("BASEGEN_CASE"))
        # copy latest cpl log to baseline
        # drop the date so that the name is generic
        newestcpllogfiles = self._get_latest_cpl_logs()
        for cpllog in newestcpllogfiles:
            # NOTE(review): the dots in this pattern are unescaped and so match
            # any character; kept as-is to avoid changing which logs match.
            m = re.search(r"/({}.*.log).*.gz".format(self._cpllog), cpllog)
            if m is not None:
                baselog = os.path.join(basegen_dir, m.group(1)) + ".gz"
                # baselog is already the full destination path; previously it
                # was re-joined with basegen_dir, which only worked by accident
                # (os.path.join discards earlier parts when a later one is
                # absolute) and would double-prefix a relative basegen_dir.
                safe_copy(cpllog, baselog)
def _generate_baseline(self):
    """Generate standard baselines, then also save TSC's extra cam history files."""
    super(TSC, self)._generate_baseline()
    with SharedArea():
        case = self._case
        basegen_dir = os.path.join(case.get_value("BASELINE_ROOT"),
                                   case.get_value("BASEGEN_CASE"))
        hists = _get_all_hist_files('cam', case.get_value("RUNDIR"),
                                    [r'h\d*.*\.nc\.DT\d*'],
                                    ref_case=case.get_value("RUN_REFCASE"))
        logger.debug("TSC additional baseline files: {}".format(hists))
        for src in hists:
            # Strip everything before the component name so the baseline
            # file name is case-independent
            dst = os.path.join(basegen_dir, src[src.rfind('cam'):])
            if os.path.exists(dst):
                os.remove(dst)
            safe_copy(src, dst, preserve_meta=False)
def _do_full_nl_gen_impl(case, test, generate_name, baseline_root=None): test_dir = case.get_value("CASEROOT") casedoc_dir = os.path.join(test_dir, "CaseDocs") baseline_root = case.get_value("BASELINE_ROOT") if baseline_root is None else baseline_root baseline_dir = os.path.join(baseline_root, generate_name, test) baseline_casedocs = os.path.join(baseline_dir, "CaseDocs") if not os.path.isdir(baseline_dir): os.makedirs(baseline_dir, stat.S_IRWXU | stat.S_IRWXG | stat.S_IXOTH | stat.S_IROTH) if os.path.isdir(baseline_casedocs): shutil.rmtree(baseline_casedocs) dir_util.copy_tree(casedoc_dir, baseline_casedocs, preserve_mode=False) for item in glob.glob(os.path.join(test_dir, "user_nl*")): preexisting_baseline = os.path.join(baseline_dir, os.path.basename(item)) if (os.path.exists(preexisting_baseline)): os.remove(preexisting_baseline) safe_copy(item, baseline_dir, preserve_meta=False)
def copy_inputs_to_rundir(caseroot, compname, confdir, rundir, inst_string):
    """
    Stage the generated <compname>_in namelist and any *streams*.xml files
    from confdir into rundir. A stream xml file of the same name found in
    caseroot takes precedence over the generated one. No-op when rundir
    does not exist.
    """
    if not os.path.isdir(rundir):
        return
    namelist = compname + "_in"
    dest = os.path.join(rundir, namelist)
    # Multi-instance runs get a per-instance suffix on the namelist name
    if inst_string:
        dest += inst_string
    safe_copy(os.path.join(confdir, namelist), dest)
    for stream_xml in glob.glob(os.path.join(confdir, "*streams*.xml")):
        override = os.path.join(caseroot, os.path.basename(stream_xml))
        if os.path.exists(override):
            logger.info("Using {} for {} streams".format(override, compname))
            safe_copy(override, rundir)
        else:
            safe_copy(stream_xml, rundir)
def create_namelists(self, component=None):
    """
    Create component namelists

    component - if given, only that component class (lower-case, e.g. "atm")
                has its buildnml run; all classes are processed otherwise.

    Runs each component's buildnml (preferring a SourceMods override), then
    snapshots the resolved namelists from RUNDIR into CaseDocs.
    """
    self.flush()

    create_dirs(self)

    casebuild = self.get_value("CASEBUILD")
    caseroot = self.get_value("CASEROOT")
    rundir = self.get_value("RUNDIR")
    docdir = os.path.join(caseroot, "CaseDocs")

    # Load modules
    self.load_env()

    self.stage_refcase()

    logger.info("Creating component namelists")

    # Create namelists - must have cpl last in the list below
    # Note - cpl must be last in the loop below so that in generating its namelist,
    # it can use xml vars potentially set by other component's buildnml scripts
    models = self.get_values("COMP_CLASSES")
    models += [models.pop(0)]
    for model in models:
        model_str = model.lower()
        logger.info(" {} {}".format(time.strftime("%Y-%m-%d %H:%M:%S"),model_str))
        config_file = self.get_value("CONFIG_{}_FILE".format(model_str.upper()))
        config_dir = os.path.dirname(config_file)
        if model_str == "cpl":
            compname = "drv"
        else:
            compname = self.get_value("COMP_{}".format(model_str.upper()))
        if component is None or component == model_str:
            # first look in the case SourceMods directory
            cmd = os.path.join(caseroot, "SourceMods", "src."+compname, "buildnml")
            if os.path.isfile(cmd):
                logger.warning("\nWARNING: Using local buildnml file {}\n".format(cmd))
            else:
                # otherwise look in the component config_dir
                cmd = os.path.join(config_dir, "buildnml")
                expect(os.path.isfile(cmd), "Could not find buildnml file for component {}".format(compname))
            # NOTE(review): (caseroot) is not a tuple, just a parenthesized
            # string - presumably run_sub_or_cmd accepts that; confirm before
            # changing.
            run_sub_or_cmd(cmd, (caseroot), "buildnml", (self, caseroot, compname), case=self)

    logger.info("Finished creating component namelists")

    # Save namelists to docdir
    if (not os.path.isdir(docdir)):
        os.makedirs(docdir)
        try:
            with open(os.path.join(docdir, "README"), "w") as fd:
                fd.write(" CESM Resolved Namelist Files\n For documentation only DO NOT MODIFY\n")
        except (OSError, IOError) as e:
            expect(False, "Failed to write {}/README: {}".format(docdir, e))

    for cpglob in ["*_in_[0-9]*", "*modelio*", "*_in", "cesm.runconfig*",
                   "*streams*txt*", "*stxt", "*maps.rc", "*cism.config*"]:
        for file_to_copy in glob.glob(os.path.join(rundir, cpglob)):
            logger.debug("Copy file from '{}' to '{}'".format(file_to_copy, docdir))
            safe_copy(file_to_copy, docdir)

    # Copy over chemistry mechanism docs if they exist
    if (os.path.isdir(os.path.join(casebuild, "camconf"))):
        for file_to_copy in glob.glob(os.path.join(casebuild, "camconf", "*chem_mech*")):
            safe_copy(file_to_copy, docdir)
comments += " Copying '{}' to '{}'\n".format( test_hist, new_file) # Need to copy rather than move in case there are some history files # that will need to continue to be filled on the next phase; this # can be the case for a restart run. # # (If it weren't for that possibility, a move/rename would be more # robust here: The problem with a copy is that there can be # confusion after the second run as to which files were created by # the first run and which by the second. For example, if the second # run fails to output any history files, the test will still pass, # because the test system will think that run1's files were output # by run2. But we live with that downside for the sake of the reason # noted above.) safe_copy(test_hist, new_file) expect(num_copied > 0, "copy failed: no hist files found in rundir '{}'".format(rundir)) return comments def _hists_match(model, hists1, hists2, suffix1="", suffix2=""): """ return (num in set 1 but not 2 , num in set 2 but not 1, matchups) >>> hists1 = ['FOO.G.cpl.h1.nc', 'FOO.G.cpl.h2.nc', 'FOO.G.cpl.h3.nc'] >>> hists2 = ['cpl.h2.nc', 'cpl.h3.nc', 'cpl.h4.nc'] >>> _hists_match('cpl', hists1, hists2) (['FOO.G.cpl.h1.nc'], ['cpl.h4.nc'], [('FOO.G.cpl.h2.nc', 'cpl.h2.nc'), ('FOO.G.cpl.h3.nc', 'cpl.h3.nc')])
def _getTiming(self, inst=0):
    """
    Parse the model timing statistics for one instance and write the
    formatted timing profile report.

    inst - instance number; 0 means a single-instance run (no _NNNN label).

    Reads RUNDIR/timing/model_timing<inst>_stats, copies it into
    CASEROOT/timing, derives per-component cost/throughput numbers, and
    writes the human-readable profile (self.fout) including the driver
    timing flowchart. Returns early (with a warning) if no timing file is
    found.
    """
    components=self.case.get_values("COMP_CLASSES")
    for s in components:
        self.models[s] = _GetTimingInfo(s)
    # Convenience handles for the per-component timing records
    atm = self.models['ATM']
    lnd = self.models['LND']
    rof = self.models['ROF']
    ice = self.models['ICE']
    ocn = self.models['OCN']
    glc = self.models['GLC']
    cpl = self.models['CPL']
    cime_model = self.case.get_value("MODEL")
    caseid = self.case.get_value("CASE")
    mach = self.case.get_value("MACH")
    user = self.case.get_value("USER")
    continue_run = self.case.get_value("CONTINUE_RUN")
    rundir = self.case.get_value("RUNDIR")
    run_type = self.case.get_value("RUN_TYPE")
    ncpl_base_period = self.case.get_value("NCPL_BASE_PERIOD")
    # ncpl becomes the max coupling frequency over all components
    ncpl = 0
    for compclass in self.case.get_values("COMP_CLASSES"):
        comp_ncpl = self.case.get_value("{}_NCPL".format(compclass))
        if compclass == "OCN":
            ocn_ncpl = comp_ncpl
        if comp_ncpl is not None:
            ncpl = max(ncpl, comp_ncpl)
    compset = self.case.get_value("COMPSET")
    if compset is None:
        compset = ""
    grid = self.case.get_value("GRID")
    run_type = self.case.get_value("RUN_TYPE")
    stop_option = self.case.get_value("STOP_OPTION")
    stop_n = self.case.get_value("STOP_N")
    cost_pes = self.case.get_value("COST_PES")
    costpes_per_node = self.case.get_value("COSTPES_PER_NODE")
    totalpes = self.case.get_value("TOTALPES")
    max_mpitasks_per_node = self.case.get_value("MAX_MPITASKS_PER_NODE")
    smt_factor = max(1,int(self.case.get_value("MAX_TASKS_PER_NODE") / max_mpitasks_per_node))
    # PE count used for the cost estimate: explicit COST_PES wins, then a
    # per-node cost, then the raw task count
    if cost_pes > 0:
        pecost = cost_pes
    elif costpes_per_node:
        pecost = self.case.num_nodes * costpes_per_node
    else:
        pecost = totalpes
    # Pull per-component layout values (NTASKS etc.) onto the timing records;
    # CPL always reports a single instance
    for m in self.models.values():
        for key in ["NTASKS", "ROOTPE", "PSTRID", "NTHRDS", "NINST"]:
            if key == "NINST" and m.name == "CPL":
                m.ninst = 1
            else:
                setattr(m, key.lower(),
                        int(self.case.get_value("{}_{}".format(key, m.name))))
        m.comp = self.case.get_value("COMP_{}".format(m.name))
        m.pemax = m.rootpe + m.ntasks * m.pstrid - 1
    now = datetime.datetime.ctime(datetime.datetime.now())
    inittype = "FALSE"
    if (run_type == "startup" or run_type == "hybrid") and \
       not continue_run:
        inittype = "TRUE"
    if inst > 0:
        inst_label = '_{:04d}'.format(inst)
    else:
        inst_label = ''
    binfilename = os.path.join(rundir, "timing", "model_timing{}_stats".format(inst_label))
    finfilename = os.path.join(self.caseroot, "timing",
                               "{}_timing{}_stats.{}".format(cime_model, inst_label, self.lid))
    foutfilename = os.path.join(self.caseroot, "timing",
                                "{}_timing{}.{}.{}".format(cime_model, inst_label, caseid, self.lid))
    timingDir = os.path.join(self.caseroot, "timing")
    if not os.path.isfile(binfilename):
        logger.warning("No timing file found in run directory")
        return
    if not os.path.isdir(timingDir):
        os.makedirs(timingDir)
    # Archive the raw stats file into the case before parsing it
    safe_copy(binfilename, finfilename)
    os.chdir(self.caseroot)
    try:
        fin = open(finfilename, "r")
        self.finlines = fin.readlines()
        fin.close()
    except Exception as e:
        logger.critical("Unable to open file {}".format(finfilename))
        raise e
    # tlen: length of one coupling base period in days
    tlen = 1.0
    if ncpl_base_period == "decade":
        tlen = 3650.0
    elif ncpl_base_period == "year":
        tlen = 365.0
    elif ncpl_base_period == "day":
        tlen = 1.0
    elif ncpl_base_period == "hour":
        tlen = 1.0/24.0
    else:
        logger.warning("Unknown NCPL_BASE_PERIOD={}".format(ncpl_base_period))
    nprocs, ncount = self.gettime2('CPL:CLOCK_ADVANCE ')
    nsteps = ncount / nprocs
    # adays: simulated days for the whole run; odays: same for the ocean,
    # minus one ocean coupling interval on an initial run
    adays = nsteps*tlen/ncpl
    odays = nsteps*tlen/ncpl
    if inittype == "TRUE":
        odays = odays - (tlen/ocn_ncpl)
    peminmax = max([m.rootpe for m in self.models.values()])+1
    if ncpl_base_period in ["decade","year","day"] and int(adays) > 0:
        adays = int(adays)
        if tlen % ocn_ncpl == 0:
            odays = int(odays)
    self.adays = adays
    # Offsets position each component's column in the flowchart output
    maxoffset = 40
    extraoff = 20
    for m in self.models.values():
        m.offset = int((maxoffset*m.rootpe)/peminmax) + extraoff
    cpl.offset = 0
    try:
        self.fout = open(foutfilename, "w")
    except Exception as e:
        logger.critical("Could not open file for writing: {}".format(foutfilename))
        raise e
    self.write("---------------- TIMING PROFILE ---------------------\n")
    self.write(" Case : {}\n".format(caseid))
    self.write(" LID : {}\n".format(self.lid))
    self.write(" Machine : {}\n".format(mach))
    self.write(" Caseroot : {}\n".format(self.caseroot))
    self.write(" Timeroot : {}/Tools\n".format(self.caseroot))
    self.write(" User : {}\n".format(user))
    self.write(" Curr Date : {}\n".format(now))
    self.write(" grid : {}\n".format(grid))
    self.write(" compset : {}\n".format(compset))
    self.write(" run_type : {}, continue_run = {} (inittype = {})\n".format(run_type, str(continue_run).upper(), inittype))
    self.write(" stop_option : {}, stop_n = {}\n".format(stop_option, stop_n))
    self.write(" run_length : {} days ({} for ocean)\n\n".format(adays, odays))
    self.write(" component comp_pes root_pe tasks " "x threads" " instances (stride) \n")
    self.write(" --------- ------ ------- ------ " "------ --------- ------ \n")
    maxthrds = 0
    for k in self.case.get_values("COMP_CLASSES"):
        m = self.models[k]
        if m.comp == "cpl":
            comp_label = m.comp + inst_label
        else:
            comp_label = m.comp
        self.write(" {} = {:<8s} {:<6d} {:<6d} {:<6d} x {:<6d} {:<6d} ({:<6d}) \n".format(m.name.lower(), comp_label, (m.ntasks*m.nthrds *smt_factor), m.rootpe, m.ntasks, m.nthrds, m.ninst, m.pstrid))
        if m.nthrds > maxthrds:
            maxthrds = m.nthrds
    # Top-level driver timers
    nmax = self.gettime(' CPL:INIT ')[1]
    tmax = self.gettime(' CPL:RUN_LOOP ')[1]
    wtmin = self.gettime(' CPL:TPROF_WRITE ')[0]
    fmax = self.gettime(' CPL:FINAL ')[1]
    for k in components:
        if k != "CPL":
            m = self.models[k]
            m.tmin, m.tmax, _ = self.gettime(' CPL:{}_RUN '.format(m.name))
    otmin, otmax, _ = self.gettime(' CPL:OCNT_RUN ')
    # pick OCNT_RUN for tight coupling
    if otmax > ocn.tmax:
        ocn.tmin = otmin
        ocn.tmax = otmax
    cpl.tmin, cpl.tmax, _ = self.gettime(' CPL:RUN ')
    xmax = self.gettime(' CPL:COMM ')[1]
    ocnwaittime = self.gettime(' CPL:C2O_INITWAIT')[0]
    # Scale the ocean run time up to the full run length, then correct the
    # total for time already absorbed by the init wait
    if odays != 0:
        ocnrunitime = ocn.tmax * (adays/odays - 1.0)
    else:
        ocnrunitime = 0.0
    correction = max(0, ocnrunitime - ocnwaittime)
    tmax = tmax + wtmin + correction
    ocn.tmax += ocnrunitime
    # tmaxr-style values are model-years per wallclock day
    for m in self.models.values():
        m.tmaxr = 0
        if m.tmax > 0:
            m.tmaxr = adays*86400.0/(m.tmax*365.0)
    xmaxr = 0
    if xmax > 0:
        xmaxr = adays*86400.0/(xmax*365.0)
    tmaxr = 0
    if tmax > 0:
        tmaxr = adays*86400.0/(tmax*365.0)
    self.write("\n")
    self.write(" total pes active : {} \n".format(totalpes*maxthrds*smt_factor ))
    self.write(" mpi tasks per node : {} \n".format(max_mpitasks_per_node))
    self.write(" pe count for cost estimate : {} \n".format(pecost))
    self.write("\n")
    self.write(" Overall Metrics: \n")
    self.write(" Model Cost: {:10.2f} pe-hrs/simulated_year \n".format((tmax*365.0*pecost)/(3600.0*adays)))
    self.write(" Model Throughput: {:10.2f} simulated_years/day \n".format((86400.0*adays)/(tmax*365.0)) )
    self.write("\n")
    self.write(" Init Time : {:10.3f} seconds \n".format(nmax))
    self.write(" Run Time : {:10.3f} seconds {:10.3f} seconds/day \n".format(tmax, tmax/adays))
    self.write(" Final Time : {:10.3f} seconds \n".format(fmax))
    self.write("\n")
    self.write(" Actual Ocn Init Wait Time : {:10.3f} seconds \n".format(ocnwaittime))
    self.write(" Estimated Ocn Init Run Time : {:10.3f} seconds \n".format(ocnrunitime))
    self.write(" Estimated Run Time Correction : {:10.3f} seconds \n".format(correction))
    self.write(" (This correction has been applied to the ocean and"
               " total run times) \n")
    self.write("\n")
    self.write("Runs Time in total seconds, seconds/model-day, and"
               " model-years/wall-day \n")
    self.write("CPL Run Time represents time in CPL pes alone, "
               "not including time associated with data exchange "
               "with other components \n")
    self.write("\n")
    self.write(" TOT Run Time: {:10.3f} seconds {:10.3f} seconds/mday {:10.2f} myears/wday \n".format(tmax, tmax/adays, tmaxr))
    for k in self.case.get_values("COMP_CLASSES"):
        m = self.models[k]
        self.write(" {} Run Time: {:10.3f} seconds {:10.3f} seconds/mday {:10.2f} myears/wday \n".format(k, m.tmax, m.tmax/adays, m.tmaxr))
    self.write(" CPL COMM Time: {:10.3f} seconds {:10.3f} seconds/mday {:10.2f} myears/wday \n".format(xmax, xmax/adays, xmaxr))
    self.write("\n\n---------------- DRIVER TIMING FLOWCHART "
               "--------------------- \n\n")
    # Flowchart header: one indented label per component column
    pstrlen = 25
    hoffset = 1
    self.write(" NOTE: min:max driver timers (seconds/day): \n")
    for k in ["CPL", "OCN", "LND", "ROF", "ICE", "ATM", "GLC", "WAV"]:
        m = self.models[k]
        xspace = (pstrlen+hoffset+m.offset) * ' '
        self.write(" {} {} (pes {:d} to {:d}) \n".format(xspace, k, m.rootpe, m.pemax))
    self.write("\n")
    # Per-phase driver timers, in driver execution order
    self.prttime(' CPL:CLOCK_ADVANCE ')
    self.prttime(' CPL:OCNPRE1_BARRIER ')
    self.prttime(' CPL:OCNPRE1 ')
    self.prttime(' CPL:ATMOCN1_BARRIER ')
    self.prttime(' CPL:ATMOCN1 ')
    self.prttime(' CPL:OCNPREP_BARRIER ')
    self.prttime(' CPL:OCNPREP ')
    self.prttime(' CPL:C2O_BARRIER ', offset=ocn.offset, div=odays, coff=cpl.offset)
    self.prttime(' CPL:C2O ', offset=ocn.offset, div=odays, coff=cpl.offset)
    self.prttime(' CPL:LNDPREP_BARRIER ')
    self.prttime(' CPL:LNDPREP ')
    self.prttime(' CPL:C2L_BARRIER ', offset=lnd.offset, coff=cpl.offset)
    self.prttime(' CPL:C2L ', offset=lnd.offset, coff=cpl.offset)
    self.prttime(' CPL:ICEPREP_BARRIER ')
    self.prttime(' CPL:ICEPREP ')
    self.prttime(' CPL:C2I_BARRIER ', offset=ice.offset, coff=cpl.offset)
    self.prttime(' CPL:C2I ', offset=ice.offset, coff=cpl.offset)
    self.prttime(' CPL:WAVPREP_BARRIER ')
    self.prttime(' CPL:WAVPREP ')
    self.prttime(' CPL:C2W_BARRIER ', offset=ice.offset, coff=cpl.offset)
    self.prttime(' CPL:C2W ', offset=ice.offset, coff=cpl.offset)
    self.prttime(' CPL:ROFPREP_BARRIER ')
    self.prttime(' CPL:ROFPREP ')
    self.prttime(' CPL:C2R_BARRIER ', offset=rof.offset, coff=cpl.offset)
    self.prttime(' CPL:C2R ', offset=rof.offset, coff=cpl.offset)
    self.prttime(' CPL:ICE_RUN_BARRIER ', offset=ice.offset)
    self.prttime(' CPL:ICE_RUN ', offset=ice.offset)
    self.prttime(' CPL:LND_RUN_BARRIER ', offset=lnd.offset)
    self.prttime(' CPL:LND_RUN ', offset=lnd.offset)
    self.prttime(' CPL:ROF_RUN_BARRIER ', offset=rof.offset)
    self.prttime(' CPL:ROF_RUN ', offset=rof.offset)
    self.prttime(' CPL:WAV_RUN_BARRIER ', offset=rof.offset)
    self.prttime(' CPL:WAV_RUN ', offset=rof.offset)
    self.prttime(' CPL:OCNT_RUN_BARRIER ', offset=ocn.offset, div=odays)
    self.prttime(' CPL:OCNT_RUN ', offset=ocn.offset, div=odays)
    self.prttime(' CPL:O2CT_BARRIER ', offset=ocn.offset, div=odays, coff=cpl.offset)
    self.prttime(' CPL:O2CT ', offset=ocn.offset, div=odays, coff=cpl.offset)
    self.prttime(' CPL:OCNPOSTT_BARRIER ')
    self.prttime(' CPL:OCNPOSTT ')
    self.prttime(' CPL:ATMOCNP_BARRIER ')
    self.prttime(' CPL:ATMOCNP ')
    self.prttime(' CPL:L2C_BARRIER ', offset=lnd.offset, coff=cpl.offset)
    # NOTE(review): div=cpl.offset looks suspicious (every other call passes a
    # day count to div) - kept as-is; confirm against the timing tool's intent.
    self.prttime(' CPL:L2C ', offset=lnd.offset, div=cpl.offset)
    self.prttime(' CPL:LNDPOST_BARRIER ')
    self.prttime(' CPL:LNDPOST ')
    self.prttime(' CPL:GLCPREP_BARRIER ')
    self.prttime(' CPL:GLCPREP ')
    self.prttime(' CPL:C2G_BARRIER ', offset=glc.offset, coff=cpl.offset)
    self.prttime(' CPL:C2G ', offset=glc.offset, coff=cpl.offset)
    self.prttime(' CPL:R2C_BARRIER ', offset=rof.offset, coff=cpl.offset)
    self.prttime(' CPL:R2C ', offset=rof.offset, coff=cpl.offset)
    self.prttime(' CPL:ROFPOST_BARRIER ')
    self.prttime(' CPL:ROFPOST ')
    self.prttime(' CPL:BUDGET1_BARRIER ')
    self.prttime(' CPL:BUDGET1 ')
    self.prttime(' CPL:I2C_BARRIER ', offset=ice.offset, coff=cpl.offset)
    self.prttime(' CPL:I2C ', offset=ice.offset, coff=cpl.offset)
    self.prttime(' CPL:ICEPOST_BARRIER ')
    self.prttime(' CPL:ICEPOST ')
    self.prttime(' CPL:FRACSET_BARRIER ')
    self.prttime(' CPL:FRACSET ')
    self.prttime(' CPL:ATMOCN2_BARRIER ')
    self.prttime(' CPL:ATMOCN2 ')
    self.prttime(' CPL:OCNPRE2_BARRIER ')
    self.prttime(' CPL:OCNPRE2 ')
    self.prttime(' CPL:C2O2_BARRIER ', offset=ocn.offset, div=odays, coff=cpl.offset)
    self.prttime(' CPL:C2O2 ', offset=ocn.offset, div=odays, coff=cpl.offset)
    self.prttime(' CPL:ATMOCNQ_BARRIER')
    self.prttime(' CPL:ATMOCNQ ')
    self.prttime(' CPL:ATMPREP_BARRIER ')
    self.prttime(' CPL:ATMPREP ')
    self.prttime(' CPL:C2A_BARRIER ', offset=atm.offset, coff=cpl.offset)
    self.prttime(' CPL:C2A ', offset=atm.offset, coff=cpl.offset)
    self.prttime(' CPL:OCN_RUN_BARRIER ', offset=ocn.offset, div=odays)
    self.prttime(' CPL:OCN_RUN ', offset=ocn.offset, div=odays)
    self.prttime(' CPL:ATM_RUN_BARRIER ', offset=atm.offset)
    self.prttime(' CPL:ATM_RUN ', offset=atm.offset)
    self.prttime(' CPL:GLC_RUN_BARRIER ', offset=glc.offset)
    self.prttime(' CPL:GLC_RUN ', offset=glc.offset)
    self.prttime(' CPL:W2C_BARRIER ', offset=glc.offset, coff=cpl.offset)
    self.prttime(' CPL:W2C ', offset=glc.offset, coff=cpl.offset)
    self.prttime(' CPL:WAVPOST_BARRIER ')
    # NOTE(review): cpl.offset is passed positionally here (as prttime's first
    # optional arg) unlike the keyword style used everywhere else - kept as-is.
    self.prttime(' CPL:WAVPOST ', cpl.offset)
    self.prttime(' CPL:G2C_BARRIER ', offset=glc.offset, coff=cpl.offset)
    self.prttime(' CPL:G2C ', offset=glc.offset, coff=cpl.offset)
    self.prttime(' CPL:GLCPOST_BARRIER ')
    self.prttime(' CPL:GLCPOST ')
    self.prttime(' CPL:A2C_BARRIER ', offset=atm.offset, coff=cpl.offset)
    self.prttime(' CPL:A2C ', offset=atm.offset, coff=cpl.offset)
    self.prttime(' CPL:ATMPOST_BARRIER ')
    self.prttime(' CPL:ATMPOST ')
    self.prttime(' CPL:BUDGET2_BARRIER ')
    self.prttime(' CPL:BUDGET2 ')
    self.prttime(' CPL:BUDGET3_BARRIER ')
    self.prttime(' CPL:BUDGET3 ')
    self.prttime(' CPL:BUDGETF_BARRIER ')
    self.prttime(' CPL:BUDGETF ')
    self.prttime(' CPL:O2C_BARRIER ', offset=ocn.offset, div=odays, coff=cpl.offset)
    self.prttime(' CPL:O2C ', offset=ocn.offset, div=odays, coff=cpl.offset)
    self.prttime(' CPL:OCNPOST_BARRIER ')
    self.prttime(' CPL:OCNPOST ')
    self.prttime(' CPL:RESTART_BARRIER ')
    self.prttime(' CPL:RESTART')
    self.prttime(' CPL:HISTORY_BARRIER ')
    self.prttime(' CPL:HISTORY ')
    self.prttime(' CPL:TSTAMP_WRITE ')
    self.prttime(' CPL:TPROF_WRITE ')
    self.prttime(' CPL:RUN_LOOP_BSTOP ')
    self.write("\n\n")
    self.write("More info on coupler timing:\n")
    # Detailed breakdown of each coupler phase's sub-timers
    self.write("\n")
    self.prttime(' CPL:OCNPRE1 ')
    self.prttime(' CPL:ocnpre1_atm2ocn ')
    self.write("\n")
    self.prttime(' CPL:OCNPREP ')
    self.prttime(' CPL:OCNPRE2 ')
    self.prttime(' CPL:ocnprep_avg ')
    self.prttime(' CPL:ocnprep_diagav ')
    self.write("\n")
    self.prttime(' CPL:LNDPREP ')
    self.prttime(' CPL:lndprep_atm2lnd ')
    self.prttime(' CPL:lndprep_mrgx2l ')
    self.prttime(' CPL:lndprep_diagav ')
    self.write("\n")
    self.prttime(' CPL:ICEPREP ')
    self.prttime(' CPL:iceprep_ocn2ice ')
    self.prttime(' CPL:iceprep_atm2ice ')
    self.prttime(' CPL:iceprep_mrgx2i ')
    self.prttime(' CPL:iceprep_diagav ')
    self.write("\n")
    self.prttime(' CPL:WAVPREP ')
    self.prttime(' CPL:wavprep_atm2wav ')
    self.prttime(' CPL:wavprep_ocn2wav ')
    self.prttime(' CPL:wavprep_ice2wav ')
    self.prttime(' CPL:wavprep_mrgx2w ')
    self.prttime(' CPL:wavprep_diagav ')
    self.write("\n")
    self.prttime(' CPL:ROFPREP ')
    self.prttime(' CPL:rofprep_l2xavg ')
    self.prttime(' CPL:rofprep_lnd2rof ')
    self.prttime(' CPL:rofprep_mrgx2r ')
    self.prttime(' CPL:rofprep_diagav ')
    self.write("\n")
    self.prttime(' CPL:GLCPREP ')
    self.prttime(' CPL:glcprep_avg ')
    self.prttime(' CPL:glcprep_lnd2glc ')
    self.prttime(' CPL:glcprep_mrgx2g ')
    self.prttime(' CPL:glcprep_diagav ')
    self.write("\n")
    self.prttime(' CPL:ATMPREP ')
    self.prttime(' CPL:atmprep_xao2atm ')
    self.prttime(' CPL:atmprep_ocn2atm ')
    self.prttime(' CPL:atmprep_alb2atm ')
    self.prttime(' CPL:atmprep_ice2atm ')
    self.prttime(' CPL:atmprep_lnd2atm ')
    self.prttime(' CPL:atmprep_mrgx2a ')
    self.prttime(' CPL:atmprep_diagav ')
    self.write("\n")
    self.prttime(' CPL:ATMOCNP ')
    self.prttime(' CPL:ATMOCN1 ')
    self.prttime(' CPL:ATMOCN2 ')
    self.prttime(' CPL:atmocnp_ice2ocn ')
    self.prttime(' CPL:atmocnp_wav2ocn ')
    self.prttime(' CPL:atmocnp_fluxo ')
    self.prttime(' CPL:atmocnp_fluxe ')
    self.prttime(' CPL:atmocnp_mrgx2o ')
    self.prttime(' CPL:atmocnp_accum ')
    self.prttime(' CPL:atmocnp_ocnalb ')
    self.write("\n")
    self.prttime(' CPL:ATMOCNQ ')
    self.prttime(' CPL:atmocnq_ocn2atm ')
    self.prttime(' CPL:atmocnq_fluxa ')
    self.prttime(' CPL:atmocnq_atm2ocnf ')
    self.write("\n")
    self.prttime(' CPL:OCNPOSTT ')
    self.prttime(' CPL:OCNPOST ')
    self.prttime(' CPL:ocnpost_diagav ')
    self.write("\n")
    self.prttime(' CPL:LNDPOST ')
    self.prttime(' CPL:lndpost_diagav ')
    self.prttime(' CPL:lndpost_acc2lr ')
    self.prttime(' CPL:lndpost_acc2lg ')
    self.write("\n")
    # NOTE(review): 'ROFOST' (missing P) is presumably a typo for ROFPOST,
    # but the timer name must match what the model wrote - kept as-is.
    self.prttime(' CPL:ROFOST ')
    self.prttime(' CPL:rofpost_diagav ')
    self.prttime(' CPL:rofpost_histaux ')
    self.prttime(' CPL:rofpost_rof2lnd ')
    self.prttime(' CPL:rofpost_rof2ice ')
    self.prttime(' CPL:rofpost_rof2ocn ')
    self.write("\n")
    self.prttime(' CPL:ICEPOST ')
    self.prttime(' CPL:icepost_diagav ')
    self.write("\n")
    self.prttime(' CPL:WAVPOST ')
    self.prttime(' CPL:wavpost_diagav ')
    self.write("\n")
    self.prttime(' CPL:GLCPOST ')
    self.prttime(' CPL:glcpost_diagav ')
    self.prttime(' CPL:glcpost_glc2lnd ')
    self.prttime(' CPL:glcpost_glc2ice ')
    self.prttime(' CPL:glcpost_glc2ocn ')
    self.write("\n")
    self.prttime(' CPL:ATMPOST ')
    self.prttime(' CPL:atmpost_diagav ')
    self.write("\n")
    self.prttime(' CPL:BUDGET ')
    self.prttime(' CPL:BUDGET1 ')
    self.prttime(' CPL:BUDGET2 ')
    self.prttime(' CPL:BUDGET3 ')
    self.prttime(' CPL:BUDGETF ')
    self.write("\n\n")
    self.fout.close()
def generate_baseline(case, baseline_dir=None, allow_baseline_overwrite=False):
    """
    copy the current test output to baseline result

    case - The case containing the hist files to be copied into baselines
    baseline_dir - Optionally, specify a specific baseline dir, otherwise it will be computed from case config
    allow_baseline_overwrite must be true to generate baselines to an existing directory.

    returns (SUCCESS, comments)
    """
    rundir = case.get_value("RUNDIR")
    ref_case = case.get_value("RUN_REFCASE")
    if baseline_dir is None:
        baselineroot = case.get_value("BASELINE_ROOT")
        basegen_dir = os.path.join(baselineroot, case.get_value("BASEGEN_CASE"))
    else:
        basegen_dir = baseline_dir
    testcase = case.get_value("CASE")
    archive = case.get_env('archive')

    if not os.path.isdir(basegen_dir):
        os.makedirs(basegen_dir)

    # Refuse to clobber an existing baseline unless the caller explicitly allowed it.
    expect(not (os.path.isdir(os.path.join(basegen_dir, testcase)) and not allow_baseline_overwrite),
           " Cowardly refusing to overwrite existing baseline directory")

    comments = "Generating baselines into '{}'\n".format(basegen_dir)
    num_gen = 0
    for model in _iter_model_file_substrs(case):
        comments += " generating for model '{}'\n".format(model)
        # The coupler's hist-file extensions are registered under the 'drv' archive entry.
        hist_key = 'drv' if model == 'cpl' else model
        file_extensions = archive.get_hist_file_extensions(archive.get_entry(hist_key))
        hists = _get_latest_hist_files(model, rundir, file_extensions, ref_case=ref_case)
        logger.debug("latest_files: {}".format(hists))
        num_gen += len(hists)
        for hist in hists:
            # Strip everything before the model substring so the baseline name is case-independent.
            basename = hist[hist.rfind(model):]
            baseline = os.path.join(basegen_dir, basename)
            if os.path.exists(baseline):
                os.remove(baseline)
            safe_copy(hist, baseline)
            comments += " generating baseline '{}' from file {}\n".format(baseline, hist)

    # copy latest cpl log to baseline
    # drop the date so that the name is generic
    cplname = "med" if case.get_value("COMP_INTERFACE") == "nuopc" else "cpl"
    newestcpllogfile = case.get_latest_cpl_log(coupler_log_path=rundir, cplname=cplname)
    if newestcpllogfile is None:
        logger.warning("No {}.log file found in directory {}".format(cplname, rundir))
    else:
        safe_copy(newestcpllogfile, os.path.join(basegen_dir, "{}.log.gz".format(cplname)))

    expect(num_gen > 0, "Could not generate any hist files for case '{}', something is seriously wrong".format(os.path.join(rundir, testcase)))

    #make sure permissions are open in baseline directory
    for root, _, files in os.walk(basegen_dir):
        for name in files:
            try:
                os.chmod(os.path.join(root, name),
                         stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP | stat.S_IROTH)
            except OSError:
                # We tried. Not worth hard failure here.
                pass

    if get_model() == "e3sm":
        # Record the commit sha and timestamp of this bless for provenance.
        bless_log = os.path.join(basegen_dir, BLESS_LOG_NAME)
        with open(bless_log, "a") as fd:
            fd.write("sha:{} date:{}\n".format(get_current_commit(repo=case.get_value("CIMEROOT")),
                                               get_timestamp(timestamp_format="%Y-%m-%d_%H:%M:%S")))
    return True, comments
def create_cdash_upload_xml(results, cdash_build_name, cdash_build_group, utc_time, hostname, force_log_upload):
###############################################################################
    """
    Collect build/run/cprnc logs for failed (or, with force_log_upload, all)
    tests, tar+gzip them, and write a CDash Upload.xml containing the tarball
    base64-encoded.

    results: dict mapping test_name -> (test_path, test_status)
    The staging directory '<cdash_build_name>_logs' is always removed on exit.
    """
    # NOTE(review): assumes the CDash data directory "Testing/<utc_time>" already
    # exists when Upload.xml is written — confirm the caller creates it.
    data_rel_path = os.path.join("Testing", utc_time)
    try:
        log_dir = "{}_logs".format(cdash_build_name)

        need_to_upload = False

        for test_name, test_data in results.items():
            test_path, test_status = test_data

            # Only gather logs for tests that did not pass cleanly (namelist-only
            # failures are treated as passing here), unless forced.
            if test_status not in [TEST_PASS_STATUS, NAMELIST_FAIL_STATUS] or force_log_upload:
                test_case_dir = os.path.dirname(test_path)

                ts = TestStatus(test_case_dir)
                # A sharedlib-build failure counts as a build failure; otherwise
                # fall through to the model-build phase status.
                build_status = ts.get_status(SHAREDLIB_BUILD_PHASE)
                build_status = TEST_FAIL_STATUS if build_status == TEST_FAIL_STATUS else ts.get_status(MODEL_BUILD_PHASE)
                run_status = ts.get_status(RUN_PHASE)
                baseline_status = ts.get_status(BASELINE_PHASE)
                if build_status == TEST_FAIL_STATUS or run_status == TEST_FAIL_STATUS or baseline_status == TEST_FAIL_STATUS or force_log_upload:
                    case_dirs = [test_case_dir]
                    case_base = os.path.basename(test_case_dir)
                    # Two-case tests (e.g. compare tests) keep a second case under case2/.
                    test_case2_dir = os.path.join(test_case_dir, "case2", case_base)
                    if os.path.exists(test_case2_dir):
                        case_dirs.append(test_case2_dir)

                    for case_dir in case_dirs:
                        # Build failures: logs live in EXEROOT; run/baseline failures: RUNDIR.
                        param = "EXEROOT" if build_status == TEST_FAIL_STATUS else "RUNDIR"
                        log_src_dir = run_cmd_no_fail("./xmlquery {} --value".format(param), from_dir=case_dir)

                        log_dst_dir = os.path.join(log_dir, "{}{}_{}_logs".format(test_name, "" if case_dir == test_case_dir else ".case2", param))
                        os.makedirs(log_dst_dir)
                        for log_file in glob.glob(os.path.join(log_src_dir, "*log*")):
                            safe_copy(log_file, log_dst_dir)
                        for log_file in glob.glob(os.path.join(log_src_dir, "*.cprnc.out*")):
                            safe_copy(log_file, log_dst_dir)

                        need_to_upload = True

        if (need_to_upload):

            tarball = "{}.tar.gz".format(log_dir)
            if (os.path.exists(tarball)):
                os.remove(tarball)

            # Tar through a shell pipe so the gzip output lands directly in tarball.
            run_cmd_no_fail("tar -cf - {} | gzip -c".format(log_dir), arg_stdout=tarball)
            # NOTE(review): local name 'base64' shadows the stdlib module of the
            # same name for the rest of this function.
            base64 = run_cmd_no_fail("base64 {}".format(tarball))

            xml_text = \
r"""<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="Dart/Source/Server/XSL/Build.xsl <file:///Dart/Source/Server/XSL/Build.xsl> "?>
<Site BuildName="{}" BuildStamp="{}-{}" Name="{}" Generator="ctest3.0.0">
<Upload>
<File filename="{}">
<Content encoding="base64">
{}
</Content>
</File>
</Upload>
</Site>
""".format(cdash_build_name, utc_time, cdash_build_group, hostname, os.path.abspath(tarball), base64)

            with open(os.path.join(data_rel_path, "Upload.xml"), "w") as fd:
                fd.write(xml_text)

    finally:
        # Always clean up the staging directory, even on error.
        if (os.path.isdir(log_dir)):
            shutil.rmtree(log_dir)
    def stage_refcase(self, input_data_root=None, data_list_dir=None):
        """
        Get a REFCASE for a hybrid or branch run
        This is the only case in which we are downloading an entire directory instead of
        a single file at a time.

        input_data_root: override for DIN_LOC_ROOT when downloading
        data_list_dir: directory holding *.input_data_list files (default "Buildconf")
        Returns True. rpointer files are copied into RUNDIR; all other refcase
        files are symlinked.
        """
        get_refcase = self.get_value("GET_REFCASE")
        run_type = self.get_value("RUN_TYPE")
        continue_run = self.get_value("CONTINUE_RUN")

        # We do not fully populate the inputdata directory on every
        # machine and do not expect every user to download the 3TB+ of
        # data in our inputdata repository. This code checks for the
        # existence of inputdata in the local inputdata directory and
        # attempts to download data from the server if it's needed and
        # missing.
        if get_refcase and run_type != "startup" and not continue_run:
            din_loc_root = self.get_value("DIN_LOC_ROOT")
            run_refdate = self.get_value("RUN_REFDATE")
            run_refcase = self.get_value("RUN_REFCASE")
            run_refdir = self.get_value("RUN_REFDIR")
            rundir = self.get_value("RUNDIR")

            if os.path.isabs(run_refdir):
                # An absolute RUN_REFDIR must already exist; no download attempted.
                refdir = run_refdir
                expect(
                    os.path.isdir(refdir),
                    "Reference case directory {} does not exist or is not readable"
                    .format(refdir))
            else:
                refdir = os.path.join(din_loc_root, run_refdir, run_refcase,
                                      run_refdate)
                if not os.path.isdir(refdir):
                    logger.warning(
                        "Refcase not found in {}, will attempt to download from inputdata"
                        .format(refdir))
                    # Write a one-line data list so the download machinery knows
                    # which directory to fetch.
                    with open(os.path.join("Buildconf",
                                           "refcase.input_data_list"),
                              "w") as fd:
                        fd.write("refdir = {}{}".format(refdir, os.sep))
                    if input_data_root is None:
                        input_data_root = din_loc_root
                    if data_list_dir is None:
                        data_list_dir = "Buildconf"
                    success = _downloadfromserver(self,
                                                  input_data_root=input_data_root,
                                                  data_list_dir=data_list_dir)
                    expect(success, "Could not download refcase from any server")

            logger.info(" - Prestaging REFCASE ({}) to {}".format(
                refdir, rundir))

            # prestage the reference case's files.

            if (not os.path.exists(rundir)):
                logger.debug("Creating run directory: {}".format(rundir))
                os.makedirs(rundir)
            # Sentinel: stays None (falsy) if the glob below matches nothing, so
            # the expect() after the loop fires on an rpointer-less refcase.
            rpointerfile = None
            # copy the refcases' rpointer files to the run directory
            for rpointerfile in glob.iglob(
                    os.path.join("{}", "*rpointer*").format(refdir)):
                logger.info("Copy rpointer {}".format(rpointerfile))
                safe_copy(rpointerfile, rundir)
                # rpointer files must be writable so the model can update them.
                os.chmod(os.path.join(rundir, os.path.basename(rpointerfile)),
                         0o644)
            expect(
                rpointerfile,
                "Reference case directory {} does not contain any rpointer files".
                format(refdir))
            # link everything else
            for rcfile in glob.iglob(os.path.join(refdir, "*")):
                rcbaseline = os.path.basename(rcfile)
                if not os.path.exists("{}/{}".format(rundir, rcbaseline)):
                    logger.info("Staging file {}".format(rcfile))
                    os.symlink(rcfile, "{}/{}".format(rundir, rcbaseline))
            # Backward compatibility, some old refcases have cam2 in the name
            # link to local cam file.
            for cam2file in glob.iglob(
                    os.path.join("{}", "*.cam2.*").format(rundir)):
                camfile = cam2file.replace("cam2", "cam")
                os.symlink(cam2file, camfile)
        elif not get_refcase and run_type != "startup":
            logger.info(
                "GET_REFCASE is false, the user is expected to stage the refcase to the run directory."
            )
            if os.path.exists(os.path.join("Buildconf", "refcase.input_data_list")):
                os.remove(os.path.join("Buildconf", "refcase.input_data_list"))
        return True
def create_clone(self, newcase, keepexe=False, mach_dir=None, project=None, cime_output_root=None, exeroot=None, rundir=None, user_mods_dir=None): """ Create a case clone If exeroot or rundir are provided (not None), sets these directories to the given paths; if not provided, uses default values for these directories. It is an error to provide exeroot if keepexe is True. """ if cime_output_root is None: cime_output_root = self.get_value("CIME_OUTPUT_ROOT") newcaseroot = os.path.abspath(newcase) expect(not os.path.isdir(newcaseroot), "New caseroot directory {} already exists".format(newcaseroot)) newcasename = os.path.basename(newcaseroot) expect(check_name(newcasename), "New case name invalid {} ".format(newcasename)) newcase_cimeroot = os.path.abspath(get_cime_root()) # create clone from case to case clone_cimeroot = self.get_value("CIMEROOT") if newcase_cimeroot != clone_cimeroot: logger.warning(" case CIMEROOT is {} ".format(newcase_cimeroot)) logger.warning(" clone CIMEROOT is {} ".format(clone_cimeroot)) logger.warning(" It is NOT recommended to clone cases from different versions of CIME.") # *** create case object as deepcopy of clone object *** srcroot = os.path.join(newcase_cimeroot,"..") newcase = self.copy(newcasename, newcaseroot, newsrcroot=srcroot) newcase.set_value("CIMEROOT", newcase_cimeroot) # if we are cloning to a different user modify the output directory olduser = self.get_value("USER") newuser = os.environ.get("USER") if olduser != newuser: cime_output_root = cime_output_root.replace(olduser, newuser) newcase.set_value("USER", newuser) newcase.set_value("CIME_OUTPUT_ROOT", cime_output_root) # try to make the new output directory and raise an exception # on any error other than directory already exists. if os.path.isdir(cime_output_root): expect(os.access(cime_output_root, os.W_OK), "Directory {} is not writable " "by this user. 
Use the --cime-output-root flag to provide a writable " "scratch directory".format(cime_output_root)) else: try: os.makedirs(cime_output_root) except: if not os.path.isdir(cime_output_root): raise # determine if will use clone executable or not if keepexe: orig_exeroot = self.get_value("EXEROOT") newcase.set_value("EXEROOT", orig_exeroot) newcase.set_value("BUILD_COMPLETE","TRUE") orig_bld_complete = self.get_value("BUILD_COMPLETE") if not orig_bld_complete: logger.warning("\nWARNING: Creating a clone with --keepexe before building the original case may cause PIO_TYPENAME to be invalid in the clone") logger.warning("Avoid this message by building case one before you clone.\n") else: newcase.set_value("BUILD_COMPLETE","FALSE") # set machdir if mach_dir is not None: newcase.set_value("MACHDIR", mach_dir) # set exeroot and rundir if requested if exeroot is not None: expect(not keepexe, "create_case_clone: if keepexe is True, " "then exeroot cannot be set") newcase.set_value("EXEROOT", exeroot) if rundir is not None: newcase.set_value("RUNDIR", rundir) # Set project id # Note: we do not just copy this from the clone because it seems likely that # users will want to change this sometimes, especially when cloning another # user's case. However, note that, if a project is not given, the fallback will # be to copy it from the clone, just like other xml variables are copied. 
if project is None: project = self.get_value("PROJECT", subgroup=self.get_primary_job()) if project is not None: newcase.set_value("PROJECT", project) # create caseroot newcase.create_caseroot(clone=True) newcase.flush(flushall=True) # copy user_ files cloneroot = self.get_case_root() files = glob.glob(cloneroot + '/user_*') for item in files: safe_copy(item, newcaseroot) # copy SourceMod and Buildconf files # if symlinks exist, copy rather than follow links for casesub in ("SourceMods", "Buildconf"): shutil.copytree(os.path.join(cloneroot, casesub), os.path.join(newcaseroot, casesub), symlinks=True) # lock env_case.xml in new case lock_file("env_case.xml", newcaseroot) # apply user_mods if appropriate newcase_root = newcase.get_value("CASEROOT") if user_mods_dir is not None: if keepexe: # If keepexe CANNOT change any env_build.xml variables - so make a temporary copy of # env_build.xml and verify that it has not been modified safe_copy(os.path.join(newcaseroot, "env_build.xml"), os.path.join(newcaseroot, "LockedFiles", "env_build.xml")) # Now apply contents of user_mods directory apply_user_mods(newcase_root, user_mods_dir, keepexe=keepexe) # Determine if env_build.xml has changed if keepexe: success, comment = compare_files(os.path.join(newcaseroot, "env_build.xml"), os.path.join(newcaseroot, "LockedFiles", "env_build.xml")) if not success: logger.warning(comment) shutil.rmtree(newcase_root) expect(False, "env_build.xml cannot be changed via usermods if keepexe is an option: \n " "Failed to clone case, removed {}\n".format(newcase_root)) # if keep executable, then remove the new case SourceMods directory and link SourceMods to # the clone directory if keepexe: shutil.rmtree(os.path.join(newcase_root, "SourceMods")) os.symlink(os.path.join(cloneroot, "SourceMods"), os.path.join(newcase_root, "SourceMods")) # Update README.case fclone = open(cloneroot + "/README.case", "r") fnewcase = open(newcaseroot + "/README.case", "a") fnewcase.write("\n *** original clone 
README follows ****") fnewcase.write("\n " + fclone.read()) clonename = self.get_value("CASE") logger.info(" Successfully created new case {} from clone case {} ".format(newcasename, clonename)) newcase.case_setup() return newcase
def _save_postrun_timing_e3sm(case, lid):
    """
    Archive E3SM timing output after a run.

    Tars RUNDIR/timing into timing.<lid>.tar.gz, gzips the timing stats file
    in CASEROOT/timing, and — when a SAVE_TIMING_DIR performance archive is
    configured for this project/machine — kills the syslog watcher and copies
    timings, logs, and machine-specific batch output into the archive.
    """
    caseroot = case.get_value("CASEROOT")
    rundir = case.get_value("RUNDIR")

    # tar timings
    rundir_timing_dir = os.path.join(rundir, "timing." + lid)
    shutil.move(os.path.join(rundir, "timing"), rundir_timing_dir)
    with tarfile.open("%s.tar.gz" % rundir_timing_dir, "w:gz") as tfd:
        tfd.add(rundir_timing_dir, arcname=os.path.basename(rundir_timing_dir))

    # the tarball replaces the raw directory
    shutil.rmtree(rundir_timing_dir)

    gzip_existing_file(os.path.join(caseroot, "timing", "e3sm_timing_stats.%s" % lid))

    # JGF: not sure why we do this
    timing_saved_file = "timing.%s.saved" % lid
    touch(os.path.join(caseroot, "timing", timing_saved_file))

    # Everything below only applies when a performance archive is configured.
    project = case.get_value("PROJECT", subgroup=case.get_primary_job())
    if not case.is_save_timing_dir_project(project):
        return

    timing_dir = case.get_value("SAVE_TIMING_DIR")
    if timing_dir is None or not os.path.isdir(timing_dir):
        return

    mach = case.get_value("MACH")
    base_case = case.get_value("CASE")
    full_timing_dir = os.path.join(timing_dir, "performance_archive", getpass.getuser(), base_case, lid)

    # NOTE(review): presumably the prerun hook created this directory — if it is
    # missing we silently skip archiving.
    if not os.path.isdir(full_timing_dir):
        return

    # Kill mach_syslog
    job_id = _get_batch_job_id_for_syslog(case)
    if job_id is not None:
        syslog_jobid_path = os.path.join(rundir, "syslog_jobid.{}".format(job_id))
        if os.path.exists(syslog_jobid_path):
            try:
                with open(syslog_jobid_path, "r") as fd:
                    syslog_jobid = int(fd.read().strip())
                os.kill(syslog_jobid, signal.SIGTERM)
            except (ValueError, OSError) as e:
                logger.warning("Failed to kill syslog: {}".format(e))
            finally:
                # Remove the pid file whether or not the kill succeeded.
                os.remove(syslog_jobid_path)

    # copy timings
    safe_copy("%s.tar.gz" % rundir_timing_dir, full_timing_dir, preserve_meta=False)

    #
    # save output files and logs
    #
    globs_to_copy = []
    if job_id is not None:
        # Batch stdout/stderr naming conventions differ per machine.
        if mach == "titan":
            globs_to_copy.append("%s*OU" % job_id)
        elif mach == "anvil":
            globs_to_copy.append("/home/%s/%s*OU" % (getpass.getuser(), job_id))
        elif mach in ["mira", "theta"]:
            globs_to_copy.append("%s*error" % job_id)
            globs_to_copy.append("%s*output" % job_id)
            globs_to_copy.append("%s*cobaltlog" % job_id)
        elif mach in ["edison", "cori-haswell", "cori-knl"]:
            globs_to_copy.append("%s*run*%s" % (case.get_value("CASE"), job_id))
        elif mach == "summit":
            globs_to_copy.append("e3sm.stderr.%s" % job_id)
            globs_to_copy.append("e3sm.stdout.%s" % job_id)

    globs_to_copy.append("logs/run_environment.txt.{}".format(lid))
    globs_to_copy.append(os.path.join(rundir, "e3sm.log.{}.gz".format(lid)))
    globs_to_copy.append(os.path.join(rundir, "cpl.log.{}.gz".format(lid)))
    globs_to_copy.append("timing/*.{}*".format(lid))
    globs_to_copy.append("CaseStatus")

    for glob_to_copy in globs_to_copy:
        for item in glob.glob(os.path.join(caseroot, glob_to_copy)):
            basename = os.path.basename(item)
            if basename != timing_saved_file:
                # Files without the lid in their name get it appended so archive
                # entries from different runs don't collide.
                if lid not in basename and not basename.endswith(".gz"):
                    safe_copy(item, os.path.join(full_timing_dir, "{}.{}".format(basename, lid)), preserve_meta=False)
                else:
                    safe_copy(item, full_timing_dir, preserve_meta=False)

    # zip everything
    for root, _, files in os.walk(full_timing_dir):
        for filename in files:
            if not filename.endswith(".gz"):
                gzip_existing_file(os.path.join(root, filename))
def _archive_restarts_date_comp(case, casename, rundir, archive, archive_entry,
                                compclass, compname, datename, datename_is_last,
                                last_date, archive_restdir, archive_file_fn,
                                link_to_last_restart_files=False, testonly=False):
###############################################################################
    """
    Archive restart files for a single date and single component

    If link_to_last_restart_files is True, then make a symlink to the
    last set of restart files (i.e., the set with datename_is_last True);
    if False (the default), copy them. (This has no effect on the history
    files that are associated with these restart files.)

    archive_file_fn is the function used to archive non-latest restart files
    (typically a move). Returns the list of history files that must be kept
    in rundir because the latest restarts depend on them.
    """
    datename_str = _datetime_str(datename)

    if datename_is_last or case.get_value('DOUT_S_SAVE_INTERIM_RESTART_FILES'):
        if not os.path.exists(archive_restdir):
            os.makedirs(archive_restdir)

        # archive the rpointer file(s) for this datename and all possible ninst_strings
        _archive_rpointer_files(casename, _get_ninst_info(case, compclass)[1], rundir,
                                case.get_value('DOUT_S_SAVE_INTERIM_RESTART_FILES'),
                                archive, archive_entry, archive_restdir, datename, datename_is_last)

    # move all but latest restart files into the archive restart directory
    # copy latest restart files to archive restart directory
    histfiles_savein_rundir = []

    # determine function to use for last set of restart files
    if link_to_last_restart_files:
        last_restart_file_fn = symlink_force
        last_restart_file_fn_msg = "linking"
    else:
        last_restart_file_fn = safe_copy
        last_restart_file_fn_msg = "copying"

    # the compname is drv but the files are named cpl
    if compname == 'drv':
        compname = 'cpl'

    # get file_extension suffixes
    for suffix in archive.get_rest_file_extensions(archive_entry):
        # logger.debug("suffix is {} ninst {}".format(suffix, ninst))
        restfiles = ""
        if compname.find('mpas') == 0 or compname == 'mali':
            # MPAS components name files <comp>.<suffix>.YYYY-MM-DD_HH... —
            # convert the trailing '-' of the datename into '_' for the match.
            pattern = compname + r'\.' + suffix + r'\.' + '_'.join(datename_str.rsplit('-', 1))
            pfile = re.compile(pattern)
            restfiles = [f for f in os.listdir(rundir) if pfile.search(f)]
        else:
            # First narrow to files for this case+component (optionally with an
            # instance number), then match suffix and datename.
            pattern = r"^{}\.{}[\d_]*\.".format(casename, compname)
            pfile = re.compile(pattern)
            files = [f for f in os.listdir(rundir) if pfile.search(f)]
            pattern = r'_?' + r'\d*' + r'\.' + suffix + r'\.' + r'[^\.]*' + r'\.?' + datename_str
            pfile = re.compile(pattern)
            restfiles = [f for f in files if pfile.search(f)]
        logger.debug("pattern is {} restfiles {}".format(pattern, restfiles))
        for restfile in restfiles:
            restfile = os.path.basename(restfile)

            file_date = get_file_date(restfile)
            if last_date is not None and file_date > last_date:
                # Skip this file
                continue

            if not os.path.exists(archive_restdir):
                os.makedirs(archive_restdir)

            # obtain array of history files for restarts
            # need to do this before archiving restart files
            histfiles_for_restart = get_histfiles_for_restarts(rundir, archive,
                                                               archive_entry, restfile,
                                                               testonly=testonly)

            if datename_is_last and histfiles_for_restart:
                for histfile in histfiles_for_restart:
                    if histfile not in histfiles_savein_rundir:
                        histfiles_savein_rundir.append(histfile)

            # archive restart files and all history files that are needed for restart
            # Note that the latest file should be copied and not moved
            if datename_is_last:
                srcfile = os.path.join(rundir, restfile)
                destfile = os.path.join(archive_restdir, restfile)
                last_restart_file_fn(srcfile, destfile)
                logger.info("{} file {} to {}".format(last_restart_file_fn_msg, srcfile, destfile))
                for histfile in histfiles_for_restart:
                    srcfile = os.path.join(rundir, histfile)
                    destfile = os.path.join(archive_restdir, histfile)
                    expect(os.path.isfile(srcfile),
                           "history restart file {} for last date does not exist ".format(srcfile))
                    logger.info("Copying {} to {}".format(srcfile, destfile))
                    safe_copy(srcfile, destfile)
                    logger.debug("datename_is_last + histfiles_for_restart copying \n {} to \n {}".format(srcfile, destfile))
            else:
                # Only archive intermediate restarts if requested - otherwise remove them
                if case.get_value('DOUT_S_SAVE_INTERIM_RESTART_FILES'):
                    srcfile = os.path.join(rundir, restfile)
                    destfile = os.path.join(archive_restdir, restfile)
                    expect(os.path.isfile(srcfile),
                           "restart file {} does not exist ".format(srcfile))
                    archive_file_fn(srcfile, destfile)
                    logger.info("moving file {} to {}".format(srcfile, destfile))

                    # need to copy the history files needed for interim restarts - since
                    # have not archived all of the history files yet
                    for histfile in histfiles_for_restart:
                        srcfile = os.path.join(rundir, histfile)
                        destfile = os.path.join(archive_restdir, histfile)
                        expect(os.path.isfile(srcfile),
                               "hist file {} does not exist ".format(srcfile))
                        logger.info("copying {} to {}".format(srcfile, destfile))
                        safe_copy(srcfile, destfile)
                else:
                    srcfile = os.path.join(rundir, restfile)
                    logger.info("removing interim restart file {}".format(srcfile))
                    if (os.path.isfile(srcfile)):
                        try:
                            os.remove(srcfile)
                        except OSError:
                            # best-effort removal; log and continue
                            logger.warning("unable to remove interim restart file {}".format(srcfile))
                    else:
                        logger.warning("interim restart file {} does not exist".format(srcfile))

    return histfiles_savein_rundir
def apply_user_mods(caseroot, user_mods_path, keepexe=None):
    '''
    Recursivlely apply user_mods to caseroot - this includes updating user_nl_xxx,
    updating SourceMods and creating case shell_commands and xmlchange_cmds files

    First remove case shell_commands files if any already exist

    If this function is called multiple times, settings from later calls will
    take precedence over earlier calls, if there are conflicts.

    keepexe is an optional argument that is needed for cases where apply_user_mods is
    called from create_clone; when not None, any SourceMods in the user_mods
    tree are an error (they would invalidate the kept executable).
    '''
    case_shell_command_files = [os.path.join(caseroot, "shell_commands"),
                                os.path.join(caseroot, "xmlchange_cmnds")]
    for shell_command_file in case_shell_command_files:
        if os.path.isfile(shell_command_file):
            os.remove(shell_command_file)

    include_dirs = build_include_dirs_list(user_mods_path)
    # If a user_mods dir 'foo' includes 'bar', the include_dirs list returned
    # from build_include_dirs has 'foo' before 'bar'. But with the below code,
    # directories that occur later in the list take precedence over the earlier
    # ones, and we want 'foo' to take precedence over 'bar' in this case (in
    # general: we want a given user_mods directory to take precedence over any
    # mods that it includes). So we reverse include_dirs to accomplish this.
    include_dirs.reverse()
    logger.debug("include_dirs are {}".format(include_dirs))

    for include_dir in include_dirs:
        # write user_nl_xxx file in caseroot
        for user_nl in glob.iglob(os.path.join(include_dir, "user_nl_*")):
            # NOTE(review): user_nl already contains include_dir (it comes from
            # the glob), so this join is redundant — it only behaves correctly
            # because joining with an absolute second path returns that path;
            # confirm include_dirs are always absolute.
            with open(os.path.join(include_dir, user_nl), "r") as fd:
                newcontents = fd.read()
            if len(newcontents) == 0:
                continue
            case_user_nl = user_nl.replace(include_dir, caseroot)
            # If the same variable is set twice in a user_nl file, the later one
            # takes precedence. So by appending the new contents, later entries
            # in the include_dirs list take precedence over earlier entries.
            with open(case_user_nl, "a") as fd:
                fd.write(newcontents)

        # update SourceMods in caseroot
        for root, _, files in os.walk(include_dir, followlinks=True, topdown=False):
            if "src" in os.path.basename(root):
                if keepexe is not None:
                    expect(False, "cannot have any source mods in {} if keepexe is an option".format(user_mods_path))
                for sfile in files:
                    source_mods = os.path.join(root, sfile)
                    case_source_mods = source_mods.replace(include_dir, caseroot)
                    # We overwrite any existing SourceMods file so that later
                    # include_dirs take precedence over earlier ones
                    if os.path.isfile(case_source_mods):
                        logger.warning("WARNING: Overwriting existing SourceMods in {}".format(case_source_mods))
                    else:
                        logger.info("Adding SourceMod to case {}".format(case_source_mods))
                    try:
                        safe_copy(source_mods, case_source_mods)
                    except Exception:
                        expect(False, "Could not write file {} in caseroot {}".format(case_source_mods, caseroot))

        # create xmlchange_cmnds and shell_commands in caseroot
        shell_command_files = glob.glob(os.path.join(include_dir, "shell_commands")) +\
                              glob.glob(os.path.join(include_dir, "xmlchange_cmnds"))
        for shell_commands_file in shell_command_files:
            case_shell_commands = shell_commands_file.replace(include_dir, caseroot)
            # add commands from both shell_commands and xmlchange_cmnds to
            # the same file (caseroot/shell_commands)
            case_shell_commands = case_shell_commands.replace("xmlchange_cmnds", "shell_commands")
            # Note that use of xmlchange_cmnds has been deprecated and will soon
            # be removed altogether, so new tests should rely on shell_commands
            if shell_commands_file.endswith("xmlchange_cmnds"):
                logger.warning("xmlchange_cmnds is deprecated and will be removed " +\
                               "in a future release; please rename {} shell_commands".format(shell_commands_file))
            # --force lets the commands override values locked by the case.
            with open(shell_commands_file, "r") as fd:
                new_shell_commands = fd.read().replace("xmlchange", "xmlchange --force")
            # By appending the new commands to the end, settings from later
            # include_dirs take precedence over earlier ones
            with open(case_shell_commands, "a") as fd:
                fd.write(new_shell_commands)

    # Execute the accumulated command files (if any were produced above).
    for shell_command_file in case_shell_command_files:
        if os.path.isfile(shell_command_file):
            os.chmod(shell_command_file, 0o777)
            run_cmd_no_fail(shell_command_file, verbose=True)
    def stage_refcase(self, input_data_root=None, data_list_dir=None):
        """
        Get a REFCASE for a hybrid or branch run
        This is the only case in which we are downloading an entire directory instead of
        a single file at a time.

        input_data_root: override for DIN_LOC_ROOT when downloading
        data_list_dir: directory holding *.input_data_list files (default "Buildconf")
        Returns True. rpointer files are copied into RUNDIR; all other refcase
        files are symlinked. NOTE(review): unlike the other stage_refcase
        variant in this codebase, this one does not verify that any rpointer
        files were actually found.
        """
        get_refcase = self.get_value("GET_REFCASE")
        run_type = self.get_value("RUN_TYPE")
        continue_run = self.get_value("CONTINUE_RUN")

        # We do not fully populate the inputdata directory on every
        # machine and do not expect every user to download the 3TB+ of
        # data in our inputdata repository. This code checks for the
        # existence of inputdata in the local inputdata directory and
        # attempts to download data from the server if it's needed and
        # missing.
        if get_refcase and run_type != "startup" and not continue_run:
            din_loc_root = self.get_value("DIN_LOC_ROOT")
            run_refdate = self.get_value("RUN_REFDATE")
            run_refcase = self.get_value("RUN_REFCASE")
            run_refdir = self.get_value("RUN_REFDIR")
            rundir = self.get_value("RUNDIR")

            if os.path.isabs(run_refdir):
                # Absolute RUN_REFDIR is used as-is; no download attempted.
                refdir = run_refdir
            else:
                refdir = os.path.join(din_loc_root, run_refdir, run_refcase, run_refdate)
                if not os.path.isdir(refdir):
                    logger.warning("Refcase not found in {}, will attempt to download from inputdata".format(refdir))
                    # Write a one-line data list so the download machinery knows
                    # which directory to fetch.
                    with open(os.path.join("Buildconf", "refcase.input_data_list"), "w") as fd:
                        fd.write("refdir = {}{}".format(refdir, os.sep))
                    if input_data_root is None:
                        input_data_root = din_loc_root
                    if data_list_dir is None:
                        data_list_dir = "Buildconf"
                    success = _downloadfromserver(self, input_data_root=input_data_root, data_list_dir=data_list_dir)
                    expect(success, "Could not download refcase from any server")

            logger.info(" - Prestaging REFCASE ({}) to {}".format(refdir, rundir))

            # prestage the reference case's files.

            if (not os.path.exists(rundir)):
                logger.debug("Creating run directory: {}".format(rundir))
                os.makedirs(rundir)

            # copy the refcases' rpointer files to the run directory
            for rpointerfile in glob.iglob(os.path.join("{}", "*rpointer*").format(refdir)):
                logger.info("Copy rpointer {}".format(rpointerfile))
                safe_copy(rpointerfile, rundir)

            # link everything else
            for rcfile in glob.iglob(os.path.join(refdir, "*")):
                rcbaseline = os.path.basename(rcfile)
                if not os.path.exists("{}/{}".format(rundir, rcbaseline)):
                    logger.info("Staging file {}".format(rcfile))
                    os.symlink(rcfile, "{}/{}".format(rundir, rcbaseline))
            # Backward compatibility, some old refcases have cam2 in the name
            # link to local cam file.
            for cam2file in glob.iglob(os.path.join("{}", "*.cam2.*").format(rundir)):
                camfile = cam2file.replace("cam2", "cam")
                os.symlink(cam2file, camfile)
        elif not get_refcase and run_type != "startup":
            logger.info("GET_REFCASE is false, the user is expected to stage the refcase to the run directory.")
            if os.path.exists(os.path.join("Buildconf", "refcase.input_data_list")):
                os.remove(os.path.join("Buildconf", "refcase.input_data_list"))
        return True
def _compare_hists(case, from_dir1, from_dir2, suffix1="", suffix2="", outfile_suffix=""):
    """
    Compare the latest history files of two directories, model by model.

    Returns (success, comments): success is True only when at least one file
    pair was compared and every pair matched; comments is a human-readable
    transcript ending in "PASS" or "FAIL".
    """
    if from_dir1 == from_dir2:
        expect(suffix1 != suffix2, "Comparing files to themselves?")

    testcase = case.get_value("CASE")
    casedir = case.get_value("CASEROOT")
    archive = case.get_env('archive')
    ref_case = case.get_value("RUN_REFCASE")

    overall_ok = True
    compared_count = 0
    multiinst_driver_compare = False
    report = "Comparing hists for case '{}' dir1='{}', suffix1='{}', dir2='{}' suffix2='{}'\n".format(testcase, from_dir1, suffix1, from_dir2, suffix2)

    for model in _iter_model_file_substrs(case):
        if model == 'cpl' and suffix2 == 'multiinst':
            multiinst_driver_compare = True
        report += " comparing model '{}'\n".format(model)

        # The coupler's hist-file extensions are registered under 'drv'.
        entry_name = 'drv' if model == 'cpl' else model
        file_extensions = archive.get_hist_file_extensions(archive.get_entry(entry_name))

        hists1 = _get_latest_hist_files(model, from_dir1, file_extensions, suffix=suffix1, ref_case=ref_case)
        hists2 = _get_latest_hist_files(model, from_dir2, file_extensions, suffix=suffix2, ref_case=ref_case)
        if not hists1 and not hists2:
            report += " no hist files found for model {}\n".format(model)
            continue

        one_not_two, two_not_one, match_ups = _hists_match(model, hists1, hists2, suffix1, suffix2)

        # Report files present on one side only; either kind is a failure.
        for missing, tag, where, sfx in ((one_not_two, NO_COMPARE, from_dir2, suffix2),
                                         (two_not_one, NO_ORIGINAL, from_dir1, suffix1)):
            for fname in missing:
                report += " File '{}' {} in '{}' with suffix '{}'\n".format(fname, tag, where, sfx)
                overall_ok = False

        compared_count += len(match_ups)
        for hist1, hist2 in match_ups:
            matched, cprnc_log_file = cprnc(model, hist1, hist2, case, from_dir1,
                                            multiinst_driver_compare=multiinst_driver_compare,
                                            outfile_suffix=outfile_suffix)
            if matched:
                report += " {} matched {}\n".format(hist1, hist2)
                continue

            report += " {} {} {}\n".format(hist1, DIFF_COMMENT, hist2)
            report += " cat " + cprnc_log_file + "\n"
            # Keep a copy of the cprnc log in the case dir unless an identical
            # copy is already there.
            expected_log_file = os.path.join(casedir, os.path.basename(cprnc_log_file))
            if not (os.path.exists(expected_log_file) and filecmp.cmp(cprnc_log_file, expected_log_file)):
                try:
                    safe_copy(cprnc_log_file, casedir)
                except (OSError, IOError):
                    logger.warning("Could not copy {} to {}".format(cprnc_log_file, casedir))
            overall_ok = False

    if compared_count == 0:
        overall_ok = False
        report += "Did not compare any hist files! Missing baselines?\n"

    report += "PASS" if overall_ok else "FAIL"
    return overall_ok, report
exeroot = case.get_value("EXEROOT") caseroot = case.get_value("CASEROOT") # Save git describe describe_prov = os.path.join(exeroot, "GIT_DESCRIBE.{}".format(lid)) desc = get_current_commit(tag=True, repo=srcroot) with open(describe_prov, "w") as fd: fd.write(desc) # Save HEAD headfile = os.path.join(srcroot, ".git", "logs", "HEAD") headfile_prov = os.path.join(exeroot, "GIT_LOGS_HEAD.{}".format(lid)) if os.path.exists(headfile_prov): os.remove(headfile_prov) if os.path.exists(headfile): safe_copy(headfile, headfile_prov, preserve_meta=False) # Save git submodule status submodule_prov = os.path.join(exeroot, "GIT_SUBMODULE_STATUS.{}".format(lid)) subm_status = get_current_submodule_status(recursive=True, repo=srcroot) with open(submodule_prov, "w") as fd: fd.write(subm_status) _record_git_provenance(srcroot, exeroot, lid) # Save SourceMods sourcemods = os.path.join(caseroot, "SourceMods") sourcemods_prov = os.path.join(exeroot, "SourceMods.{}.tar.gz".format(lid)) if os.path.exists(sourcemods_prov): os.remove(sourcemods_prov) if os.path.isdir(sourcemods):
def _build_model(build_threaded, exeroot, clm_config_opts, incroot, complist,
                 lid, caseroot, cimeroot, compiler, buildlist, comp_interface):
###############################################################################
    """
    Build each component library in its own thread, then link the coupled
    executable.

    Args:
        build_threaded: force SMP builds even for single-threaded components
        exeroot: build root; objects, libs and logs are placed under it
        clm_config_opts: CLM config string (used to detect clm4_0)
        incroot: include root forwarded to the per-component build thread
        complist: iterable of (model, comp, nthrds, _, config_dir) tuples
        lid: unique build id, used to suffix log files
        caseroot, cimeroot, compiler, comp_interface: case/build context
        buildlist: optional whitelist of lowercase model names to build

    Returns:
        List of build-log file paths (component logs plus the exe log).

    Raises via expect() if any component thread reported errors or the
    final buildexe step fails.
    """
    logs = []
    # Error strings appended by _build_model_thread; expected to stay empty.
    thread_bad_results = []
    # Track exactly the threads we spawn so we can join them below instead of
    # polling threading.active_count(), which also counts unrelated threads
    # (e.g. ones started by the caller) and busy-sleeps.
    build_threads = []
    # Hoisted out of the loop: loop-invariant, and the buildexe step below
    # needs it even when every component is filtered out of the loop.
    libroot = os.path.join(exeroot, "lib")

    for model, comp, nthrds, _, config_dir in complist:
        if buildlist is not None and model.lower() not in buildlist:
            continue

        # aquap has a dependency on atm so we will build it after the threaded loop
        if comp == "aquap":
            logger.debug("Skip aquap ocn build here")
            continue

        # coupler handled separately
        if model == "cpl":
            continue

        # special case for clm
        # clm 4_0 is not a shared (as in sharedlibs, shared by all tests) library and must be built here
        # clm 4_5 and newer is a shared library (but not in E3SM) and should be built in build_libraries
        if get_model() != "e3sm":
            if comp == "clm":
                if "clm4_0" in clm_config_opts:
                    logger.info(" - Building clm4_0 Library ")
                else:
                    continue
            else:
                logger.info(" - Building {} Library ".format(model))

        smp = nthrds > 1 or build_threaded

        bldroot = os.path.join(exeroot, model, "obj")
        file_build = os.path.join(exeroot, "{}.bldlog.{}".format(model, lid))
        logger.debug("bldroot is {}".format(bldroot))
        logger.debug("libroot is {}".format(libroot))

        # make sure bldroot and libroot exist
        for build_dir in [bldroot, libroot]:
            if not os.path.exists(build_dir):
                os.makedirs(build_dir)

        # build the component library
        # thread_bad_results captures error output from thread (expected to be empty)
        # logs is a list of log files to be compressed and added to the case logs/bld directory
        t = threading.Thread(target=_build_model_thread,
            args=(config_dir, model, comp, caseroot, libroot, bldroot, incroot, file_build,
                  thread_bad_results, smp, compiler))
        t.start()
        build_threads.append(t)

        logs.append(file_build)

    # Wait for our component-build threads to finish.
    for t in build_threads:
        t.join()

    expect(not thread_bad_results, "\n".join(thread_bad_results))

    #
    # Now build the executable
    #

    if not buildlist:
        cime_model = get_model()
        file_build = os.path.join(exeroot, "{}.bldlog.{}".format(cime_model, lid))

        config_dir = os.path.join(cimeroot, "src", "drivers", comp_interface, "cime_config")
        bldroot = os.path.join(exeroot, "cpl", "obj")
        if not os.path.isdir(bldroot):
            os.makedirs(bldroot)
        logger.info("Building {} with output to {} ".format(cime_model, file_build))

        with open(file_build, "w") as fd:
            # Named "rc" rather than "stat" to avoid shadowing the stdlib
            # stat module used elsewhere in this file.
            rc = run_cmd("{}/buildexe {} {} {} "
                         .format(config_dir, caseroot, libroot, bldroot),
                         from_dir=bldroot, arg_stdout=fd,
                         arg_stderr=subprocess.STDOUT)[0]

        analyze_build_log("{} exe".format(cime_model), file_build, compiler)
        expect(rc == 0, "BUILD FAIL: buildexe failed, cat {}".format(file_build))

        # Copy the just-built ${MODEL}.exe to ${MODEL}.exe.$LID
        safe_copy("{}/{}.exe".format(exeroot, cime_model), "{}/{}.exe.{}".format(exeroot, cime_model, lid))

        logs.append(file_build)

    return logs
model_dir, comp)) comp_interface = case.get_value("COMP_INTERFACE") multi_driver = case.get_value("MULTI_DRIVER") ninst = 1 if multi_driver: ninst_max = case.get_value("NINST_MAX") if comp_interface != "nuopc" and model not in ("DRV", "CPL", "ESP"): ninst_model = case.get_value("NINST_{}".format(model)) expect( ninst_model == ninst_max, "MULTI_DRIVER mode, all components must have same NINST value. NINST_{} != {}" .format(model, ninst_max)) if comp == "cpl": if not os.path.exists("user_nl_cpl"): safe_copy(os.path.join(model_dir, "user_nl_cpl"), ".") else: if comp_interface == "nuopc": ninst = case.get_value("NINST") elif ninst == 1: ninst = case.get_value("NINST_{}".format(model)) nlfile = "user_nl_{}".format(comp) model_nl = os.path.join(model_dir, nlfile) if ninst > 1: for inst_counter in range(1, ninst + 1): inst_nlfile = "{}_{:04d}".format(nlfile, inst_counter) if not os.path.exists(inst_nlfile): # If there is a user_nl_foo in the case directory, copy it # to user_nl_foo_INST; otherwise, copy the original # user_nl_foo from model_dir if os.path.exists(nlfile):
def _save_prerun_timing_e3sm(case, lid):
    """
    Archive pre-run timing data and provenance for an E3SM case.

    Creates SAVE_TIMING_DIR/performance_archive/<user>/<case>/<lid> and fills
    it with: machine/queue state (per-machine batch commands, gzipped), a
    tarball of SourceMods, copies of case configuration files (CaseDocs, run
    scripts, XML, user_nl, Macros, Depends, ...), selected build-provenance
    files from EXEROOT, and a git describe of the repo. If a batch job id is
    available it also (re)starts the mach_syslog monitor when SYSLOG_N > 0.

    Silently returns (with a warning where appropriate) when the project is
    not configured for timing archival, SAVE_TIMING_DIR is invalid, or the
    target directory already exists/cannot be created.
    """
    project = case.get_value("PROJECT", subgroup=case.get_primary_job())
    if not case.is_save_timing_dir_project(project):
        return

    timing_dir = case.get_value("SAVE_TIMING_DIR")
    if timing_dir is None or not os.path.isdir(timing_dir):
        logger.warning("SAVE_TIMING_DIR {} is not valid. E3SM requires a valid SAVE_TIMING_DIR to archive timing data.".format(timing_dir))
        return

    logger.info("Archiving timing data and associated provenance in {}.".format(timing_dir))
    rundir = case.get_value("RUNDIR")
    blddir = case.get_value("EXEROOT")
    caseroot = case.get_value("CASEROOT")
    cimeroot = case.get_value("CIMEROOT")
    base_case = case.get_value("CASE")
    full_timing_dir = os.path.join(timing_dir, "performance_archive", getpass.getuser(), base_case, lid)

    # A pre-existing directory means this lid was already archived; do not clobber.
    if os.path.exists(full_timing_dir):
        logger.warning("{} already exists. Skipping archive of timing data and associated provenance.".format(full_timing_dir))
        return

    try:
        os.makedirs(full_timing_dir)
    except OSError:
        logger.warning("{} cannot be created. Skipping archive of timing data and associated provenance.".format(full_timing_dir))
        return

    mach = case.get_value("MACH")
    compiler = case.get_value("COMPILER")

    # For some batch machines save queue info
    job_id = _get_batch_job_id_for_syslog(case)
    if job_id is not None:
        # Each machine branch runs its batch-system query commands, writes the
        # output to <name>.<lid> in full_timing_dir and gzips it. Note the two
        # capture styles: mira/theta/cori use arg_stdout, while titan/anvil/
        # summit embed a shell ">" redirect in the command string itself.
        if mach == "mira":
            for cmd, filename in [("qstat -f", "qstatf"),
                                  ("qstat -lf %s" % job_id, "qstatf_jobid")]:
                filename = "%s.%s" % (filename, lid)
                run_cmd_no_fail(cmd, arg_stdout=filename, from_dir=full_timing_dir)
                gzip_existing_file(os.path.join(full_timing_dir, filename))
        elif mach == "theta":
            for cmd, filename in [("qstat -l --header JobID:JobName:User:Project:WallTime:QueuedTime:Score:RunTime:TimeRemaining:Nodes:State:Location:Mode:Command:Args:Procs:Queue:StartTime:attrs:Geometry", "qstatf"),
                                  ("qstat -lf %s" % job_id, "qstatf_jobid"),
                                  ("xtnodestat", "xtnodestat"),
                                  ("xtprocadmin", "xtprocadmin")]:
                filename = "%s.%s" % (filename, lid)
                run_cmd_no_fail(cmd, arg_stdout=filename, from_dir=full_timing_dir)
                gzip_existing_file(os.path.join(full_timing_dir, filename))
        elif mach in ["edison", "cori-haswell", "cori-knl"]:
            for cmd, filename in [("sinfo -a -l", "sinfol"),
                                  ("sqs -f %s" % job_id, "sqsf_jobid"),
                                  # ("sqs -f", "sqsf"),
                                  ("squeue -o '%.10i %.15P %.20j %.10u %.7a %.2t %.6D %.8C %.10M %.10l %.20S %.20V'", "squeuef"),
                                  ("squeue -t R -o '%.10i %R'", "squeues")]:
                filename = "%s.%s" % (filename, lid)
                run_cmd_no_fail(cmd, arg_stdout=filename, from_dir=full_timing_dir)
                gzip_existing_file(os.path.join(full_timing_dir, filename))
        elif mach == "titan":
            for cmd, filename in [("qstat -f %s >" % job_id, "qstatf_jobid"),
                                  ("xtnodestat >", "xtnodestat"),
                                  # ("qstat -f >", "qstatf"),
                                  # ("xtdb2proc -f", "xtdb2proc"),
                                  ("showq >", "showq")]:
                full_cmd = cmd + " " + filename
                run_cmd_no_fail(full_cmd + "." + lid, from_dir=full_timing_dir)
                gzip_existing_file(os.path.join(full_timing_dir, filename + "." + lid))

            # mdiag_reduce = os.path.join(full_timing_dir, "mdiag_reduce." + lid)
            # run_cmd_no_fail("./mdiag_reduce.csh", arg_stdout=mdiag_reduce, from_dir=os.path.join(caseroot, "Tools"))
            # gzip_existing_file(mdiag_reduce)
        elif mach == "anvil":
            for cmd, filename in [("qstat -f -1 acme >", "qstatf"),
                                  ("qstat -f %s >" % job_id, "qstatf_jobid"),
                                  ("qstat -r acme >", "qstatr")]:
                full_cmd = cmd + " " + filename
                run_cmd_no_fail(full_cmd + "." + lid, from_dir=full_timing_dir)
                gzip_existing_file(os.path.join(full_timing_dir, filename + "." + lid))
        elif mach == "summit":
            for cmd, filename in [("bjobs -u all >", "bjobsu_all"),
                                  ("bjobs -r -u all -o 'jobid slots exec_host' >", "bjobsru_allo"),
                                  ("bjobs -l -UF %s >" % job_id, "bjobslUF_jobid")]:
                full_cmd = cmd + " " + filename
                run_cmd_no_fail(full_cmd + "." + lid, from_dir=full_timing_dir)
                gzip_existing_file(os.path.join(full_timing_dir, filename + "." + lid))

    # copy/tar SourceModes
    source_mods_dir = os.path.join(caseroot, "SourceMods")
    if os.path.isdir(source_mods_dir):
        with tarfile.open(os.path.join(full_timing_dir, "SourceMods.{}.tar.gz".format(lid)), "w:gz") as tfd:
            tfd.add(source_mods_dir, arcname="SourceMods")

    # Save various case configuration items
    case_docs = os.path.join(full_timing_dir, "CaseDocs.{}".format(lid))
    os.mkdir(case_docs)
    globs_to_copy = [
        "CaseDocs/*",
        "*.run",
        ".*.run",
        "*.xml",
        "user_nl_*",
        "*env_mach_specific*",
        "Macros*",
        "README.case",
        "Depends.{}".format(mach),
        "Depends.{}".format(compiler),
        "Depends.{}.{}".format(mach, compiler),
        "software_environment.txt"
    ]
    for glob_to_copy in globs_to_copy:
        for item in glob.glob(os.path.join(caseroot, glob_to_copy)):
            # lstrip(".") so hidden files (".*.run") become visible in the archive.
            safe_copy(item, os.path.join(case_docs, "{}.{}".format(os.path.basename(item).lstrip("."), lid)), preserve_meta=False)

    # Copy some items from build provenance
    blddir_globs_to_copy = [
        "GIT_LOGS_HEAD",
        "build_environment.txt"
    ]
    for blddir_glob_to_copy in blddir_globs_to_copy:
        for item in glob.glob(os.path.join(blddir, blddir_glob_to_copy)):
            safe_copy(item, os.path.join(full_timing_dir, os.path.basename(item) + "." + lid), preserve_meta=False)

    # Save state of repo
    # If cimeroot itself is not a git checkout, assume it is nested one level
    # inside the repo (e.g. E3SM/cime) and describe the parent instead.
    from_repo = cimeroot if os.path.exists(os.path.join(cimeroot, ".git")) else os.path.dirname(cimeroot)
    desc = get_current_commit(tag=True, repo=from_repo)
    with open(os.path.join(full_timing_dir, "GIT_DESCRIBE.{}".format(lid)), "w") as fd:
        fd.write(desc)

    # What this block does is mysterious to me (JGF)
    if job_id is not None:
        # Kill mach_syslog from previous run if one exists
        syslog_jobid_path = os.path.join(rundir, "syslog_jobid.{}".format(job_id))
        if os.path.exists(syslog_jobid_path):
            try:
                with open(syslog_jobid_path, "r") as fd:
                    syslog_jobid = int(fd.read().strip())
                os.kill(syslog_jobid, signal.SIGTERM)
            except (ValueError, OSError) as e:
                logger.warning("Failed to kill syslog: {}".format(e))
            finally:
                # Remove the stale pid file whether or not the kill succeeded.
                os.remove(syslog_jobid_path)

        # If requested, spawn a mach_syslog process to monitor job progress
        sample_interval = case.get_value("SYSLOG_N")
        if sample_interval > 0:
            archive_checkpoints = os.path.join(full_timing_dir, "checkpoints.{}".format(lid))
            os.mkdir(archive_checkpoints)
            touch("{}/e3sm.log.{}".format(rundir, lid))
            # Background the monitor via the shell and capture its pid with "echo $!".
            syslog_jobid = run_cmd_no_fail("./mach_syslog {si} {jobid} {lid} {rundir} {rundir}/timing/checkpoints {ac} >& /dev/null & echo $!".format(si=sample_interval, jobid=job_id, lid=lid, rundir=rundir, ac=archive_checkpoints),
                                           from_dir=os.path.join(caseroot, "Tools"))
            # Persist the monitor pid so the next run can kill it (see above).
            with open(os.path.join(rundir, "syslog_jobid.{}".format(job_id)), "w") as fd:
                fd.write("{}\n".format(syslog_jobid))