def create_caseroot(self, clone=False):
    if not os.path.exists(self._caseroot):
        # Make the case directory
        logger.info(" Creating Case directory %s" % self._caseroot)
        os.makedirs(self._caseroot)
    os.chdir(self._caseroot)

    # Create relevant directories in $self._caseroot
    if clone:
        newdirs = ("LockedFiles", "Tools")
    else:
        newdirs = ("SourceMods", "LockedFiles", "Buildconf", "Tools")
    for newdir in newdirs:
        os.makedirs(newdir)

    # Open a new README.case file in $self._caseroot
    append_status(" ".join(sys.argv), caseroot=self._caseroot, sfile="README.case")
    append_status("Compset longname is %s" % self.get_value("COMPSET"),
                  caseroot=self._caseroot, sfile="README.case")
    append_status("Compset specification file is %s" % self.get_value("COMPSETS_SPEC_FILE"),
                  caseroot=self._caseroot, sfile="README.case")
    append_status("Pes specification file is %s" % self.get_value("PES_SPEC_FILE"),
                  caseroot=self._caseroot, sfile="README.case")
    for component_class in self._component_classes:
        if component_class == "DRV":
            continue
        comp_grid = "%s_GRID" % component_class
        append_status("%s is %s" % (comp_grid, self.get_value(comp_grid)),
                      caseroot=self._caseroot, sfile="README.case")
    if not clone:
        self._create_caseroot_sourcemods()
    self._create_caseroot_tools()
def build(self, sharedlib_only=False, model_only=False):
    """
    Do NOT override this method, this method is the framework that controls
    the build phase. build_phase is the extension point that subclasses
    should use.
    """
    success = True
    for phase_name, phase_bool in [(SHAREDLIB_BUILD_PHASE, not model_only),
                                   (MODEL_BUILD_PHASE, not sharedlib_only)]:
        if phase_bool:
            with self._test_status:
                self._test_status.set_status(phase_name, TEST_PENDING_STATUS)

            start_time = time.time()
            try:
                self.build_phase(sharedlib_only=(phase_name == SHAREDLIB_BUILD_PHASE),
                                 model_only=(phase_name == MODEL_BUILD_PHASE))
            except:
                success = False
                excmsg = "Exception during build:\n%s\n%s" % (sys.exc_info()[1], traceback.format_exc())
                logger.warning(excmsg)
                append_status(excmsg, sfile="TestStatus.log")

            time_taken = time.time() - start_time
            with self._test_status:
                self._test_status.set_status(phase_name,
                                             TEST_PASS_STATUS if success else TEST_FAIL_STATUS,
                                             comments=("time=%d" % int(time_taken)))

            if not success:
                break

    return success
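# ---------------------------------------------------------------------------
# Illustration (not from the source above): subclasses hook into the build
# framework by overriding build_phase, never build itself. The class name and
# the build_indv helper are assumptions sketched here, not confirmed API.
# ---------------------------------------------------------------------------
class MYTEST(SystemTestsCommon):

    def build_phase(self, sharedlib_only=False, model_only=False):
        # build() calls this once per phase; delegate to a single model build.
        self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only)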
def generate_baseline(self):
    """
    generate a new baseline case based on the current test
    """
    if self._runstatus != "PASS":
        append_status("Cannot generate baselines, test did not pass.\n", sfile="TestStatus.log")
        return

    newestcpllogfile = self._get_latest_cpl_log()
    baselineroot = self._case.get_value("BASELINE_ROOT")
    basegen_dir = os.path.join(baselineroot, self._case.get_value("BASEGEN_CASE"))
    for bdir in (baselineroot, basegen_dir):
        if not os.path.isdir(bdir):
            append_status("GFAIL %s baseline\n" % self._case.get_value("CASEBASEID"), sfile="TestStatus")
            append_status("ERROR %s does not exist" % bdir, sfile="TestStatus.log")
            return -1

    compgen = os.path.join(self._case.get_value("SCRIPTSROOT"), "Tools",
                           "component_compgen_baseline.sh")
    compgen += " -baseline_dir " + basegen_dir
    compgen += " -test_dir " + self._case.get_value("RUNDIR")
    compgen += " -generate_tag " + self._case.get_value("BASELINE_NAME_GEN")
    compgen += " -testcase " + self._case.get_value("CASE")
    compgen += " -testcase_base " + self._case.get_value("CASEBASEID")
    rc, out, err = run_cmd(compgen, ok_to_fail=True)

    # copy latest cpl log to baseline
    # drop the date so that the name is generic
    shutil.copyfile(newestcpllogfile, os.path.join(basegen_dir, "cpl.log.gz"))

    append_status(out, sfile="TestStatus")
    if rc != 0:
        append_status("Error in Baseline Generate: %s" % err, sfile="TestStatus.log")
def case_cmpgen_namelists(self, compare=False, generate=False, compare_name=None,
                          generate_name=None, baseline_root=None,
                          logfile_name="TestStatus.log"):
    expect(self.get_value("TEST"), "Only makes sense to run this for a test case")

    caseroot, casebaseid = self.get_value("CASEROOT"), self.get_value("CASEBASEID")

    if not compare:
        compare = self.get_value("COMPARE_BASELINE")
    if not generate:
        generate = self.get_value("GENERATE_BASELINE")

    if not compare and not generate:
        logging.debug("No namelist comparisons requested")
        return True

    # create namelists for case if they haven't been already
    casedocs = os.path.join(caseroot, "CaseDocs")
    if not os.path.exists(os.path.join(casedocs, "drv_in")):
        self.create_namelists()

    test_name = casebaseid if casebaseid is not None else self.get_value("CASE")
    with TestStatus(test_dir=caseroot, test_name=test_name) as ts:
        try:
            # Inside this try we catch non-fatal errors, i.e. errors involving
            # baseline operations which may not directly impact the viability
            # of this case
            if compare and not compare_name:
                compare_name = self.get_value("BASELINE_NAME_CMP")
                expect(compare_name, "Was asked to do baseline compare but unable to determine baseline name")
                logging.info("Comparing namelists with baselines '{}'".format(compare_name))
            if generate and not generate_name:
                generate_name = self.get_value("BASELINE_NAME_GEN")
                expect(generate_name, "Was asked to do baseline generation but unable to determine baseline name")
                logging.info("Generating namelists to baselines '{}'".format(generate_name))

            success = True
            output = ""
            if compare:
                success, output = _do_full_nl_comp(self, test_name, compare_name, baseline_root)
                if not success and ts.get_status(RUN_PHASE) is not None:
                    run_warn = \
"""NOTE: It is not necessarily safe to compare namelists after the RUN
phase has completed. Running a case can pollute namelists. The namelists
kept in the baselines are pre-RUN namelists."""
                    output += run_warn
                    logging.info(run_warn)
            if generate:
                _do_full_nl_gen(self, test_name, generate_name, baseline_root)
        except Exception:
            success = False
            ts.set_status(NAMELIST_PHASE, TEST_FAIL_STATUS)
            warn = "Exception during namelist operations:\n{}\n{}".format(sys.exc_info()[1], traceback.format_exc())
            output += warn
            logging.warning(warn)
        finally:
            ts.set_status(NAMELIST_PHASE, TEST_PASS_STATUS if success else TEST_FAIL_STATUS)
            try:
                append_status(output, logfile_name, caseroot=caseroot)
            except IOError:
                pass

    return success
def _check_for_memleak(self, cpllog):
    """
    Examine memory usage as recorded in the cpl log file and look for
    unexpected increases.
    """
    if self._runstatus != "PASS":
        append_status("Cannot check memory, test did not pass.\n", sfile="TestStatus.log")
        return

    memlist = self._get_mem_usage(cpllog)
    if len(memlist) < 3:
        append_status("COMMENT: insufficient data for memleak test", sfile="TestStatus")
    else:
        finaldate = int(memlist[-1][0])
        originaldate = int(memlist[0][0])
        finalmem = float(memlist[-1][1])
        originalmem = float(memlist[0][1])
        memdiff = (finalmem - originalmem) / originalmem
        if memdiff < 0.1:
            append_status("PASS %s memleak" % self._case.get_value("CASEBASEID"), sfile="TestStatus")
        else:
            append_status("memleak detected, memory went from %f to %f in %d days"
                          % (originalmem, finalmem, finaldate - originaldate),
                          sfile="TestStatus.log")
            append_status("FAIL %s memleak" % self._case.get_value("CASEBASEID"), sfile="TestStatus")
def post_run_check(case, lid):
###############################################################################
    caseroot = case.get_value("CASEROOT")
    rundir = case.get_value("RUNDIR")
    model = case.get_value("MODEL")

    # find the last model.log and cpl.log
    model_logfile = os.path.join(rundir, model + ".log." + lid)
    cpl_logfile = os.path.join(rundir, "cpl" + ".log." + lid)

    if not os.path.isfile(model_logfile):
        msg = "Model did not complete, no %s log file" % model_logfile
        append_status(msg, caseroot=caseroot, sfile="CaseStatus")
        expect(False, msg)
    elif not os.path.isfile(cpl_logfile):
        msg = "Model did not complete, no cpl log file"
        append_status(msg, caseroot=caseroot, sfile="CaseStatus")
        expect(False, msg)
    elif os.stat(model_logfile).st_size == 0:
        msg = "Run FAILED"
        append_status(msg, caseroot=caseroot, sfile="CaseStatus")
        expect(False, msg)
    else:
        with open(cpl_logfile, 'r') as fd:
            if 'SUCCESSFUL TERMINATION' in fd.read():
                msg = "Run SUCCESSFUL"
                append_status(msg, caseroot=caseroot, sfile="CaseStatus")
            else:
                msg = "Model did not complete - see %s" % cpl_logfile
                append_status(msg, caseroot=caseroot, sfile="CaseStatus")
                expect(False, msg)
def _check_for_memleak(self):
    """
    Examine memory usage as recorded in the cpl log file and look for
    unexpected increases.
    """
    cpllog = self._get_latest_cpl_log()
    memlist = self._get_mem_usage(cpllog)

    with self._test_status:
        if len(memlist) < 3:
            self._test_status.set_status(MEMLEAK_PHASE, TEST_PASS_STATUS,
                                         comments="insufficient data for memleak test")
        else:
            finaldate = int(memlist[-1][0])
            originaldate = int(memlist[0][0])
            finalmem = float(memlist[-1][1])
            originalmem = float(memlist[0][1])
            memdiff = -1
            if originalmem > 0:
                memdiff = (finalmem - originalmem) / originalmem

            if memdiff < 0:
                self._test_status.set_status(MEMLEAK_PHASE, TEST_PASS_STATUS,
                                             comments="insufficient data for memleak test")
            elif memdiff < 0.1:
                self._test_status.set_status(MEMLEAK_PHASE, TEST_PASS_STATUS)
            else:
                comment = "memleak detected, memory went from %f to %f in %d days" \
                    % (originalmem, finalmem, finaldate - originaldate)
                append_status(comment, sfile="TestStatus.log")
                self._test_status.set_status(MEMLEAK_PHASE, TEST_FAIL_STATUS, comments=comment)
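# Illustration (not from the source above): the check assumes _get_mem_usage
# returns chronologically ordered (model_date, memory_highwater) pairs parsed
# from the coupler log. Hypothetical sample data under the same 10% threshold:
memlist = [(10101, 1000.0), (10105, 1020.0), (10110, 1040.0)]
memdiff = (float(memlist[-1][1]) - float(memlist[0][1])) / float(memlist[0][1])
assert memdiff < 0.1  # 4% growth, so this sample would PASS the memleak check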
def case_test(case, testname=None):
    if testname is None:
        testname = case.get_value("TESTCASE")

    expect(testname is not None, "testname argument not resolved")

    logging.warn("Running test for %s" % testname)

    _set_up_signal_handlers()

    try:
        # The following line can throw exceptions if the testname is
        # not found or the test constructor throws. We need to be
        # sure to leave TestStatus in the appropriate state if that
        # happens.
        test = find_system_test(testname, case)(case)
    except:
        caseroot = case.get_value("CASEROOT")
        with TestStatus(test_dir=caseroot) as ts:
            ts.set_status(RUN_PHASE, TEST_FAIL_STATUS, comments="failed to initialize")
        append_status(sys.exc_info()[1], sfile="TestStatus.log")
        raise

    success = test.run()

    return success
def pre_run_check(case):
###############################################################################
    # Pre run initialization code..
    caseroot = case.get_value("CASEROOT")
    din_loc_root = case.get_value("DIN_LOC_ROOT")
    batchsubmit = case.get_value("BATCHSUBMIT")
    mpilib = case.get_value("MPILIB")
    rundir = case.get_value("RUNDIR")
    build_complete = case.get_value("BUILD_COMPLETE")

    # check for locked files.
    check_lockedfiles(case.get_value("CASEROOT"))
    logger.debug("check_lockedfiles OK")

    # check that build is done
    expect(build_complete,
           "BUILD_COMPLETE is not true\nPlease rebuild the model interactively")
    logger.debug("build complete is %s " % build_complete)

    # load the module environment...
    case.load_env()

    # set environment variables
    # This is a requirement for yellowstone only
    if mpilib == "mpi-serial" and "MP_MPILIB" in os.environ:
        del os.environ["MP_MPILIB"]
    else:
        os.environ["MPILIB"] = mpilib

    if batchsubmit is None or len(batchsubmit) == 0 or batchsubmit == 'UNSET':
        os.environ["LBQUERY"] = "FALSE"
        os.environ["BATCHQUERY"] = "undefined"
    else:
        os.environ["LBQUERY"] = "TRUE"

    # create the timing directories, optionally cleaning them if needed.
    if not os.path.isdir(rundir):
        os.mkdir(rundir)

    if os.path.isdir(os.path.join(rundir, "timing")):
        shutil.rmtree(os.path.join(rundir, "timing"))

    os.makedirs(os.path.join(rundir, "timing", "checkpoints"))

    # This needs to be done every time the LID changes in order for log files to be set up correctly
    create_namelists(case)

    # document process
    append_status("Run started ", caseroot=caseroot, sfile="CaseStatus")

    logger.info("-------------------------------------------------------------------------")
    logger.info(" - To prestage required restarts, untar a restart.tar file into %s" % rundir)
    logger.info(" - Case input data directory (DIN_LOC_ROOT) is %s " % din_loc_root)
    logger.info(" - Checking for required input datasets in DIN_LOC_ROOT")
    logger.info("-------------------------------------------------------------------------")
def case_cmpgen_namelists(case, compare=False, generate=False, compare_name=None,
                          generate_name=None, baseline_root=None,
                          logfile_name="TestStatus.log"):
    expect(case.get_value("TEST"), "Only makes sense to run this for a test case")

    caseroot, casebaseid = case.get_value("CASEROOT"), case.get_value("CASEBASEID")

    if not compare:
        compare = case.get_value("COMPARE_BASELINE")
    if not generate:
        generate = case.get_value("GENERATE_BASELINE")

    if not compare and not generate:
        logging.debug("No namelist comparisons requested")
        return True

    # create namelists for case if they haven't been already
    casedocs = os.path.join(caseroot, "CaseDocs")
    if not os.path.exists(os.path.join(casedocs, "drv_in")):
        create_namelists(case)

    test_name = casebaseid if casebaseid is not None else case.get_value("CASE")
    with TestStatus(test_dir=caseroot, test_name=test_name) as ts:
        try:
            # Inside this try we catch non-fatal errors, i.e. errors involving
            # baseline operations which may not directly impact the viability
            # of this case
            if compare and not compare_name:
                compare_name = case.get_value("BASELINE_NAME_CMP")
                expect(compare_name, "Was asked to do baseline compare but unable to determine baseline name")
                logging.info("Comparing namelists with baselines '{}'".format(compare_name))
            if generate and not generate_name:
                generate_name = case.get_value("BASELINE_NAME_GEN")
                expect(generate_name, "Was asked to do baseline generation but unable to determine baseline name")
                logging.info("Generating namelists to baselines '{}'".format(generate_name))

            success = True
            output = ""
            if compare:
                success, output = _do_full_nl_comp(case, test_name, compare_name, baseline_root)
                if not success and ts.get_status(RUN_PHASE) is not None:
                    run_warn = \
"""NOTE: It is not necessarily safe to compare namelists after the RUN
phase has completed. Running a case can pollute namelists. The namelists
kept in the baselines are pre-RUN namelists."""
                    output += run_warn
                    logging.info(run_warn)
            if generate:
                _do_full_nl_gen(case, test_name, generate_name, baseline_root)
        except:
            ts.set_status(NAMELIST_PHASE, TEST_FAIL_STATUS)
            success = False
            warn = "Exception during namelist operations:\n{}\n{}".format(sys.exc_info()[1], traceback.format_exc())
            output += warn
            logging.warning(warn)
        finally:
            ts.set_status(NAMELIST_PHASE, TEST_PASS_STATUS if success else TEST_FAIL_STATUS)
            try:
                append_status(output, logfile_name, caseroot=caseroot)
            except IOError:
                pass

    return success
def _log_output(self, test, output):
###########################################################################
    test_dir = self._get_test_dir(test)
    if not os.path.isdir(test_dir):
        # Note: making this directory could cause create_newcase to fail
        # if this is run before create_newcase.
        os.makedirs(test_dir)

    append_status(output, caseroot=test_dir, sfile="TestStatus.log")
def _do_external(script_name, caseroot, rundir, lid, prefix):
###############################################################################
    expect(os.path.isfile(script_name), "External script {} not found".format(script_name))
    filename = "{}.external.log.{}".format(prefix, lid)
    outfile = os.path.join(rundir, filename)
    append_status("Starting script {}".format(script_name), "CaseStatus")
    run_sub_or_cmd(script_name, [caseroot], os.path.basename(script_name).split('.', 1)[0],
                   [caseroot], logfile=outfile)
    append_status("Completed script {}".format(script_name), "CaseStatus")
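# Illustration only: how a driver might invoke the hook above for a pre-run
# user script. The paths, lid stamp, and prefix are hypothetical assumptions.
caseroot = "/scratch/cases/mycase"
rundir = os.path.join(caseroot, "run")
lid = time.strftime("%y%m%d-%H%M%S")
_do_external(os.path.join(caseroot, "prerun_script"), caseroot, rundir, lid,
             prefix="prerun")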
def _component_compare_move(self, suffix):
    cmd = os.path.join(self._case.get_value("SCRIPTSROOT"), "Tools",
                       "component_compare_move.sh")
    rc, out, err = run_cmd("%s -rundir %s -testcase %s -suffix %s"
                           % (cmd, self._case.get_value('RUNDIR'),
                              self._case.get_value('CASE'), suffix),
                           ok_to_fail=True)
    if rc == 0:
        append_status(out, sfile="TestStatus.log")
    else:
        append_status("component_compare_move.sh failed out: %s\n\nerr: %s\n" % (out, err),
                      sfile="TestStatus.log")
def _component_compare_test(self, suffix1, suffix2):
    """
    Return value is not generally checked, but is provided in case a custom
    run case needs indirection based on success.
    """
    success, comments = compare_test(self._case, suffix1, suffix2)
    append_status(comments, sfile="TestStatus.log")
    status = TEST_PASS_STATUS if success else TEST_FAIL_STATUS
    with self._test_status:
        self._test_status.set_status("%s_%s_%s" % (COMPARE_PHASE, suffix1, suffix2), status)
    return success
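# ---------------------------------------------------------------------------
# Illustration (not from the source above): a hypothetical restart test using
# the comparison method. The "base"/"rest" suffixes follow common CIME naming
# but are assumptions here, as are the class name and the run_indv helper.
# ---------------------------------------------------------------------------
class MYRESTARTTEST(SystemTestsCommon):

    def run_phase(self):
        self.run_indv(suffix="base")                  # full-length run
        self._case.set_value("CONTINUE_RUN", True)
        self.run_indv(suffix="rest")                  # restarted run
        self._component_compare_test("base", "rest")  # histories must match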
def clean(case, cleanlist=None):
###############################################################################
    clm_config_opts = case.get_value("CLM_CONFIG_OPTS")
    comp_lnd = case.get_value("COMP_LND")
    if cleanlist is None:
        cleanlist = case.get_value("COMP_CLASSES").split(',')
        cleanlist = [x.lower().replace('drv', 'cpl') for x in cleanlist]
        testcase = case.get_value("TESTCASE")
        # we only want to clean clm here if it is clm4_0 otherwise remove
        # it from the cleanlist
        if testcase is not None and comp_lnd == "clm" and \
                clm_config_opts is not None and "lnd" in cleanlist and \
                "clm4_0" not in clm_config_opts:
            cleanlist.remove('lnd')

    debug = case.get_value("DEBUG")
    use_esmf_lib = case.get_value("USE_ESMF_LIB")
    build_threaded = case.get_value("BUILD_THREADED")
    gmake = case.get_value("GMAKE")
    caseroot = case.get_value("CASEROOT")
    casetools = case.get_value("CASETOOLS")

    os.environ["DEBUG"] = stringify_bool(debug)
    os.environ["USE_ESMF_LIB"] = stringify_bool(use_esmf_lib)
    os.environ["BUILD_THREADED"] = stringify_bool(build_threaded)
    os.environ["CASEROOT"] = case.get_value("CASEROOT")
    os.environ["COMP_INTERFACE"] = case.get_value("COMP_INTERFACE")
    os.environ["PIO_VERSION"] = str(case.get_value("PIO_VERSION"))
    os.environ["CLM_CONFIG_OPTS"] = clm_config_opts if clm_config_opts is not None else ""

    cmd = gmake + " -f " + casetools + "/Makefile"
    for item in cleanlist:
        cmd = cmd + " clean" + item
    logger.info("calling %s " % cmd)
    run_cmd(cmd)

    # unlink locked env_build file
    locked_file = os.path.join(caseroot, "LockedFiles/env_build.xml")
    if os.path.isfile(locked_file):
        os.unlink(locked_file)

    # reset following values in xml files
    case.set_value("SMP_BUILD", str(0))
    case.set_value("NINST_BUILD", str(0))
    case.set_value("BUILD_STATUS", str(0))
    case.set_value("BUILD_COMPLETE", "FALSE")
    case.flush()

    # append record of this call to CaseStatus
    msg = "cleanbuild %s " % " ".join(cleanlist)
    append_status(msg, caseroot=caseroot, sfile="CaseStatus")
def case_cmpgen_namelists(case, compare=False, generate=False, compare_name=None,
                          generate_name=None, baseline_root=None,
                          logfile_name="TestStatus.log"):
    expect(case.get_value("TEST"), "Only makes sense to run this for a test case")

    caseroot, casebaseid = case.get_value("CASEROOT"), case.get_value("CASEBASEID")

    if not compare:
        compare = case.get_value("COMPARE_BASELINE")
    if not generate:
        generate = case.get_value("GENERATE_BASELINE")

    if not compare and not generate:
        logging.info("Nothing to do")
        return True

    # create namelists for case if they haven't been already
    casedocs = os.path.join(caseroot, "CaseDocs")
    if not os.path.exists(os.path.join(casedocs, "drv_in")):
        create_namelists(case)

    test_name = casebaseid if casebaseid is not None else case.get_value("CASE")
    with TestStatus(test_dir=caseroot, test_name=test_name) as ts:
        try:
            # Inside this try we catch non-fatal errors, i.e. errors involving
            # baseline operations which may not directly impact the viability
            # of this case
            if compare and not compare_name:
                compare_name = case.get_value("BASELINE_NAME_CMP")
                compare_name = get_current_branch() if compare_name is None else compare_name
                expect(compare_name, "Was asked to do baseline compare but unable to determine baseline name")
                logging.info("Comparing namelists with baselines '%s'" % compare_name)
            if generate and not generate_name:
                generate_name = case.get_value("BASELINE_NAME_GEN")
                generate_name = get_current_branch() if generate_name is None else generate_name
                expect(generate_name, "Was asked to do baseline generation but unable to determine baseline name")
                logging.info("Generating namelists to baselines '%s'" % generate_name)

            success = True
            output = ""
            if compare:
                success, output = _do_full_nl_comp(case, test_name, compare_name, baseline_root)
            if generate:
                _do_full_nl_gen(case, test_name, generate_name, baseline_root)
        except:
            ts.set_status(NAMELIST_PHASE, TEST_FAIL_STATUS)
            success = False
            warn = "Exception during namelist operations:\n%s\n%s" % (sys.exc_info()[1], traceback.format_exc())
            output += warn
            logging.warning(warn)
        finally:
            ts.set_status(NAMELIST_PHASE, TEST_PASS_STATUS if success else TEST_FAIL_STATUS)
            append_status(output, caseroot=caseroot, sfile=logfile_name)

    return success
def _consumer(self, test, test_phase, phase_method):
###########################################################################
    before_time = time.time()
    success, errors = self._run_catch_exceptions(test, test_phase, phase_method)
    elapsed_time = time.time() - before_time
    status = (TEST_PEND_STATUS if test_phase == RUN_PHASE and not self._no_batch
              else TEST_PASS_STATUS) if success else TEST_FAIL_STATUS

    if status != TEST_PEND_STATUS:
        self._update_test_status(test, test_phase, status)

    if not self._work_remains(test):
        self._completed_tests += 1
        total = len(self._tests)
        status_str = "Finished {} for test {} in {:f} seconds ({}). [COMPLETED {:d} of {:d}]".format(
            test_phase, test, elapsed_time, status, self._completed_tests, total)
    else:
        status_str = "Finished {} for test {} in {:f} seconds ({})".format(
            test_phase, test, elapsed_time, status)

    if not success:
        status_str += "\n    Case dir: {}\n".format(self._get_test_dir(test))
        status_str += "    Errors were:\n        {}\n".format(
            "\n        ".join(errors.splitlines()))

    logger.info(status_str)

    if test_phase in [CREATE_NEWCASE_PHASE, XML_PHASE]:
        # These are the phases for which TestScheduler is responsible for
        # updating the TestStatus file
        self._update_test_status_file(test, test_phase, status)

    if test_phase == XML_PHASE:
        append_status("Case Created using: " + " ".join(sys.argv), "README.case",
                      caseroot=self._get_test_dir(test))

    # On batch systems, we want to immediately submit to the queue, because
    # it's very cheap to submit and will get us a better spot in line
    if (success and not self._no_run and not self._no_batch and
            test_phase == MODEL_BUILD_PHASE):
        logger.info("Starting {} for test {} with 1 proc on interactive node and {:d} procs on compute nodes".format(
            RUN_PHASE, test, self._get_procs_needed(test, RUN_PHASE, no_batch=True)))
        self._update_test_status(test, RUN_PHASE, TEST_PEND_STATUS)
        self._consumer(test, RUN_PHASE, self._run_phase)
def case_st_archive(case):
###############################################################################
    caseroot = case.get_value("CASEROOT")
    logger.info("st_archive starting")
    # do short-term archiving
    append_status("st_archiving starting", caseroot=caseroot, sfile="CaseStatus")

    cmd = os.path.join(caseroot, "Tools/st_archive") + " >> stArchiveStatus 2>&1"
    rc, out, err = run_cmd(cmd, ok_to_fail=True)
    if rc != 0:
        append_status("st_archive failed: %s \nerr = %s" % (out, err),
                      caseroot=caseroot, sfile="CaseStatus")
        return False

    append_status("st_archiving completed", caseroot=caseroot, sfile="CaseStatus")
    logger.info("st_archive completed")

    # resubmit case if appropriate
    resubmit = case.get_value("RESUBMIT")
    if resubmit > 0:
        append_status("resubmitting from st_archive", caseroot=caseroot, sfile="CaseStatus")
        logger.info("resubmitting from st_archive, resubmit=%d" % resubmit)
        submit(case, resubmit=True)

    return True
def _component_compare_test(self, suffix1, suffix2):
    cmd = os.path.join(self._case.get_value("SCRIPTSROOT"), "Tools",
                       "component_compare_test.sh")
    rc, out, err = run_cmd("%s -rundir %s -testcase %s -testcase_base %s -suffix1 %s -suffix2 %s -msg 'Compare %s and %s'"
                           % (cmd, self._case.get_value('RUNDIR'),
                              self._case.get_value('CASE'),
                              self._case.get_value('CASEBASEID'),
                              suffix1, suffix2, suffix1, suffix2),
                           ok_to_fail=True)
    logger.debug("run %s results %d %s %s" % (cmd, rc, out, err))
    if rc == 0:
        append_status(out.replace("compare", "compare functionality", 1) + "\n",
                      sfile="TestStatus")
    else:
        append_status("component_compare_test.sh failed out: %s\n\nerr: %s\n" % (out, err),
                      sfile="TestStatus.log")
        return False
    return True
def _generate_baseline(self):
    """
    generate a new baseline case based on the current test
    """
    with self._test_status:
        # generate baseline
        success, comments = generate_baseline(self._case)
        append_status(comments, sfile="TestStatus.log")
        status = TEST_PASS_STATUS if success else TEST_FAIL_STATUS
        self._test_status.set_status("%s" % GENERATE_PHASE, status)
        basegen_dir = os.path.join(self._case.get_value("BASELINE_ROOT"),
                                   self._case.get_value("BASEGEN_CASE"))

        # copy latest cpl log to baseline
        # drop the date so that the name is generic
        newestcpllogfile = self._get_latest_cpl_log()
        shutil.copyfile(newestcpllogfile, os.path.join(basegen_dir, "cpl.log.gz"))
def case_lt_archive(case):
###############################################################################
    caseroot = case.get_value("CASEROOT")

    # max number of threads needed by scripts
    os.environ["maxthrds"] = "1"

    # document start
    append_status("lt_archive starting", caseroot=caseroot, sfile="CaseStatus")

    # determine status of run and short term archiving
    runComplete = does_file_have_string(os.path.join(caseroot, "CaseStatus"),
                                        "Run SUCCESSFUL")
    staComplete = does_file_have_string(os.path.join(caseroot, "CaseStatus"),
                                        "st_archiving completed")

    # set up environment vars and call the lt_archive.sh script
    if runComplete and staComplete:
        os.environ["DOUT_S_ROOT"] = case.get_value("DOUT_S_ROOT")
        os.environ["DOUT_L_MSROOT"] = case.get_value("DOUT_L_MSROOT")
        os.environ["DOUT_L_HPSS_ACCNT"] = case.get_value("DOUT_L_HPSS_ACCNT")

        lid = time.strftime("%y%m%d-%H%M%S")
        lt_archive = LTArchive(case.get_value("MACH"))
        lt_archive_args = lt_archive.get_lt_archive_args()
        if lt_archive_args is None:
            lt_archive_args = " "
        cmd = os.path.join(caseroot, "Tools/lt_archive.sh") \
            + lt_archive_args + "ltArchiveStatus." + lid + " 2>&1"
        run_cmd_no_fail(cmd, from_dir=caseroot)
    else:
        logger.warn("runComplete %s staComplete %s" % (runComplete, staComplete))
        expect(False,
               "lt_archive: run or st_archive is not yet complete or was not successful. "
               "Unable to perform long term archive...")

    # document completion
    append_status("lt_archive completed", caseroot=caseroot, sfile="CaseStatus")

    return True
def case_lt_archive(case):
###############################################################################
    caseroot = case.get_value("CASEROOT")

    # max number of threads needed by scripts
    os.environ["maxthrds"] = "1"

    # document start
    append_status("lt_archive starting", caseroot=caseroot, sfile="CaseStatus")

    # determine status of run and short term archiving
    runComplete = does_file_have_string(os.path.join(caseroot, "CaseStatus"),
                                        "run SUCCESSFUL")
    staComplete = does_file_have_string(os.path.join(caseroot, "stArchiveStatus"),
                                        "st_archive_complete")

    # set up environment vars and call the lt_archive.sh script
    if runComplete and staComplete:
        os.environ["DOUT_S_ROOT"] = case.get_value("DOUT_S_ROOT")
        os.environ["DOUT_L_MSROOT"] = case.get_value("DOUT_L_MSROOT")
        os.environ["DOUT_L_HPSS_ACCNT"] = case.get_value("DOUT_L_HPSS_ACCNT")

        lid = time.strftime("%y%m%d-%H%M%S")
        lt_archive = LTArchive(case.get_value("MACH"))
        lt_archive_args = lt_archive.get_lt_archive_args()
        cmd = os.path.join(caseroot, "Tools/lt_archive.sh") \
            + lt_archive_args + "ltArchiveStatus." + lid + " 2>&1"
        run_cmd(cmd, from_dir=caseroot)
    else:
        expect(False,
               "lt_archive: run or st_archive is not yet complete or was not successful. "
               "Unable to perform long term archive...")

    # document completion
    append_status("lt_archive completed", caseroot=caseroot, sfile="CaseStatus")

    return True
def run(self):
    """
    Do NOT override this method, this method is the framework that controls
    the run phase. run_phase is the extension point that subclasses should
    use.
    """
    success = True
    start_time = time.time()
    try:
        expect(self._test_status.get_status(MODEL_BUILD_PHASE) == TEST_PASS_STATUS,
               "Model was not built!")
        with self._test_status:
            self._test_status.set_status(RUN_PHASE, TEST_PENDING_STATUS)

        self.run_phase()

        if self._case.get_value("GENERATE_BASELINE"):
            self._generate_baseline()

        if self._case.get_value("COMPARE_BASELINE"):
            self._compare_baseline()

        self._check_for_memleak()
    except:
        success = False
        excmsg = "Exception during run:\n%s\n%s" % (sys.exc_info()[1], traceback.format_exc())
        logger.warning(excmsg)
        append_status(excmsg, sfile="TestStatus.log")

    # Writing the run status should be the very last thing due to wait_for_tests
    time_taken = time.time() - start_time
    status = TEST_PASS_STATUS if success else TEST_FAIL_STATUS
    with self._test_status:
        self._test_status.set_status(RUN_PHASE, status, comments=("time=%d" % int(time_taken)))

    # We only return success if every phase, build and later, passed
    return self._test_status.get_overall_test_status(ignore_namelists=True) == TEST_PASS_STATUS
def case_st_archive(case):
###############################################################################
    """
    Create archive object and perform short term archiving
    """
    caseroot = case.get_value("CASEROOT")

    dout_s_root = case.get_value("DOUT_S_ROOT")
    if dout_s_root is None or dout_s_root == "UNSET":
        expect(False, "XML variable DOUT_S_ROOT is required for the short-term archiver")
    if not isdir(dout_s_root):
        os.makedirs(dout_s_root)

    dout_s_save_interim = case.get_value("DOUT_S_SAVE_INTERIM_RESTART_FILES")
    if dout_s_save_interim == "FALSE" or dout_s_save_interim == "UNSET":
        rest_n = case.get_value("REST_N")
        stop_n = case.get_value("STOP_N")
        if rest_n < stop_n:
            logger.warn("Restart files from end of run will be saved, "
                        "interim restart files will be deleted")

    logger.info("st_archive starting")

    # do short-term archiving
    append_status("st_archiving starting", caseroot=caseroot, sfile="CaseStatus")

    archive = EnvArchive(infile=os.path.join(caseroot, "env_archive.xml"))

    _archive_process(case, archive)

    append_status("st_archiving completed", caseroot=caseroot, sfile="CaseStatus")
    logger.info("st_archive completed")

    # resubmit case if appropriate
    resubmit = case.get_value("RESUBMIT")
    if resubmit > 0:
        append_status("resubmitting from st_archive", caseroot=caseroot, sfile="CaseStatus")
        logger.info("resubmitting from st_archive, resubmit=%d" % resubmit)
        if case.get_value("MACH") == "mira":
            expect(os.path.isfile(".original_host"), "ERROR alcf host file not found")
            with open(".original_host", "r") as fd:
                sshhost = fd.read()
            run_cmd("ssh cooleylogin1 ssh %s '%s/case.submit %s --resubmit' "
                    % (sshhost, caseroot, caseroot), verbose=True)
        else:
            submit(case, resubmit=True)

    return True
def _compare_baseline(self):
    """
    compare the current test output to a baseline result
    """
    with self._test_status:
        # compare baseline
        success, comments = compare_baseline(self._case)
        append_status(comments, sfile="TestStatus.log")
        status = TEST_PASS_STATUS if success else TEST_FAIL_STATUS
        ts_comments = comments if "\n" not in comments else None
        self._test_status.set_status("%s_baseline" % COMPARE_PHASE, status, comments=ts_comments)
        basecmp_dir = os.path.join(self._case.get_value("BASELINE_ROOT"),
                                   self._case.get_value("BASECMP_CASE"))

        # compare memory usage to baseline
        newestcpllogfile = self._get_latest_cpl_log()
        memlist = self._get_mem_usage(newestcpllogfile)
        baselog = os.path.join(basecmp_dir, "cpl.log.gz")
        if not os.path.isfile(baselog):
            # for backward compatibility
            baselog = os.path.join(basecmp_dir, "cpl.log")
        if os.path.isfile(baselog) and len(memlist) > 3:
            blmem = self._get_mem_usage(baselog)[-1][1]
            curmem = memlist[-1][1]
            diff = (curmem - blmem) / blmem
            if diff < 0.1:
                self._test_status.set_status(MEMCOMP_PHASE, TEST_PASS_STATUS)
            else:
                comment = "Error: Memory usage increase > 10% from baseline"
                self._test_status.set_status(MEMCOMP_PHASE, TEST_FAIL_STATUS, comments=comment)
                append_status(comment, sfile="TestStatus.log")

        # compare throughput to baseline
        current = self._get_throughput(newestcpllogfile)
        baseline = self._get_throughput(baselog)
        # comparing ypd so bigger is better
        if baseline is not None and current is not None:
            diff = (baseline - current) / baseline
            if diff < 0.25:
                self._test_status.set_status(THROUGHPUT_PHASE, TEST_PASS_STATUS)
            else:
                comment = "Error: Computation time increase > 25% from baseline"
                self._test_status.set_status(THROUGHPUT_PHASE, TEST_FAIL_STATUS, comments=comment)
                append_status(comment, sfile="TestStatus.log")
def pre_run_check(case):
###############################################################################
    # Pre run initialization code..
    caseroot = case.get_value("CASEROOT")
    din_loc_root = case.get_value("DIN_LOC_ROOT")
    batchsubmit = case.get_value("BATCHSUBMIT")
    mpilib = case.get_value("MPILIB")
    rundir = case.get_value("RUNDIR")
    build_complete = case.get_value("BUILD_COMPLETE")

    # check for locked files.
    check_lockedfiles(case.get_value("CASEROOT"))
    logger.debug("check_lockedfiles OK")

    # check that build is done
    expect(build_complete,
           "BUILD_COMPLETE is not true\nPlease rebuild the model interactively")
    logger.debug("build complete is %s " % build_complete)

    # load the module environment...
    env_module = case.get_env("mach_specific")
    env_module.load_env_for_case(compiler=case.get_value("COMPILER"),
                                 debug=case.get_value("DEBUG"),
                                 mpilib=case.get_value("MPILIB"))

    # set environment variables
    # This is a requirement for yellowstone only
    if mpilib == "mpi-serial" and "MP_MPILIB" in os.environ:
        del os.environ["MP_MPILIB"]
    else:
        os.environ["MPILIB"] = mpilib

    if batchsubmit is None or len(batchsubmit) == 0 or batchsubmit == 'UNSET':
        os.environ["LBQUERY"] = "FALSE"
        os.environ["BATCHQUERY"] = "undefined"
    else:
        os.environ["LBQUERY"] = "TRUE"

    # create the timing directories, optionally cleaning them if needed.
    if not os.path.isdir(rundir):
        os.mkdir(rundir)

    if os.path.isdir(os.path.join(rundir, "timing")):
        shutil.rmtree(os.path.join(rundir, "timing"))

    os.makedirs(os.path.join(rundir, "timing", "checkpoints"))

    # run preview namelists
    preview_namelists(case)

    # document process
    append_status("Run started ", caseroot=caseroot, sfile="CaseStatus")

    logger.info("-------------------------------------------------------------------------")
    logger.info(" - To prestage required restarts, untar a restart.tar file into %s" % rundir)
    logger.info(" - Case input data directory (DIN_LOC_ROOT) is %s " % din_loc_root)
    logger.info(" - Checking for required input datasets in DIN_LOC_ROOT")
    logger.info("-------------------------------------------------------------------------")
def append_status_cprnc_log(msg, logfile_name, test_dir):
###############################################################################
    try:
        append_status(msg, logfile_name, caseroot=test_dir)
    except IOError:
        pass
def compare_baseline(self):
    """
    compare the current test output to a baseline result
    """
    if self._runstatus != "PASS":
        append_status("Cannot compare baselines, test did not pass.\n", sfile="TestStatus.log")
        return

    baselineroot = self._case.get_value("BASELINE_ROOT")
    basecmp_dir = os.path.join(baselineroot, self._case.get_value("BASECMP_CASE"))
    for bdir in (baselineroot, basecmp_dir):
        if not os.path.isdir(bdir):
            append_status("GFAIL %s baseline\n" % self._case.get_value("CASEBASEID"), sfile="TestStatus")
            append_status("ERROR %s does not exist" % bdir, sfile="TestStatus.log")
            return -1

    compgen = os.path.join(self._case.get_value("SCRIPTSROOT"), "Tools",
                           "component_compgen_baseline.sh")
    compgen += " -baseline_dir " + basecmp_dir
    compgen += " -test_dir " + self._case.get_value("RUNDIR")
    compgen += " -compare_tag " + self._case.get_value("BASELINE_NAME_CMP")
    compgen += " -testcase " + self._case.get_value("CASE")
    compgen += " -testcase_base " + self._case.get_value("CASEBASEID")
    rc, out, err = run_cmd(compgen, ok_to_fail=True)
    append_status(out.replace("compare", "compare baseline", 1), sfile="TestStatus")
    if rc != 0:
        append_status("Error in Baseline compare: %s\n%s" % (out, err), sfile="TestStatus.log")

    # compare memory usage to baseline
    newestcpllogfile = self._get_latest_cpl_log()
    memlist = self._get_mem_usage(newestcpllogfile)
    baselog = os.path.join(basecmp_dir, "cpl.log.gz")
    if not os.path.isfile(baselog):
        # for backward compatibility
        baselog = os.path.join(basecmp_dir, "cpl.log")
    if os.path.isfile(baselog) and len(memlist) > 3:
        blmem = self._get_mem_usage(baselog)[-1][1]
        curmem = memlist[-1][1]
        diff = (curmem - blmem) / blmem
        if diff < 0.1:
            append_status("PASS Memory usage baseline compare ", sfile="TestStatus")
        else:
            append_status("FAIL Memory usage increase > 10% from baseline", sfile="TestStatus")

    # compare throughput to baseline
    current = self._get_throughput(newestcpllogfile)
    baseline = self._get_throughput(baselog)
    # comparing ypd so bigger is better
    if baseline is not None and current is not None:
        diff = (baseline - current) / baseline
        if diff < 0.25:
            append_status("PASS Throughput baseline compare ", sfile="TestStatus")
        else:
            append_status("FAIL Throughput increase > 25% from baseline", sfile="TestStatus")
def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False):
###############################################################################
    os.chdir(caseroot)
    msg = "case.setup starting"
    append_status(msg, caseroot=caseroot, sfile="CaseStatus")

    cimeroot = get_cime_root(case)

    # Check that $DIN_LOC_ROOT exists - and abort unless this is a namelist-compare test (SBN)
    din_loc_root = case.get_value("DIN_LOC_ROOT")
    testcase = case.get_value("TESTCASE")
    expect(not (not os.path.isdir(din_loc_root) and testcase != "SBN"),
           "inputdata root is not a directory: \"$din_loc_root\" ")

    # Check that userdefined settings are specified before expanding variables
    for vid, value in case:
        expect(not (type(value) is str and "USERDEFINED_required_build" in value),
               "Parameter '%s' must be defined" % vid)

    # Create batch script
    if reset or clean:
        # Clean batch script
        backup_dir = "PESetupHist/b.%s" % time.strftime("%y%m%d-%H%M%S")
        if not os.path.isdir(backup_dir):
            os.makedirs(backup_dir)

        # back up relevant files
        for fileglob in ["case.run", "env_build.xml", "env_mach_pes.xml", "Macros*"]:
            for filename in glob.glob(fileglob):
                shutil.copy(filename, backup_dir)
        if os.path.exists("case.run"):
            os.remove("case.run")

        # only do the following if we are NOT in testmode
        if not test_mode:
            # rebuild the models (even on restart)
            case.set_value("BUILD_COMPLETE", False)

            # backup and then clean test script
            if os.path.exists("case.test"):
                shutil.copy("case.test", backup_dir)
                os.remove("case.test")
                logger.info("Successfully cleaned test script case.test")

            if os.path.exists("case.testdriver"):
                shutil.copy("case.testdriver", backup_dir)
                os.remove("case.testdriver")
                logger.info("Successfully cleaned test script case.testdriver")

        logger.info("Successfully cleaned batch script case.run")
        logger.info("Some files have been saved to %s" % backup_dir)

        msg = "case.setup clean complete"
        append_status(msg, caseroot=caseroot, sfile="CaseStatus")

    if not clean:
        case.load_env()

        models = case.get_values("COMP_CLASSES")
        mach = case.get_value("MACH")
        compiler = case.get_value("COMPILER")
        debug = case.get_value("DEBUG")
        mpilib = case.get_value("MPILIB")
        sysos = case.get_value("OS")
        expect(mach is not None, "xml variable MACH is not set")

        # creates the Macros.make, Depends.compiler, Depends.machine, Depends.machine.compiler
        # and env_mach_specific.xml if they don't already exist.
        if not os.path.isfile("Macros.make") or not os.path.isfile("env_mach_specific.xml"):
            configure(Machines(machine=mach), caseroot, ["Makefile"], compiler, mpilib, debug, sysos)

        # Set tasks to 1 if mpi-serial library
        if mpilib == "mpi-serial":
            for vid, value in case:
                if vid.startswith("NTASKS_") and value != 1:
                    case.set_value(vid, 1)

        # Check ninst.
        # In CIME there can be multiple instances of each component model (an
        # ensemble); NINST is the number of instances of that component.
        for comp in models:
            if comp == "DRV":
                continue
            ninst = case.get_value("NINST_%s" % comp)
            ntasks = case.get_value("NTASKS_%s" % comp)
            if ninst > ntasks:
                if ntasks == 1:
                    case.set_value("NTASKS_%s" % comp, ninst)
                else:
                    expect(False, "NINST_%s value %d greater than NTASKS_%s %d"
                           % (comp, ninst, comp, ntasks))

        if os.path.exists("case.run"):
            logger.info("Machine/Decomp/Pes configuration has already been done ...skipping")
        else:
            _check_pelayouts_require_rebuild(case, models)

            if os.path.exists("LockedFiles/env_build.xml"):
                os.remove("LockedFiles/env_build.xml")

            case.flush()
            check_lockedfiles()

            env_mach_pes = case.get_env("mach_pes")
            pestot = env_mach_pes.get_total_tasks(models)
            logger.debug("at update TOTALPES = %s" % pestot)
            case.set_value("TOTALPES", pestot)
            thread_count = env_mach_pes.get_max_thread_count(models)
            if thread_count > 1:
                case.set_value("BUILD_THREADED", True)

            expect(not (case.get_value("BUILD_THREADED") and compiler == "nag"),
                   "it is not possible to run with OpenMP if using the NAG Fortran compiler")

            cost_pes = env_mach_pes.get_cost_pes(pestot, thread_count,
                                                 machine=case.get_value("MACH"))
            case.set_value("COST_PES", cost_pes)

            # create batch file
            logger.info("Creating batch script case.run")

            # Use BatchFactory to get the appropriate instance of a BatchMaker,
            # use it to create our batch scripts
            env_batch = case.get_env("batch")
            num_nodes = env_mach_pes.get_total_nodes(pestot, thread_count)
            tasks_per_node = env_mach_pes.get_tasks_per_node(pestot, thread_count)
            for job in env_batch.get_jobs():
                input_batch_script = os.path.join(case.get_value("MACHDIR"),
                                                  env_batch.get_value('template', subgroup=job))
                if job == "case.test" and testcase is not None and not test_mode:
                    logger.info("Writing %s script" % job)
                    testscript = os.path.join(cimeroot, "scripts", "Testing", "Testcases",
                                              "%s_script" % testcase)
                    # Short term fix to be removed when csh tests are removed
                    if not os.path.exists(testscript):
                        env_batch.make_batch_script(input_batch_script, job, case, pestot,
                                                    tasks_per_node, num_nodes, thread_count)
                elif job != "case.test":
                    logger.info("Writing %s script from input template %s" % (job, input_batch_script))
                    env_batch.make_batch_script(input_batch_script, job, case, pestot,
                                                tasks_per_node, num_nodes, thread_count)

            # Make a copy of env_mach_pes.xml in order to be able
            # to check that it does not change once case.setup is invoked
            logger.info("Locking file env_mach_pes.xml")
            case.flush()
            logger.debug("at copy TOTALPES = %s" % case.get_value("TOTALPES"))
            shutil.copy("env_mach_pes.xml", "LockedFiles")

        # Create user_nl files for the required number of instances
        if not os.path.exists("user_nl_cpl"):
            logger.info("Creating user_nl_xxx files for components and cpl")

        # loop over models
        for model in models:
            comp = case.get_value("COMP_%s" % model)
            logger.info("Building %s usernl files" % model)
            _build_usernl_files(case, model, comp)
            if comp == "cism":
                run_cmd_no_fail("%s/../components/cism/cime_config/cism.template %s"
                                % (cimeroot, caseroot))

        _build_usernl_files(case, "drv", "cpl")

        # Create needed directories for case
        create_dirs(case)

        logger.info("If an old case build already exists, might want to run 'case.build --clean' before building")

        # Create test script if appropriate
        # Short term fix to be removed when csh tests are removed
        if os.path.exists("env_test.xml"):
            if not os.path.exists("case.test"):
                logger.info("Starting testcase.setup")
                run_cmd_no_fail("./testcase.setup -caseroot %s" % caseroot)
                logger.info("Finished testcase.setup")

        # some tests need namelists created here (ERP)
        if test_mode:
            create_namelists(case)

        msg = "case.setup complete"
        append_status(msg, caseroot=caseroot, sfile="CaseStatus")

        # Record env information
        env_module = case.get_env("mach_specific")
        env_module.make_env_mach_specific_file(compiler, debug, mpilib, "sh")
        env_module.make_env_mach_specific_file(compiler, debug, mpilib, "csh")
        with open("software_environment.txt", "w") as f:
            f.write(env_module.list_modules())
        run_cmd_no_fail("echo -e '\n' >> software_environment.txt && \
env >> software_environment.txt")
    else:
        if not os.path.exists(nlfile):
            shutil.copy(model_nl, nlfile)

###############################################################################
def _case_setup_impl(case, caseroot, casebaseid, clean=False, test_mode=False, reset=False):
###############################################################################
    os.chdir(caseroot)
    msg = "case.setup starting"
    append_status(msg, caseroot=caseroot, sfile="CaseStatus")

    cimeroot = get_cime_root(case)

    # Check that $DIN_LOC_ROOT exists - and abort unless this is a namelist-compare test (SBN)
    din_loc_root = case.get_value("DIN_LOC_ROOT")
    testcase = case.get_value("TESTCASE")
    expect(not (not os.path.isdir(din_loc_root) and testcase != "SBN"),
           "inputdata root is not a directory: \"$din_loc_root\" ")

    # Check that userdefined settings are specified before expanding variables
    for vid, value in case:
        expect(not (type(value) is str and "USERDEFINED_required_build" in value),
               "Parameter '%s' must be defined" % vid)
def compare_test_results(baseline_name, baseline_root, test_root, compiler,
                         test_id=None, compare_tests=None, namelists_only=False,
                         hist_only=False):
###############################################################################
    """Compares with baselines for all matching tests

    Outputs results for each test to stdout (one line per test); possible
    status codes are: PASS, FAIL, SKIP. (A SKIP denotes a test that did not
    make it to the run phase or a test for which the run phase did not pass:
    we skip baseline comparisons in this case.)

    In addition, creates files named compare.log.BASELINE_NAME.TIMESTAMP in
    each test directory, which contain more detailed output. Also creates
    *.cprnc.out.BASELINE_NAME.TIMESTAMP files in each run directory.

    Returns True if all tests generated either PASS or SKIP results, False if
    there was at least one FAIL result.
    """
    test_id_glob = "*%s*%s*" % (compiler, baseline_name) if test_id is None else "*%s" % test_id
    test_status_files = glob.glob("%s/%s/%s" % (test_root, test_id_glob, TEST_STATUS_FILENAME))
    expect(test_status_files,
           "No matching test cases found for %s/%s/%s" % (test_root, test_id_glob, TEST_STATUS_FILENAME))

    # ID to use in the log file names, to avoid file name collisions with
    # earlier files that may exist.
    log_id = CIME.utils.get_timestamp()

    logfile_name = "compare.log.%s.%s" % (baseline_name.replace("/", "_"), log_id)

    all_pass_or_skip = True

    for test_status_file in test_status_files:
        test_dir = os.path.dirname(test_status_file)
        ts = TestStatus(test_dir=test_dir)
        test_name = ts.get_name()
        if compare_tests in [[], None] or CIME.utils.match_any(test_name, compare_tests):
            append_status("Comparing against baseline with compare_test_results:\n"
                          + "Baseline: %s\n" % baseline_name
                          + "In baseline_root: %s" % baseline_root,
                          logfile_name, caseroot=test_dir)

            if not hist_only:
                nl_compare_result = None
                nl_compare_comment = ""
                nl_result = ts.get_status(SETUP_PHASE)
                if nl_result is None:
                    nl_compare_result = "SKIP"
                    nl_compare_comment = "Test did not make it to setup phase"
                    nl_do_compare = False
                else:
                    nl_do_compare = True
            else:
                nl_do_compare = False

            detailed_comments = ""
            if not namelists_only:
                compare_result = None
                compare_comment = ""
                run_result = ts.get_status(RUN_PHASE)
                if run_result is None:
                    compare_result = "SKIP"
                    compare_comment = "Test did not make it to run phase"
                    do_compare = False
                elif run_result != TEST_PASS_STATUS:
                    compare_result = "SKIP"
                    compare_comment = "Run phase did not pass"
                    do_compare = False
                else:
                    do_compare = True
            else:
                do_compare = False

            if nl_do_compare or do_compare:
                with Case(test_dir) as case:
                    if nl_do_compare:
                        nl_success = compare_namelists(case, baseline_name, baseline_root,
                                                       logfile_name, compiler)
                        if nl_success:
                            nl_compare_result = TEST_PASS_STATUS
                            nl_compare_comment = ""
                        else:
                            nl_compare_result = TEST_FAIL_STATUS
                            nl_compare_comment = "See %s/%s" % (test_dir, logfile_name)
                            all_pass_or_skip = False

                    if do_compare:
                        success, detailed_comments = compare_history(case, baseline_name,
                                                                     baseline_root, log_id, compiler)
                        if success:
                            compare_result = TEST_PASS_STATUS
                        else:
                            compare_result = TEST_FAIL_STATUS
                            all_pass_or_skip = False

                        # Following the logic in SystemTestsCommon._compare_baseline:
                        # We'll print the comment if it's a brief one-liner; otherwise
                        # the comment will only appear in the log file
                        if "\n" not in detailed_comments:
                            compare_comment = detailed_comments

            brief_result = ""
            if not hist_only:
                brief_result += "%s %s %s %s\n" % (nl_compare_result, test_name,
                                                   NAMELIST_PHASE, nl_compare_comment)

            if not namelists_only:
                brief_result += "%s %s %s" % (compare_result, test_name, BASELINE_PHASE)
                if compare_comment:
                    brief_result += " %s" % compare_comment
                brief_result += "\n"

            print brief_result,

            append_status(brief_result, logfile_name, caseroot=test_dir)

            if detailed_comments:
                append_status("Detailed comments:\n" + detailed_comments,
                              logfile_name, caseroot=test_dir)

    return all_pass_or_skip
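# Illustration only: a hypothetical invocation of compare_test_results; every
# argument value below is an assumption, not taken from the source above.
all_ok = compare_test_results(baseline_name="cime5.0.0",
                              baseline_root="/projects/ccsm/baselines",
                              test_root="/scratch/tests",
                              compiler="intel",
                              test_id="20160815_143000")
if not all_ok:
    sys.exit(1)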
def _case_setup_impl(case, caseroot, casebaseid, clean=False, test_mode=False, reset=False): ############################################################################### os.chdir(caseroot) msg = "case.setup starting" append_status(msg, caseroot=caseroot, sfile="CaseStatus") cimeroot = os.environ["CIMEROOT"] # Check that $DIN_LOC_ROOT exists - and abort if not a namelist compare tests din_loc_root = case.get_value("DIN_LOC_ROOT") testcase = case.get_value("TESTCASE") expect(not (not os.path.isdir(din_loc_root) and testcase != "SBN"), "inputdata root is not a directory: \"$din_loc_root\" ") # Check that userdefine settings are specified before expanding variable for vid, value in case: expect(not (type(value) is str and "USERDEFINED_required_build" in value), "Parameter '%s' must be defined" % vid) # Create batch script if reset or clean: # Clean batch script backup_dir = "PESetupHist/b.%s" % time.strftime("%y%m%d-%H%M%S") if not os.path.isdir(backup_dir): os.makedirs(backup_dir) # back up relevant files for fileglob in ["case.run", "env_build.xml", "env_mach_pes.xml", "Macros*"]: for filename in glob.glob(fileglob): shutil.copy(filename, backup_dir) if os.path.exists("case.run"): os.remove("case.run") # only do the following if are NOT in testmode if not test_mode: # rebuild the models (even on restart) case.set_value("BUILD_COMPLETE", False) # backup and then clean test script if os.path.exists("case.test"): shutil.copy("case.test", backup_dir) os.remove("case.test") logger.info("Successfully cleaned test script case.test") if os.path.exists("case.testdriver"): shutil.copy("case.testdriver", backup_dir) os.remove("case.testdriver") logger.info("Successfully cleaned test script case.testdriver") logger.info("Successfully cleaned batch script case.run") logger.info("Successfully cleaned batch script case.run") logger.info("Some files have been saved to %s" % backup_dir) msg = "case.setup clean complete" append_status(msg, caseroot=caseroot, sfile="CaseStatus") if not clean: drv_comp = Component() models = drv_comp.get_valid_model_components() models.remove("DRV") mach, compiler, debug, mpilib = \ case.get_value("MACH"), case.get_value("COMPILER"), case.get_value("DEBUG"), case.get_value("MPILIB") expect(mach is not None, "xml variable MACH is not set") # Create Macros file only if it does not exist if not os.path.exists("Macros"): logger.debug("Creating Macros file for %s" % mach) compilers = Compilers(compiler=compiler, machine=mach, os_=case.get_value("OS"), mpilib=mpilib) compilers.write_macros_file() else: logger.debug("Macros script already created ...skipping") # Set tasks to 1 if mpi-serial library if mpilib == "mpi-serial": for vid, value in case: if vid.startswith("NTASKS_") and value != 1: case.set_value(vid, 1) # Check ninst. # In CIME there can be multiple instances of each component model (an ensemble) NINST is the instance of that component. 
        # Save ninst in a dict to use later in apply_user_mods
        ninst = dict()
        for comp in models:
            comp_model = case.get_value("COMP_%s" % comp)
            ninst[comp_model] = case.get_value("NINST_%s" % comp)
            ntasks = case.get_value("NTASKS_%s" % comp)
            if ninst[comp_model] > ntasks:
                if ntasks == 1:
                    case.set_value("NTASKS_%s" % comp, ninst[comp_model])
                else:
                    expect(False, "NINST_%s value %d greater than NTASKS_%s %d" %
                           (comp, ninst[comp_model], comp, ntasks))

        expect(not (case.get_value("BUILD_THREADED") and compiler == "nag"),
               "it is not possible to run with OpenMP if using the NAG Fortran compiler")

        if os.path.exists("case.run"):
            logger.info("Machine/Decomp/Pes configuration has already been done ...skipping")
        else:
            _check_pelayouts_require_rebuild(case, models)

            if os.path.exists("LockedFiles/env_build.xml"):
                os.remove("LockedFiles/env_build.xml")

            case.flush()
            check_lockedfiles()

            tm = TaskMaker(case)
            mtpn = case.get_value("MAX_TASKS_PER_NODE")
            pespn = case.get_value("PES_PER_NODE")
            # This is hardcoded because on yellowstone by default we run with
            # 15 pes per node but pay for 16 pes per node.
            # See github issue #518
            if case.get_value("MACH") == "yellowstone":
                pespn = 16
            pestot = tm.totaltasks

            if mtpn > pespn:
                pestot = pestot * (mtpn // pespn)
                case.set_value("COST_PES", tm.num_nodes * pespn)
            else:
                # reset cost_pes to totalpes
                case.set_value("COST_PES", 0)

            case.set_value("TOTALPES", pestot)

            # Compute cost based on PE count
            pval = 1
            pcnt = 0
            while pval < pestot:
                pval *= 2
                pcnt += 6  # (scaling like sqrt(6/10))
            pcost = 3 - pcnt / 10  # (3 is 64 with 6)

            # Compute cost based on DEBUG
            dcost = 3 if debug else 0

            # Compute cost based on run length.
            # For simplicity, we use a heuristic just based on STOP_OPTION
            # (not considering STOP_N), and only deal with options longer
            # than ndays.
            lcost = 0
            if "nmonth" in case.get_value("STOP_OPTION"):
                # N months costs 30x as much as N days; since cost is based
                # on log-base-2, add 5
                lcost = 5
            elif "nyear" in case.get_value("STOP_OPTION"):
                # N years costs 365x as much as N days; since cost is based
                # on log-base-2, add 9
                lcost = 9

            estcost = pcost + dcost + lcost
            for cost in ["CCSM_CCOST", "CCSM_GCOST", "CCSM_TCOST", "CCSM_CCOST"]:
                estcost += case.get_value(cost)

            case.set_value("CCSM_PCOST", pcost)
            case.set_value("CCSM_ESTCOST", estcost)

            # create batch file
            logger.info("Creating batch script case.run")
            # Use BatchFactory to get the appropriate instance of a BatchMaker,
            # use it to create our batch scripts
            env_batch = case.get_env("batch")
            for job in env_batch.get_jobs():
                input_batch_script = os.path.join(case.get_value("MACHDIR"),
                                                  env_batch.get_value('template', subgroup=job))
                if job == "case.test" and testcase is not None and not test_mode:
                    logger.info("Writing %s script" % job)
                    testscript = os.path.join(cimeroot, "scripts", "Testing",
                                              "Testcases", "%s_script" % testcase)
                    # Short term fix to be removed when csh tests are removed
                    if not os.path.exists(testscript):
                        env_batch.make_batch_script(input_batch_script, job, case)
                elif job != "case.test":
                    logger.info("Writing %s script" % job)
                    env_batch.make_batch_script(input_batch_script, job, case)

            # Make a copy of env_mach_pes.xml in order to be able to check
            # that it does not change once case.setup is invoked
            logger.info("Locking file env_mach_pes.xml")
            case.flush()
            shutil.copy("env_mach_pes.xml", "LockedFiles")

        # Create user_nl files for the required number of instances
        if not os.path.exists("user_nl_cpl"):
            logger.info("Creating user_nl_xxx files for components and cpl")
        # loop over models
        for model in models:
            comp = case.get_value("COMP_%s" % model)
            logger.info("Building %s usernl files" % model)
            _build_usernl_files(case, model, comp)
            if comp == "cism":
                run_cmd_no_fail("%s/../components/cism/cime_config/cism.template %s" % (cimeroot, caseroot))

        _build_usernl_files(case, "drv", "cpl")

        user_mods_path = case.get_value("USER_MODS_FULLPATH")
        if user_mods_path is not None:
            apply_user_mods(caseroot, user_mods_path=user_mods_path, ninst=ninst)
        elif case.get_value("TEST"):
            test_mods = parse_test_name(casebaseid)[6]
            if test_mods is not None:
                user_mods_path = os.path.join(case.get_value("TESTS_MODS_DIR"), test_mods)
                apply_user_mods(caseroot, user_mods_path=user_mods_path, ninst=ninst)

        # Run preview namelists for scripts
        logger.info("preview_namelists")
        preview_namelists(case)

        logger.info("See ./CaseDoc for component namelists")
        logger.info("If an old case build already exists, you might want to run 'case.build --clean' before building")

        # Create test script if appropriate
        # Short term fix to be removed when csh tests are removed
        if os.path.exists("env_test.xml"):
            if not os.path.exists("case.test"):
                logger.info("Starting testcase.setup")
                run_cmd_no_fail("./testcase.setup -caseroot %s" % caseroot)
                logger.info("Finished testcase.setup")

        msg = "case.setup complete"
        append_status(msg, caseroot=caseroot, sfile="CaseStatus")

        # Record env information
        env_module = case.get_env("mach_specific")
        env_module.make_env_mach_specific_file(compiler, debug, mpilib, "sh")
        env_module.make_env_mach_specific_file(compiler, debug, mpilib, "csh")
        with open("software_environment.txt", "w") as f:
            f.write(env_module.list_modules())
        run_cmd_no_fail("echo -e '\n' >> software_environment.txt && \
env >> software_environment.txt")
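
# Worked example of the cost heuristic in _case_setup_impl above (assuming
# Python 2 integer division, which the original expression relies on):
#
#     pestot = 64:  the while loop doubles pval 6 times, so pcnt = 36 and
#                   pcost = 3 - 36/10 = 3 - 3 = 0
#     pestot = 512: the loop runs 9 times, so pcnt = 54 and
#                   pcost = 3 - 54/10 = 3 - 5 = -2
#
# The units are log-base-2 of relative runtime, consistent with dcost = 3
# (a DEBUG run taken as ~2**3 slower) and lcost = 5 for monthly runs
# (~2**5 ~= 30x longer than daily runs).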

def pre_run_check(case):
###############################################################################
    # Pre run initialization code..
    caseroot = case.get_value("CASEROOT")
    din_loc_root = case.get_value("DIN_LOC_ROOT")
    batchsubmit = case.get_value("BATCHSUBMIT")
    mpilib = case.get_value("MPILIB")
    rundir = case.get_value("RUNDIR")
    build_complete = case.get_value("BUILD_COMPLETE")

    # check for locked files.
    check_lockedfiles(case.get_value("CASEROOT"))
    logger.debug("check_lockedfiles OK")

    # check that build is done
    expect(build_complete,
           "BUILD_COMPLETE is not true\nPlease rebuild the model interactively")
    logger.debug("build complete is %s " % build_complete)

    # load the module environment...
    case.load_env()

    # set environment variables
    # This is a requirement for yellowstone only
    if mpilib == "mpi-serial" and "MP_MPILIB" in os.environ:
        del os.environ["MP_MPILIB"]
    else:
        os.environ["MPILIB"] = mpilib

    if batchsubmit is None or len(batchsubmit) == 0:
        os.environ["LBQUERY"] = "FALSE"
        os.environ["BATCHQUERY"] = "undefined"
    elif batchsubmit == 'UNSET':
        os.environ["LBQUERY"] = "FALSE"
        os.environ["BATCHQUERY"] = "undefined"
    else:
        os.environ["LBQUERY"] = "TRUE"

    # create the timing directories, optionally cleaning them if needed.
    if not os.path.isdir(rundir):
        os.mkdir(rundir)

    if os.path.isdir(os.path.join(rundir, "timing")):
        shutil.rmtree(os.path.join(rundir, "timing"))

    os.makedirs(os.path.join(rundir, "timing", "checkpoints"))

    # This needs to be done every time the LID changes in order for log files
    # to be set up correctly. The following also needs to be called in case a
    # user changes a user_nl_xxx file OR an env_run.xml variable while the job
    # is in the queue.
    create_namelists(case)

    # document process
    append_status("Run started ", caseroot=caseroot, sfile="CaseStatus")

    logger.info("-------------------------------------------------------------------------")
    logger.info(" - Prestage required restarts into %s" % rundir)
    logger.info(" - Case input data directory (DIN_LOC_ROOT) is %s " % din_loc_root)
    logger.info(" - Checking for required input datasets in DIN_LOC_ROOT")
    logger.info("-------------------------------------------------------------------------")
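
# pre_run_check expects an active Case object; a minimal sketch of the calling
# pattern (the surrounding driver is an assumption, mirroring how Case is used
# as a context manager elsewhere in this code):
#
#     with Case(caseroot) as case:
#         pre_run_check(case)
#         # ... launch the model run ...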
    signal.signal(signum, _signal_handler)

def case_test(case, testname=None):
    if testname is None:
        testname = case.get_value('TESTCASE')

    expect(testname is not None, "testname argument not resolved")
    logging.warn("Running test for %s" % testname)

    _set_up_signal_handlers()

    try:
        # The following line can throw exceptions if the testname is not found
        # or the test constructor throws. We need to be sure to leave
        # TestStatus in the appropriate state if that happens.
        test = find_system_test(testname, case)(case)
    except:
        caseroot = case.get_value("CASEROOT")
        with TestStatus(test_dir=caseroot) as ts:
            ts.set_status(RUN_PHASE, TEST_FAIL_STATUS, comments="failed to initialize")
        append_status(str(sys.exc_info()[1]), sfile="TestStatus.log")
        raise

    success = test.run()

    return success
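
# Usage sketch for case_test (hypothetical driver; the exit-code convention is
# an assumption, not part of the function's contract):
#
#     with Case(caseroot) as case:
#         success = case_test(case)  # testname defaults to the TESTCASE xml value
#     sys.exit(0 if success else 1)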
            generate_name = case.get_value("BASELINE_NAME_GEN")
            generate_name = get_current_branch() if generate_name is None else generate_name
            expect(generate_name,
                   "Was asked to do baseline generation but unable to determine baseline name")
            logging.info("Generating namelists to baselines '%s'" % generate_name)

        success = True
        output = ""
        if compare:
            success, output = _do_full_nl_comp(case, test_name, compare_name, baseline_root)
        if generate:
            _do_full_nl_gen(case, test_name, generate_name, baseline_root)
    except:
        ts.set_status(NAMELIST_PHASE, TEST_FAIL_STATUS)
        success = False
        warn = "Exception during namelist operations:\n%s\n%s" % (sys.exc_info()[1], traceback.format_exc())
        output += warn
        logging.warning(warn)
    finally:
        ts.set_status(NAMELIST_PHASE, TEST_PASS_STATUS if success else TEST_FAIL_STATUS)
        append_status(output, caseroot=caseroot, sfile=logfile_name)

    return success

        # baseline operations, which may not directly impact the viability of this case
        if compare and not compare_name:
            compare_name = case.get_value("BASELINE_NAME_CMP")
            compare_name = get_current_branch() if compare_name is None else compare_name
            expect(compare_name,
                   "Was asked to do baseline compare but unable to determine baseline name")
            logging.info("Comparing namelists with baselines '%s'" % compare_name)

        if generate and not generate_name:
            generate_name = case.get_value("BASELINE_NAME_GEN")
            generate_name = get_current_branch() if generate_name is None else generate_name
            expect(generate_name,
                   "Was asked to do baseline generation but unable to determine baseline name")
            logging.info("Generating namelists to baselines '%s'" % generate_name)

        success = True
        output = ""
        if compare:
            success, output = _do_full_nl_comp(case, test_name, compare_name)
        if generate:
            _do_full_nl_gen(case, test_name, generate_name)
    except:
        ts.set_status(NAMELIST_PHASE, TEST_FAIL_STATUS)
        success = False
        warn = "Exception during namelist operations:\n%s\n%s" % (sys.exc_info()[1], traceback.format_exc())
        output += warn
        logging.warning(warn)
    finally:
        ts.set_status(NAMELIST_PHASE, TEST_PASS_STATUS if success else TEST_FAIL_STATUS)
        append_status(output, caseroot=caseroot, sfile="TestStatus.log")

    return success

            expect(generate_name,
                   "Was asked to do baseline generation but unable to determine baseline name")
            logging.info("Generating namelists to baselines '{}'".format(generate_name))

        success = True
        output = ""
        if compare:
            success, output = _do_full_nl_comp(case, test_name, compare_name, baseline_root)
        if generate:
            _do_full_nl_gen(case, test_name, generate_name, baseline_root)
    except:
        ts.set_status(NAMELIST_PHASE, TEST_FAIL_STATUS)
        success = False
        warn = "Exception during namelist operations:\n{}\n{}".format(sys.exc_info()[1], traceback.format_exc())
        output += warn
        logging.warning(warn)
    finally:
        ts.set_status(NAMELIST_PHASE, TEST_PASS_STATUS if success else TEST_FAIL_STATUS)
        try:
            append_status(output, logfile_name, caseroot=caseroot)
        except IOError:
            pass

    return success
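
# The three fragments above are variants of the same namelist-phase routine.
# They share one pattern worth noting: the NAMELIST phase is stamped in a
# finally block, so TestStatus always records a result even when the
# compare/generate step raises. A minimal sketch of the pattern (ts is a
# TestStatus instance; _do_work is a hypothetical stand-in for the
# _do_full_nl_comp/_do_full_nl_gen calls):
#
#     success, output = True, ""
#     try:
#         success, output = _do_work()
#     except:
#         success = False
#         output += "Exception during namelist operations:\n%s" % traceback.format_exc()
#     finally:
#         ts.set_status(NAMELIST_PHASE,
#                       TEST_PASS_STATUS if success else TEST_FAIL_STATUS)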

    def _component_compare_move(self, suffix):
        comments = move(self._case, suffix)
        append_status(comments, sfile="TestStatus.log")
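
# Usage note (an assumption based on how history-file suffixes are used in
# CIME system tests, not stated in this fragment): a test typically calls
# self._component_compare_move("base") after its first run so that a later
# run's output can be compared against the saved "base" copies.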