def check_pelayouts_require_rebuild(self, models):
    """
    Check whether a PE-layout change requires a clean build and, if so,
    clean the affected components. Expects cwd to be caseroot.
    """
    locked_pes = os.path.join(LOCKED_DIR, "env_mach_pes.xml")
    if os.path.exists(locked_pes):
        # Look to see if $comp_PE_CHANGE_REQUIRES_REBUILD is defined
        # for any component
        env_mach_pes_locked = EnvMachPes(infile=locked_pes, components=self.get_values("COMP_CLASSES"))
        for comp in models:
            if self.get_value("{}_PE_CHANGE_REQUIRES_REBUILD".format(comp)):
                # Changing these values in env_mach_pes.xml will force
                # you to clean the corresponding component
                old_tasks = env_mach_pes_locked.get_value("NTASKS_{}".format(comp))
                old_threads = env_mach_pes_locked.get_value("NTHRDS_{}".format(comp))
                old_inst = env_mach_pes_locked.get_value("NINST_{}".format(comp))

                new_tasks = self.get_value("NTASKS_{}".format(comp))
                new_threads = self.get_value("NTHRDS_{}".format(comp))
                new_inst = self.get_value("NINST_{}".format(comp))

                if old_tasks != new_tasks or old_threads != new_threads or old_inst != new_inst:
                    logging.warning("{} pe change requires clean build {} {}".format(comp, old_tasks, new_tasks))
                    cleanflag = comp.lower()
                    run_cmd_no_fail("./case.build --clean {}".format(cleanflag))

        unlock_file("env_mach_pes.xml", self.get_value("CASEROOT"))
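# ---------------------------------------------------------------------------
# Illustrative sketch (not CIME code): the locked-vs-current comparison above,
# reduced to plain dicts. EnvMachPes is replaced by a toy "locked" mapping so
# the decision logic can be exercised standalone; the helper name is invented.
def _layout_changed(locked, current, comp):
    """Return True if NTASKS/NTHRDS/NINST for comp differ between snapshots."""
    keys = ("NTASKS_{}".format(comp), "NTHRDS_{}".format(comp), "NINST_{}".format(comp))
    return any(locked.get(k) != current.get(k) for k in keys)

# Example: the ATM task count changed, so ATM would get "./case.build --clean atm"
_locked  = {"NTASKS_ATM": 128, "NTHRDS_ATM": 1, "NINST_ATM": 1}
_current = {"NTASKS_ATM": 256, "NTHRDS_ATM": 1, "NINST_ATM": 1}
assert _layout_changed(_locked, _current, "ATM")
# ---------------------------------------------------------------------------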
def _clean_impl(case, cleanlist, clean_all, clean_depends):
###############################################################################
    # check before abspath: os.path.abspath(None) would raise TypeError
    exeroot = case.get_value("EXEROOT")
    if clean_all:
        # If cleanlist is empty just remove the bld directory
        expect(exeroot is not None, "No EXEROOT defined in case")
        exeroot = os.path.abspath(exeroot)
        if os.path.isdir(exeroot):
            logging.info("cleaning directory {}".format(exeroot))
            shutil.rmtree(exeroot)

        # if clean_all is True also remove the sharedlibpath
        sharedlibroot = case.get_value("SHAREDLIBROOT")
        expect(sharedlibroot is not None, "No SHAREDLIBROOT defined in case")
        sharedlibroot = os.path.abspath(sharedlibroot)
        if sharedlibroot != exeroot and os.path.isdir(sharedlibroot):
            logging.warning("cleaning directory {}".format(sharedlibroot))
            shutil.rmtree(sharedlibroot)
    else:
        expect((cleanlist is not None and len(cleanlist) > 0) or
               (clean_depends is not None and len(clean_depends) > 0),
               "Empty cleanlist not expected")
        debug = case.get_value("DEBUG")
        use_esmf_lib = case.get_value("USE_ESMF_LIB")
        build_threaded = case.get_build_threaded()
        gmake = case.get_value("GMAKE")
        caseroot = os.path.abspath(case.get_value("CASEROOT"))
        casetools = case.get_value("CASETOOLS")
        clm_config_opts = case.get_value("CLM_CONFIG_OPTS")

        os.environ["DEBUG"] = stringify_bool(debug)
        os.environ["USE_ESMF_LIB"] = stringify_bool(use_esmf_lib)
        os.environ["BUILD_THREADED"] = stringify_bool(build_threaded)
        os.environ["CASEROOT"] = caseroot
        os.environ["COMP_INTERFACE"] = case.get_value("COMP_INTERFACE")
        os.environ["PIO_VERSION"] = str(case.get_value("PIO_VERSION"))
        os.environ["CLM_CONFIG_OPTS"] = clm_config_opts if clm_config_opts is not None else ""

        cmd = gmake + " -f " + os.path.join(casetools, "Makefile")
        if cleanlist is not None:
            for item in cleanlist:
                tcmd = cmd + " clean" + item
                logger.info("calling {}".format(tcmd))
                run_cmd_no_fail(tcmd)
        else:
            for item in clean_depends:
                tcmd = cmd + " clean_depends" + item
                logger.info("calling {}".format(tcmd))
                run_cmd_no_fail(tcmd)

    # unlock env_build.xml (removes the copy in the LockedFiles directory)
    unlock_file("env_build.xml")

    # reset following values in xml files
    case.set_value("SMP_BUILD", str(0))
    case.set_value("NINST_BUILD", str(0))
    case.set_value("BUILD_STATUS", str(0))
    case.set_value("BUILD_COMPLETE", "FALSE")
    case.flush()
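# ---------------------------------------------------------------------------
# Illustrative sketch (not CIME code): how _clean_impl expands a cleanlist into
# per-component make invocations. The case Makefile provides targets named
# clean<comp> (e.g. "cleanatm"), so the command is plain string concatenation.
def _clean_commands(gmake, casetools, cleanlist):
    base = gmake + " -f " + os.path.join(casetools, "Makefile")
    return [base + " clean" + item for item in cleanlist]

# e.g. _clean_commands("gmake", "/case/Tools", ["atm", "lnd"]) ->
#   ['gmake -f /case/Tools/Makefile cleanatm', 'gmake -f /case/Tools/Makefile cleanlnd']
# ---------------------------------------------------------------------------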
def _submit(case, job=None, no_batch=False, prereq=None, allow_fail=False, resubmit=False,
            resubmit_immediate=False, skip_pnl=False, mail_user=None, mail_type=None,
            batch_args=None):
    if job is None:
        job = case.get_primary_job()

    # Check if CONTINUE_RUN value makes sense
    if job != "case.test" and case.get_value("CONTINUE_RUN"):
        rundir = case.get_value("RUNDIR")
        caseroot = case.get_value("CASEROOT")
        expect(os.path.isdir(rundir),
               "CONTINUE_RUN is true but RUNDIR {} does not exist".format(rundir))
        expect(len(glob.glob(os.path.join(rundir, "*.nc"))) > 0,
               "CONTINUE_RUN is true but this case does not appear to have been run before (no .nc files in RUNDIR)")
        expect(does_file_have_string(os.path.join(caseroot, "CaseStatus"), "case.run {}".format(CASE_SUCCESS)),
               "CONTINUE_RUN is true but this case does not appear to have ever run successfully")

    # if case.submit is called with the no_batch flag then we assume that this
    # flag will stay in effect for the duration of the RESUBMITs
    env_batch = case.get_env("batch")
    if resubmit:
        if env_batch.get_batch_system_type() == "none":
            no_batch = True

        # This is a resubmission, do not reinitialize test values
        if job == "case.test":
            case.set_value("IS_FIRST_RUN", False)

        resub = case.get_value("RESUBMIT")
        logger.info("Submitting job '{}', resubmit={:d}".format(job, resub))
        case.set_value("RESUBMIT", resub - 1)
        if case.get_value("RESUBMIT_SETS_CONTINUE_RUN"):
            case.set_value("CONTINUE_RUN", True)
    else:
        if job == "case.test":
            case.set_value("IS_FIRST_RUN", True)

        if no_batch:
            batch_system = "none"
        else:
            batch_system = env_batch.get_batch_system_type()

        case.set_value("BATCH_SYSTEM", batch_system)

        env_batch_has_changed = False
        try:
            case.check_lockedfile(os.path.basename(env_batch.filename))
        except SystemExit:
            env_batch_has_changed = True

        if env_batch.get_batch_system_type() != "none" and env_batch_has_changed:
            # May need to regen batch files if user made batch setting changes (e.g. walltime, queue, etc)
            logger.warning("""
env_batch.xml appears to have changed, regenerating batch scripts
manual edits to these files will be lost!
""")
            env_batch.make_all_batch_files(case)

        unlock_file(os.path.basename(env_batch.filename))
        lock_file(os.path.basename(env_batch.filename))

    if job == case.get_primary_job():
        case.check_case()
        case.check_DA_settings()
        if case.get_value("MACH") == "mira":
            with open(".original_host", "w") as fd:
                fd.write(socket.gethostname())

    # Load Modules
    case.load_env()

    case.flush()

    logger.warning("submit_jobs {}".format(job))
    job_ids = case.submit_jobs(no_batch=no_batch, job=job, prereq=prereq, skip_pnl=skip_pnl,
                               resubmit_immediate=resubmit_immediate, allow_fail=allow_fail,
                               mail_user=mail_user, mail_type=mail_type, batch_args=batch_args)

    xml_jobids = []
    for jobname, jobid in job_ids.items():
        logger.info("Submitted job {} with id {}".format(jobname, jobid))
        if jobid:
            xml_jobids.append("{}:{}".format(jobname, jobid))

    xml_jobid_text = ", ".join(xml_jobids)
    if xml_jobid_text:
        case.set_value("JOB_IDS", xml_jobid_text)

    return xml_jobid_text
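# ---------------------------------------------------------------------------
# Illustrative sketch (not CIME code): the resubmit bookkeeping above in
# isolation. Each resubmission decrements RESUBMIT and, if
# RESUBMIT_SETS_CONTINUE_RUN is set, forces CONTINUE_RUN on so later run
# segments restart instead of starting cold. The helper name is invented.
def _resubmit_bookkeeping(xml):
    xml["RESUBMIT"] -= 1
    if xml.get("RESUBMIT_SETS_CONTINUE_RUN"):
        xml["CONTINUE_RUN"] = True
    return xml

_state = {"RESUBMIT": 3, "RESUBMIT_SETS_CONTINUE_RUN": True, "CONTINUE_RUN": False}
assert _resubmit_bookkeeping(_state) == \
    {"RESUBMIT": 2, "RESUBMIT_SETS_CONTINUE_RUN": True, "CONTINUE_RUN": True}
# ---------------------------------------------------------------------------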
def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False):
###############################################################################
    os.chdir(caseroot)

    # Check that $DIN_LOC_ROOT exists - and abort if not, unless this is a
    # namelist-comparison (SBN) test
    din_loc_root = case.get_value("DIN_LOC_ROOT")
    testcase = case.get_value("TESTCASE")
    expect(not (not os.path.isdir(din_loc_root) and testcase != "SBN"),
           "inputdata root is not a directory: {}".format(din_loc_root))

    # Remove batch scripts
    if reset or clean:
        # clean batch script
        batch_script = get_batch_script_for_job(case.get_primary_job())
        if os.path.exists(batch_script):
            os.remove(batch_script)
            logger.info("Successfully cleaned batch script {}".format(batch_script))

        if not test_mode:
            # rebuild the models (even on restart)
            case.set_value("BUILD_COMPLETE", False)

    if not clean:
        case.load_env()

        models = case.get_values("COMP_CLASSES")
        mach = case.get_value("MACH")
        compiler = case.get_value("COMPILER")
        debug = case.get_value("DEBUG")
        mpilib = case.get_value("MPILIB")
        sysos = case.get_value("OS")
        expect(mach is not None, "xml variable MACH is not set")

        # creates the Macros.make, Depends.compiler, Depends.machine, Depends.machine.compiler
        # and env_mach_specific.xml if they don't already exist.
        if not os.path.isfile("Macros.make") or not os.path.isfile("env_mach_specific.xml"):
            configure(Machines(machine=mach), caseroot, ["Makefile"], compiler, mpilib, debug, sysos)

        # Set tasks to 1 if mpi-serial library
        if mpilib == "mpi-serial":
            for vid, value in case:
                if vid.startswith("NTASKS") and value != 1:
                    case.set_value(vid, 1)

        # Check ninst.
        # In CIME there can be multiple instances of each component model (an
        # ensemble); NINST is the number of instances of that component.
        # (the reconciliation of NINST against NTASKS is sketched after this function)
        multi_driver = case.get_value("MULTI_DRIVER")
        nthrds = 1
        for comp in models:
            ntasks = case.get_value("NTASKS_{}".format(comp))
            nthrds = max(nthrds, case.get_value("NTHRDS_{}".format(comp)))
            if comp == "CPL":
                continue
            ninst = case.get_value("NINST_{}".format(comp))
            if multi_driver:
                expect(case.get_value("NINST_LAYOUT_{}".format(comp)) == "concurrent",
                       "If multi_driver is TRUE, NINST_LAYOUT_{} must be concurrent".format(comp))
                case.set_value("NTASKS_PER_INST_{}".format(comp), ntasks)
            else:
                if ninst > ntasks:
                    if ntasks == 1:
                        case.set_value("NTASKS_{}".format(comp), ninst)
                        ntasks = ninst
                    else:
                        expect(False, "NINST_{} value {:d} greater than NTASKS_{} {:d}".format(comp, ninst, comp, ntasks))
                case.set_value("NTASKS_PER_INST_{}".format(comp), int(ntasks / ninst))

        if nthrds > 1:
            case.set_value("BUILD_THREADED", True)

        if os.path.exists(get_batch_script_for_job(case.get_primary_job())):
            logger.info("Machine/Decomp/Pes configuration has already been done ...skipping")

            case.initialize_derived_attributes()

            case.set_value("SMP_PRESENT", case.get_build_threaded())
        else:
            case.check_pelayouts_require_rebuild(models)

            unlock_file("env_build.xml")
            unlock_file("env_batch.xml")

            case.flush()
            case.check_lockedfiles()

            case.initialize_derived_attributes()

            cost_per_node = 16 if case.get_value("MACH") == "yellowstone" else case.get_value("MAX_MPITASKS_PER_NODE")
            case.set_value("COST_PES", case.num_nodes * cost_per_node)
            case.set_value("TOTALPES", case.total_tasks)
            case.set_value("SMP_PRESENT", case.get_build_threaded())

            # create batch files
            env_batch = case.get_env("batch")
            env_batch.make_all_batch_files(case)

            if get_model() == "e3sm" and not case.get_value("TEST"):
                input_batch_script = os.path.join(case.get_value("MACHDIR"), "template.case.run.sh")
                env_batch.make_batch_script(input_batch_script, "case.run", case,
                                            outfile=get_batch_script_for_job("case.run.sh"))

            # May need to select new batch settings if pelayout changed (e.g. problem is now too big for prev-selected queue)
            env_batch.set_job_defaults([(case.get_primary_job(), {})], case)
            case.schedule_rewrite(env_batch)

            # Make a copy of env_mach_pes.xml in order to be able
            # to check that it does not change once case.setup is invoked
            case.flush()
            logger.debug("at copy TOTALPES = {}".format(case.get_value("TOTALPES")))
            lock_file("env_mach_pes.xml")
            lock_file("env_batch.xml")

        # Create user_nl files for the required number of instances
        if not os.path.exists("user_nl_cpl"):
            logger.info("Creating user_nl_xxx files for components and cpl")

        # loop over models
        for model in models:
            comp = case.get_value("COMP_{}".format(model))
            logger.debug("Building {} usernl files".format(model))
            _build_usernl_files(case, model, comp)
            if comp == "cism":
                glcroot = case.get_value("COMP_ROOT_DIR_GLC")
                run_cmd_no_fail("{}/cime_config/cism.template {}".format(glcroot, caseroot))

        _build_usernl_files(case, "drv", "cpl")

        # Create needed directories for case
        case.create_dirs()

        logger.info("If an old case build already exists, you might want to run 'case.build --clean' before building")

        # Some tests need namelists created here (ERP) - so do this if we are in test mode
        if test_mode or get_model() == "e3sm":
            logger.info("Generating component namelists as part of setup")
            case.create_namelists()

        # Record env information
        env_module = case.get_env("mach_specific")
        env_module.make_env_mach_specific_file("sh", case)
        env_module.make_env_mach_specific_file("csh", case)
        env_module.save_all_env_info("software_environment.txt")
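# ---------------------------------------------------------------------------
# Illustrative sketch (not CIME code): the NINST/NTASKS reconciliation from
# _case_setup_impl, referenced above. With ninst instances packed into ntasks
# MPI tasks, each instance gets ntasks // ninst tasks; ninst > ntasks is only
# allowed when ntasks == 1, in which case ntasks is grown to match ninst.
def _tasks_per_instance(ntasks, ninst):
    if ninst > ntasks:
        if ntasks != 1:
            raise ValueError("NINST {} greater than NTASKS {}".format(ninst, ntasks))
        ntasks = ninst
    return ntasks, ntasks // ninst

assert _tasks_per_instance(8, 2) == (8, 4)   # 2 instances x 4 tasks each
assert _tasks_per_instance(1, 4) == (4, 1)   # ntasks grown to cover instances
# ---------------------------------------------------------------------------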
    cmd = gmake + " -f " + os.path.join(casetools, "Makefile")
    cmd += " {}".format(get_standard_makefile_args(case))
    if cleanlist is not None:
        for item in cleanlist:
            tcmd = cmd + " clean" + item
            logger.info("calling {}".format(tcmd))
            run_cmd_no_fail(tcmd)
    else:
        for item in clean_depends:
            tcmd = cmd + " clean_depends" + item
            logger.info("calling {}".format(tcmd))
            run_cmd_no_fail(tcmd)

    # unlock env_build.xml (removes the copy in the LockedFiles directory)
    unlock_file("env_build.xml")

    # reset following values in xml files
    case.set_value("SMP_BUILD", str(0))
    case.set_value("NINST_BUILD", str(0))
    case.set_value("BUILD_STATUS", str(0))
    case.set_value("BUILD_COMPLETE", "FALSE")
    case.flush()

###############################################################################
def _case_build_impl(caseroot, case, sharedlib_only, model_only, buildlist,
                     save_build_provenance):
###############################################################################
    t1 = time.time()
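# ---------------------------------------------------------------------------
# Illustrative sketch (not CIME code): _case_build_impl records t1 at entry and
# logs elapsed wall time when the build finishes; the same pattern in
# isolation. `_do_build` is an invented stand-in for the real build steps, and
# `time`/`logger` are assumed in scope as they are in the chunk above.
def _timed_build(_do_build):
    _t1 = time.time()
    _do_build()
    _t2 = time.time()
    logger.info("Total build time: {:f} seconds".format(_t2 - _t1))

# _timed_build(lambda: None)   # logs a near-zero "Total build time"
# ---------------------------------------------------------------------------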
            # (fragment: tail of the expect() call validating the rpointer file)
            format(ncfile=os.path.join(rundir, ncfile), casename=casename),
        )

    # if case.submit is called with the no_batch flag then we assume that this
    # flag will stay in effect for the duration of the RESUBMITs
    env_batch = case.get_env("batch")
    external_workflow = case.get_value("EXTERNAL_WORKFLOW")
    if env_batch.get_batch_system_type() == "none" or (resubmit and external_workflow):
        no_batch = True

    if no_batch:
        batch_system = "none"
    else:
        batch_system = env_batch.get_batch_system_type()

    unlock_file(os.path.basename(env_batch.filename), caseroot=caseroot)
    case.set_value("BATCH_SYSTEM", batch_system)

    env_batch_has_changed = False
    if not external_workflow:
        try:
            case.check_lockedfile(os.path.basename(env_batch.filename), caseroot=caseroot)
        except CIMEError:
            env_batch_has_changed = True

    if batch_system != "none" and env_batch_has_changed and not external_workflow:
        # May need to regen batch files if user made batch setting changes (e.g. walltime, queue, etc)
        logger.warning("""
env_batch.xml appears to have changed, regenerating batch scripts
manual edits to these files will be lost!
""")
sys.path.append(_LIBDIR)

import datetime, glob, shutil
import CIME.build as build
from standard_script_setup import *
from CIME.case import Case
from CIME.utils import safe_copy
from argparse import RawTextHelpFormatter
from CIME.locked_files import lock_file, unlock_file

def per_run_case_updates(case, user_mods_dir, ensemble_str, nint, paramdict):
    print(">>>>> BUILDING CLONE CASE...")
    caseroot = case.get_value("CASEROOT")
    basecasename = os.path.basename(caseroot)[:-nint]

    unlock_file("env_case.xml", caseroot=caseroot)
    casename = basecasename + ensemble_str
    case.set_value("CASE", casename)
    rundir = case.get_value("RUNDIR")
    rundir = rundir[:-nint] + ensemble_str
    case.set_value("RUNDIR", rundir)
    case.flush()
    lock_file("env_case.xml", caseroot=caseroot)
    print("...Casename is {}".format(casename))
    print("...Caseroot is {}".format(caseroot))
    print("...Rundir is {}".format(rundir))

    # restage user_nl files for each run
    for usermod in glob.iglob(user_mods_dir + "/user*"):
        safe_copy(usermod, caseroot)
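# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the script): the suffix arithmetic used by
# per_run_case_updates. The trailing nint characters of the base case name (and
# likewise the run directory string) are replaced with the zero-padded ensemble
# member string. The helper name is invented for demonstration.
def _ensemble_name(base, ensemble_str, nint):
    return base[:-nint] + ensemble_str

assert _ensemble_name("mycase.000", "007", nint=3) == "mycase.007"
# ---------------------------------------------------------------------------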
def _submit(case, job=None, no_batch=False, prereq=None, allow_fail=False, resubmit=False,
            resubmit_immediate=False, skip_pnl=False, mail_user=None, mail_type=None,
            batch_args=None):
    if job is None:
        job = case.get_primary_job()

    # Check if CONTINUE_RUN value makes sense
    if job != "case.test" and case.get_value("CONTINUE_RUN"):
        rundir = case.get_value("RUNDIR")
        expect(os.path.isdir(rundir),
               "CONTINUE_RUN is true but RUNDIR {} does not exist".format(rundir))
        # only checks for the first instance in a multidriver case
        if case.get_value("MULTI_DRIVER"):
            rpointer = "rpointer.drv_0001"
        else:
            rpointer = "rpointer.drv"
        expect(os.path.exists(os.path.join(rundir, rpointer)),
               "CONTINUE_RUN is true but this case does not appear to have restart files staged in {}".format(rundir))
        # Finally we open the rpointer file and check that it's correct
        casename = case.get_value("CASE")
        with open(os.path.join(rundir, rpointer), "r") as fd:
            ncfile = fd.readline().strip()
        expect(ncfile.startswith(casename) and os.path.exists(os.path.join(rundir, ncfile)),
               "File {ncfile} not present or does not match case {casename}".
               format(ncfile=os.path.join(rundir, ncfile), casename=casename))

    # if case.submit is called with the no_batch flag then we assume that this
    # flag will stay in effect for the duration of the RESUBMITs
    env_batch = case.get_env("batch")
    if resubmit and env_batch.get_batch_system_type() == "none":
        no_batch = True

    if no_batch:
        batch_system = "none"
    else:
        batch_system = env_batch.get_batch_system_type()

    case.set_value("BATCH_SYSTEM", batch_system)

    env_batch_has_changed = False
    try:
        case.check_lockedfile(os.path.basename(env_batch.filename))
    except CIMEError:
        env_batch_has_changed = True

    if batch_system != "none" and env_batch_has_changed:
        # May need to regen batch files if user made batch setting changes (e.g. walltime, queue, etc)
        logger.warning("""
env_batch.xml appears to have changed, regenerating batch scripts
manual edits to these files will be lost!
""")
        env_batch.make_all_batch_files(case)

    unlock_file(os.path.basename(env_batch.filename))
    lock_file(os.path.basename(env_batch.filename))

    if resubmit:
        # This is a resubmission, do not reinitialize test values
        if job == "case.test":
            case.set_value("IS_FIRST_RUN", False)

        resub = case.get_value("RESUBMIT")
        logger.info("Submitting job '{}', resubmit={:d}".format(job, resub))
        case.set_value("RESUBMIT", resub - 1)
        if case.get_value("RESUBMIT_SETS_CONTINUE_RUN"):
            case.set_value("CONTINUE_RUN", True)
    else:
        if job == "case.test":
            case.set_value("IS_FIRST_RUN", True)

    if job == case.get_primary_job():
        case.check_case()
        case.check_DA_settings()
        if case.get_value("MACH") == "mira":
            with open(".original_host", "w") as fd:
                fd.write(socket.gethostname())

    # Load Modules
    case.load_env()

    case.flush()

    logger.warning("submit_jobs {}".format(job))
    job_ids = case.submit_jobs(no_batch=no_batch, job=job, prereq=prereq, skip_pnl=skip_pnl,
                               resubmit_immediate=resubmit_immediate, allow_fail=allow_fail,
                               mail_user=mail_user, mail_type=mail_type, batch_args=batch_args)

    xml_jobids = []
    for jobname, jobid in job_ids.items():
        logger.info("Submitted job {} with id {}".format(jobname, jobid))
        if jobid:
            xml_jobids.append("{}:{}".format(jobname, jobid))

    xml_jobid_text = ", ".join(xml_jobids)
    if xml_jobid_text:
        case.set_value("JOB_IDS", xml_jobid_text)

    return xml_jobid_text