Example #1
    def make_batch_script(self, input_template, job, case, outfile=None):
        expect(os.path.exists(input_template), "input file '{}' does not exist".format(input_template))
        task_count = self.get_value("task_count", subgroup=job)
        overrides = {}
        if task_count is not None:
            overrides["total_tasks"] = int(task_count)
            overrides["num_nodes"]   = int(math.ceil(float(task_count)/float(case.tasks_per_node)))
        else:
            task_count = case.get_value("TOTALPES")*int(case.thread_count)
        if int(task_count) < case.get_value("MAX_TASKS_PER_NODE"):
            overrides["max_tasks_per_node"] = int(task_count)

        overrides["job_id"] = case.get_value("CASE") + os.path.splitext(job)[1]
        if "pleiades" in case.get_value("MACH"):
            # pleiades jobname needs to be limited to 15 chars
            overrides["job_id"] = overrides["job_id"][:15]

        overrides["batchdirectives"] = self.get_batch_directives(case, job, overrides=overrides)
        overrides["mpirun"] = case.get_mpirun_cmd(job=job)
        output_text = transform_vars(open(input_template,"r").read(), case=case, subgroup=job, overrides=overrides)
        output_name = get_batch_script_for_job(job) if outfile is None else outfile
        logger.info("Creating file {}".format(output_name))
        with open(output_name, "w") as fd:
            fd.write(output_text)

        # make sure batch script is executable
        os.chmod(output_name, os.stat(output_name).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
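A minimal, self-contained sketch of the chmod idiom used in this example (standard library only): it adds the user, group, and other execute bits while preserving whatever mode bits the file already has.

import os
import stat

def make_executable(path):
    # OR the execute bits into the existing mode instead of overwriting it
    mode = os.stat(path).st_mode
    os.chmod(path, mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)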
Example #2
        if int(task_count) < case.get_value("MAX_TASKS_PER_NODE"):
            overrides["max_tasks_per_node"] = int(task_count)

        overrides["job_id"] = case.get_value("CASE") + os.path.splitext(job)[1]
        if "pleiades" in case.get_value("MACH"):
            # pleiades jobname needs to be limited to 15 chars
            overrides["job_id"] = overrides["job_id"][:15]

        overrides["batchdirectives"] = self.get_batch_directives(
            case, job, overrides=overrides)
        overrides["mpirun"] = case.get_mpirun_cmd(job=job)
        output_text = transform_vars(open(input_template, "r").read(),
                                     case=case,
                                     subgroup=job,
                                     overrides=overrides)
        output_name = get_batch_script_for_job(
            job) if outfile is None else outfile
        logger.info("Creating file {}".format(output_name))
        with open(output_name, "w") as fd:
            fd.write(output_text)

        # make sure batch script is executable
        os.chmod(
            output_name,
            os.stat(output_name).st_mode | stat.S_IXUSR | stat.S_IXGRP
            | stat.S_IXOTH)

    def set_job_defaults(self, batch_jobs, case):
        if self._batchtype is None:
            self._batchtype = self.get_batch_system_type()

        if self._batchtype == "none":
Example #3
    def _submit_single_job(self, case, job, dep_jobs=None, allow_fail=False,
                           no_batch=False, skip_pnl=False, mail_user=None, mail_type=None,
                           batch_args=None, dry_run=False, resubmit_immediate=False):
        if not dry_run:
            logger.warning("Submit job {}".format(job))
        batch_system = self.get_value("BATCH_SYSTEM", subgroup=None)
        if batch_system is None or batch_system == "none" or no_batch:
            logger.info("Starting job script {}".format(job))
            function_name = job.replace(".", "_")
            if not dry_run:
                args = self._build_run_args(job, True, skip_pnl=skip_pnl, set_continue_run=resubmit_immediate,
                                            submit_resubmits=not resubmit_immediate)
                try:
                    getattr(case, function_name)(**{k: v for k, (v, _) in args.items()})
                except Exception as e:
                    # We don't want exceptions from the run phases propagating into the submit phase
                    logger.warning("Exception from {}: {}".format(function_name, str(e)))

            return

        submitargs = self.get_submit_args(case, job)
        args_override = self.get_value("BATCH_COMMAND_FLAGS", subgroup=job)
        if args_override:
            submitargs = args_override

        if dep_jobs is not None and len(dep_jobs) > 0:
            logger.debug("dependencies: {}".format(dep_jobs))
            if allow_fail:
                dep_string = self.get_value("depend_allow_string", subgroup=None)
                if dep_string is None:
                    logger.warning("'depend_allow_string' is not defined for this batch system, " +
                                   "falling back to the 'depend_string'")
                    dep_string = self.get_value("depend_string", subgroup=None)
            else:
                dep_string = self.get_value("depend_string", subgroup=None)
            expect(dep_string is not None, "'depend_string' is not defined for this batch system")

            separator_string = self.get_value("depend_separator", subgroup=None)
            expect(separator_string is not None, "depend_separator string not defined")

            expect("jobid" in dep_string, "depend_string is missing jobid for prerequisite jobs")
            dep_ids_str = str(dep_jobs[0])
            for dep_id in dep_jobs[1:]:
                dep_ids_str += separator_string + str(dep_id)
            dep_string = dep_string.replace("jobid", dep_ids_str.strip()) # pylint: disable=maybe-no-member
            submitargs += " " + dep_string

        if batch_args is not None:
            submitargs += " " + batch_args

        cime_config = get_cime_config()

        if mail_user is None and cime_config.has_option("main", "MAIL_USER"):
            mail_user = cime_config.get("main", "MAIL_USER")

        if mail_user is not None:
            mail_user_flag = self.get_value('batch_mail_flag', subgroup=None)
            if mail_user_flag is not None:
                submitargs += " " + mail_user_flag + " " + mail_user

        if mail_type is None:
            if job == "case.test" and cime_config.has_option("create_test", "MAIL_TYPE"):
                mail_type = cime_config.get("create_test", "MAIL_TYPE")
            elif cime_config.has_option("main", "MAIL_TYPE"):
                mail_type = cime_config.get("main", "MAIL_TYPE")
            else:
                mail_type = self.get_value("batch_mail_default")

            if mail_type:
                mail_type = mail_type.split(",") # pylint: disable=no-member

        if mail_type:
            mail_type_flag = self.get_value("batch_mail_type_flag", subgroup=None)
            if mail_type_flag is not None:
                mail_type_args = []
                for indv_type in mail_type:
                    mail_type_arg = self.get_batch_mail_type(indv_type)
                    mail_type_args.append(mail_type_arg)

                if mail_type_flag == "-m":
                    # Hacky: PBS-type systems pass multiple mail types differently
                    submitargs += " {} {}".format(mail_type_flag, "".join(mail_type_args))
                else:
                    submitargs += " {} {}".format(mail_type_flag, " {} ".format(mail_type_flag).join(mail_type_args))
        batchsubmit = self.get_value("batch_submit", subgroup=None)
        expect(batchsubmit is not None,
               "Unable to determine the correct command for batch submission.")
        batchredirect = self.get_value("batch_redirect", subgroup=None)
        batch_env_flag = self.get_value("batch_env", subgroup=None)
        run_args = self._build_run_args_str(job, False, skip_pnl=skip_pnl, set_continue_run=resubmit_immediate,
                                            submit_resubmits=not resubmit_immediate)
        if batch_env_flag:
            sequence = (batchsubmit, submitargs, run_args, batchredirect, get_batch_script_for_job(job))
        else:
            sequence = (batchsubmit, submitargs, batchredirect, get_batch_script_for_job(job), run_args)

        submitcmd = " ".join(s.strip() for s in sequence if s is not None)

        if dry_run:
            return submitcmd
        else:
            logger.info("Submitting job script {}".format(submitcmd))
            output = run_cmd_no_fail(submitcmd, combine_output=True)
            jobid = self.get_job_id(output)
            logger.info("Submitted job id is {}".format(jobid))
            return jobid
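A standalone sketch of the dependency-string construction above. The template and separator values are hypothetical examples of what a PBS-style batch config might define, not values taken from any real config file.

def build_dep_string(dep_string, separator, dep_jobs):
    # the "jobid" placeholder in the template is replaced by the
    # separator-joined list of prerequisite job ids
    dep_ids_str = separator.join(str(dep_id) for dep_id in dep_jobs)
    return dep_string.replace("jobid", dep_ids_str)

# build_dep_string("-W depend=afterok:jobid", ":", [101, 102])
# -> "-W depend=afterok:101:102"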
Example #4
def _case_build_impl(caseroot, case, sharedlib_only, model_only, buildlist,
                     save_build_provenance):
###############################################################################

    t1 = time.time()

    expect(not (sharedlib_only and model_only),
           "Contradiction: both sharedlib_only and model_only")
    logger.info("Building case in directory {}".format(caseroot))
    logger.info("sharedlib_only is {}".format(sharedlib_only))
    logger.info("model_only is {}".format(model_only))

    expect(os.path.isdir(caseroot), "'{}' is not a valid directory".format(caseroot))
    os.chdir(caseroot)

    expect(os.path.exists(get_batch_script_for_job(case.get_primary_job())),
           "ERROR: must invoke case.setup script before calling build script ")

    cimeroot = case.get_value("CIMEROOT")

    comp_classes = case.get_values("COMP_CLASSES")

    case.check_lockedfiles(skip="env_batch")

    # Retrieve relevant case data
    # This environment variable gets set for cesm Make and
    # needs to be unset before building again.
    if "MODEL" in os.environ:
        del os.environ["MODEL"]
    build_threaded      = case.get_build_threaded()
    casetools           = case.get_value("CASETOOLS")
    exeroot             = os.path.abspath(case.get_value("EXEROOT"))
    incroot             = os.path.abspath(case.get_value("INCROOT"))
    libroot             = os.path.abspath(case.get_value("LIBROOT"))
    sharedlibroot       = os.path.abspath(case.get_value("SHAREDLIBROOT"))
    multi_driver = case.get_value("MULTI_DRIVER")
    complist = []
    ninst = 1
    for comp_class in comp_classes:
        if comp_class == "CPL":
            config_dir = None
            if multi_driver:
                ninst = case.get_value("NINST_MAX")
        else:
            config_dir = os.path.dirname(case.get_value("CONFIG_{}_FILE".format(comp_class)))
            if multi_driver:
                ninst = 1
            else:
                ninst = case.get_value("NINST_{}".format(comp_class))

        comp = case.get_value("COMP_{}".format(comp_class))
        thrds = case.get_value("NTHRDS_{}".format(comp_class))
        expect(ninst is not None, "Failed to get ninst for comp_class {}".format(comp_class))
        complist.append((comp_class.lower(), comp, thrds, ninst, config_dir))
        os.environ["COMP_{}".format(comp_class)] = comp

    ocn_submodel        = case.get_value("OCN_SUBMODEL")
    profile_papi_enable = case.get_value("PROFILE_PAPI_ENABLE")
    compiler            = case.get_value("COMPILER")
    comp_interface      = case.get_value("COMP_INTERFACE")
    mpilib              = case.get_value("MPILIB")
    use_esmf_lib        = case.get_value("USE_ESMF_LIB")
    debug               = case.get_value("DEBUG")
    ninst_build         = case.get_value("NINST_BUILD")
    smp_value           = case.get_value("SMP_VALUE")
    clm_use_petsc       = case.get_value("CLM_USE_PETSC")
    cism_use_trilinos   = case.get_value("CISM_USE_TRILINOS")
    mali_use_albany     = case.get_value("MALI_USE_ALBANY")
    use_moab            = case.get_value("USE_MOAB")
    clm_config_opts     = case.get_value("CLM_CONFIG_OPTS")
    cam_config_opts     = case.get_value("CAM_CONFIG_OPTS")
    pio_config_opts     = case.get_value("PIO_CONFIG_OPTS")
    ninst_value         = case.get_value("NINST_VALUE")
    mach                = case.get_value("MACH")
    os_                 = case.get_value("OS")
    # Load some params into env
    os.environ["CIMEROOT"]             = cimeroot
    os.environ["CASETOOLS"]            = casetools
    os.environ["EXEROOT"]              = exeroot
    os.environ["INCROOT"]              = incroot
    os.environ["LIBROOT"]              = libroot
    os.environ["SHAREDLIBROOT"]        = sharedlibroot
    os.environ["CASEROOT"]             = caseroot
    os.environ["COMPILER"]             = compiler
    os.environ["COMP_INTERFACE"]       = comp_interface
    os.environ["NINST_VALUE"]          = str(ninst_value)
    os.environ["BUILD_THREADED"]       = stringify_bool(build_threaded)
    os.environ["MACH"]                 = mach
    os.environ["USE_ESMF_LIB"]         = stringify_bool(use_esmf_lib)
    os.environ["MPILIB"]               = mpilib
    os.environ["DEBUG"]                = stringify_bool(debug)
    os.environ["OS"]                   = os_
    os.environ["CLM_CONFIG_OPTS"]      = clm_config_opts     if clm_config_opts     is not None else ""
    os.environ["CAM_CONFIG_OPTS"]      = cam_config_opts     if cam_config_opts     is not None else ""
    os.environ["PIO_CONFIG_OPTS"]      = pio_config_opts     if pio_config_opts     is not None else ""
    os.environ["OCN_SUBMODEL"]         = ocn_submodel        if ocn_submodel        is not None else ""
    os.environ["PROFILE_PAPI_ENABLE"]  = stringify_bool(profile_papi_enable)
    os.environ["CLM_USE_PETSC"]        = stringify_bool(clm_use_petsc)
    os.environ["CISM_USE_TRILINOS"]    = stringify_bool(cism_use_trilinos)
    os.environ["MALI_USE_ALBANY"]      = stringify_bool(mali_use_albany)
    os.environ["USE_MOAB"]             = stringify_bool(use_moab)

    if get_model() == "e3sm" and mach == "titan" and compiler == "pgiacc":
        case.set_value("CAM_TARGET", "preqx_acc")

    # This is a timestamp for the build, not the same as the testid,
    # and this case may not be a test anyway. For a production
    # experiment there may be many builds of the same case.
    lid               = get_timestamp("%y%m%d-%H%M%S")
    os.environ["LID"] = lid

    # Set the overall USE_PETSC variable to TRUE if any of the
    # *_USE_PETSC variables are TRUE.
    # For now, there is just the one CLM_USE_PETSC variable, but in
    # the future there may be others -- so USE_PETSC will be true if
    # ANY of those are true.

    use_petsc = clm_use_petsc
    case.set_value("USE_PETSC", use_petsc)
    os.environ["USE_PETSC"] = stringify_bool(use_petsc)

    # Set the overall USE_TRILINOS variable to TRUE if any of the
    # *_USE_TRILINOS variables are TRUE.
    # For now, there is just the one CISM_USE_TRILINOS variable, but in
    # the future there may be others -- so USE_TRILINOS will be true if
    # ANY of those are true.

    use_trilinos = False if cism_use_trilinos is None else cism_use_trilinos
    case.set_value("USE_TRILINOS", use_trilinos)
    os.environ["USE_TRILINOS"] = stringify_bool(use_trilinos)

    # Set the overall USE_ALBANY variable to TRUE if any of the
    # *_USE_ALBANY variables are TRUE.
    # For now, there is just the one MALI_USE_ALBANY variable, but in
    # the future there may be others -- so USE_ALBANY will be true if
    # ANY of those are true.

    use_albany = stringify_bool(mali_use_albany)
    case.set_value("USE_ALBANY", use_albany)
    os.environ["USE_ALBANY"] = use_albany

    # Load modules
    case.load_env()

    sharedpath = _build_checks(case, build_threaded, comp_interface,
                               use_esmf_lib, debug, compiler, mpilib,
                               complist, ninst_build, smp_value, model_only, buildlist)

    t2 = time.time()
    logs = []

    if not model_only:
        logs = _build_libraries(case, exeroot, sharedpath, caseroot,
                                cimeroot, libroot, lid, compiler, buildlist, comp_interface)

    if not sharedlib_only:
        os.environ["INSTALL_SHAREDPATH"] = os.path.join(exeroot, sharedpath) # for MPAS makefile generators
        logs.extend(_build_model(build_threaded, exeroot, clm_config_opts, incroot, complist,
                                lid, caseroot, cimeroot, compiler, buildlist, comp_interface))

        if not buildlist:
            # in case component build scripts updated the xml files, update the case object
            case.read_xml()
            # Note: building with a buildlist never results in the system marking the build complete

    post_build(case, logs, build_complete=not (buildlist or sharedlib_only),
               save_build_provenance=save_build_provenance)

    t3 = time.time()

    if not sharedlib_only:
        logger.info("Time spent not building: {:f} sec".format(t2 - t1))
        logger.info("Time spent building: {:f} sec".format(t3 - t2))
        logger.info("MODEL BUILD HAS FINISHED SUCCESSFULLY")

    return True
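The build code above exports many case values through os.environ via stringify_bool. A minimal sketch of what such a helper does, assuming the real one (in CIME's utils) also validates its input:

def stringify_bool(val):
    # environment variables are strings, so booleans become "TRUE"/"FALSE";
    # None is treated as False, matching how unset flags are exported above
    return "TRUE" if val else "FALSE"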
Example #5
        ext = os.path.splitext(job)[-1]
        if len(ext) == 0:
            ext = job
        if ext.startswith('.'):
            ext = ext[1:]
        overrides["job_id"] = ext + '.' + case.get_value("CASE")
        if "pleiades" in case.get_value("MACH"):
            # pleiades jobname needs to be limited to 15 chars
            overrides["job_id"] = overrides["job_id"][:15]
        overrides["batchdirectives"] = self.get_batch_directives(
            case, job, overrides=overrides)
        output_text = transform_vars(open(input_template, "r").read(),
                                     case=case,
                                     subgroup=job,
                                     overrides=overrides)
        output_name = get_batch_script_for_job(
            job) if outfile is None else outfile
        logger.info("Creating file {}".format(output_name))
        with open(output_name, "w") as fd:
            fd.write(output_text)

        # make sure batch script is executable
        os.chmod(
            output_name,
            os.stat(output_name).st_mode | stat.S_IXUSR | stat.S_IXGRP
            | stat.S_IXOTH)

    def set_job_defaults(self, batch_jobs, case):
        if self._batchtype is None:
            self._batchtype = self.get_batch_system_type()

        if self._batchtype == "none":
Example #6
            task_count = case.get_value("TOTALPES") * int(case.thread_count)
        if int(task_count) < case.get_value("MAX_TASKS_PER_NODE"):
            overrides["max_tasks_per_node"] = int(task_count)

        overrides["job_id"] = case.get_value("CASE") + os.path.splitext(job)[1]
        if "pleiades" in case.get_value("MACH"):
            # pleiades jobname needs to be limited to 15 chars
            overrides["job_id"] = overrides["job_id"][:15]

        overrides["batchdirectives"] = self.get_batch_directives(
            case, job, overrides=overrides)
        output_text = transform_vars(open(input_template, "r").read(),
                                     case=case,
                                     subgroup=job,
                                     overrides=overrides)
        output_name = get_batch_script_for_job(job)

        with open(output_name, "w") as fd:
            fd.write(output_text)

        # make sure batch script is executable
        os.chmod(
            output_name,
            os.stat(output_name).st_mode | stat.S_IXUSR | stat.S_IXGRP
            | stat.S_IXOTH)

    def set_job_defaults(self, batch_jobs, case):
        if self._batchtype is None:
            self._batchtype = self.get_batch_system_type()

        if self._batchtype == 'none':
Example #7
            if threaded and case.total_tasks * case.thread_count > cost_per_node:
                smt_factor = max(1.0, int(case.get_value("MAX_TASKS_PER_NODE") / cost_per_node))
                case.set_value("TOTALPES", int(case.total_tasks * max(1.0, float(case.thread_count) / smt_factor)))
            else:
                case.set_value("TOTALPES", case.total_tasks*case.thread_count)


            # May need to select new batch settings if pelayout changed (e.g. problem is now too big for prev-selected queue)
            env_batch = case.get_env("batch")
            env_batch.set_job_defaults([(case.get_primary_job(), {})], case)

            # create batch files
            env_batch.make_all_batch_files(case)
            if get_model() == "e3sm" and not case.get_value("TEST"):
                input_batch_script = os.path.join(case.get_value("MACHDIR"), "template.case.run.sh")
                env_batch.make_batch_script(input_batch_script, "case.run", case, outfile=get_batch_script_for_job("case.run.sh"))

            # Make a copy of env_mach_pes.xml in order to be able
            # to check that it does not change once case.setup is invoked
            case.flush()
            logger.debug("at copy TOTALPES = {}".format(case.get_value("TOTALPES")))
            lock_file("env_mach_pes.xml")
            lock_file("env_batch.xml")

        # Create user_nl files for the required number of instances
        if not os.path.exists("user_nl_cpl"):
            logger.info("Creating user_nl_xxx files for components and cpl")

        # loop over models
        for model in models:
            comp = case.get_value("COMP_{}".format(model))
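The threaded TOTALPES arithmetic above is easier to follow when pulled into a standalone function. This is a sketch with hypothetical numbers, not code from the source.

def totalpes_threaded(total_tasks, thread_count, max_tasks_per_node, cost_per_node):
    # smt_factor: how many hardware threads each chargeable core provides
    smt_factor = max(1.0, int(max_tasks_per_node / cost_per_node))
    return int(total_tasks * max(1.0, float(thread_count) / smt_factor))

# e.g. 64 tasks x 4 threads on nodes with 128 hardware threads and 32 cores:
# smt_factor = 4, so TOTALPES stays 64 instead of 64 * 4 = 256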
Example #8
                    # Hacky: PBS-type systems pass multiple mail types differently
                    submitargs += " {} {}".format(mail_type_flag,
                                                  "".join(mail_type_args))
                else:
                    submitargs += " {} {}".format(
                        mail_type_flag,
                        " {} ".format(mail_type_flag).join(mail_type_args))

        batchsubmit = self.get_value("batch_submit", subgroup=None)
        expect(
            batchsubmit is not None,
            "Unable to determine the correct command for batch submission.")
        batchredirect = self.get_value("batch_redirect", subgroup=None)
        submitcmd = ''
        for string in (batchsubmit, submitargs, batchredirect,
                       get_batch_script_for_job(job)):
            if string is not None:
                submitcmd += string + " "

        if job == 'case.run' and skip_pnl:
            batch_env_flag = self.get_value("batch_env", subgroup=None)
            if not batch_env_flag:
                submitcmd += " --skip-preview-namelist"
            else:
                submitcmd += " {} ARGS_FOR_SCRIPT='--skip-preview-namelist'".format(
                    batch_env_flag)

        if dry_run:
            return submitcmd
        else:
            logger.info("Submitting job script {}".format(submitcmd))
Example #9
class EnvBatch(EnvBase):
    def __init__(self, case_root=None, infile="env_batch.xml"):
        """
        initialize an object interface to file env_batch.xml in the case directory
        """
        self._batchtype = None
        # This arbitrary setting should always be overwritten
        self._default_walltime = "00:20:00"
        schema = os.path.join(get_cime_root(), "config", "xml_schemas",
                              "env_batch.xsd")
        EnvBase.__init__(self, case_root, infile, schema=schema)

    # pylint: disable=arguments-differ
    def set_value(self, item, value, subgroup=None, ignore_type=False):
        """
        Override the entry_id set_value function with some special cases for this class
        """
        val = None
        if item == "JOB_WALLCLOCK_TIME":
            # Most systems use the %H:%M:%S format for wallclock, but LSF
            # uses %H:%M. This code corrects the value passed in to the
            # expected format; if we find more exceptions than this, we
            # may need to generalize further.
            walltime_format = self.get_value("walltime_format", subgroup=None)
            if walltime_format is not None and walltime_format.count(
                    ":") != value.count(":"):  # pylint: disable=maybe-no-member
                if value.count(":") == 1:
                    t_spec = "%H:%M"
                elif value.count(":") == 2:
                    t_spec = "%H:%M:%S"
                else:
                    expect(
                        False,
                        "could not interpret format for wallclock time {}".
                        format(value))
                value = format_time(walltime_format, t_spec, value)

        # allow the user to set item for all jobs if subgroup is not provided
        if subgroup is None:
            nodes = self.get_children("entry", {"id": item})
            for node in nodes:
                self._set_value(node, value, vid=item, ignore_type=ignore_type)
                val = value
        else:
            group = self.get_optional_child("group", {"id": subgroup})
            if group is not None:
                node = self.get_optional_child("entry", {"id": item},
                                               root=group)
                if node is not None:
                    val = self._set_value(node,
                                          value,
                                          vid=item,
                                          ignore_type=ignore_type)

        return val

    # pylint: disable=arguments-differ
    def get_value(self,
                  item,
                  attribute=None,
                  resolved=True,
                  subgroup="case.run"):
        """
        Must default subgroup to something in order to provide a single return value
        """

        value = None
        if subgroup is None:
            node = self.get_optional_child(item, attribute)
            if node is not None:
                value = self.text(node)
                if resolved:
                    value = self.get_resolved_value(value)
            else:
                value = EnvBase.get_value(self, item, attribute, resolved)
        else:
            value = EnvBase.get_value(self,
                                      item,
                                      attribute=attribute,
                                      resolved=resolved,
                                      subgroup=subgroup)

        return value

    def get_type_info(self, vid):
        nodes = self.get_children("entry", {"id": vid})
        type_info = None
        for node in nodes:
            new_type_info = self._get_type_info(node)
            if type_info is None:
                type_info = new_type_info
            else:
                expect(
                    type_info == new_type_info,
                    "Inconsistent type_info for entry id={} {} {}".format(
                        vid, new_type_info, type_info))
        return type_info

    def get_jobs(self):
        groups = self.get_children("group")
        results = []
        for group in groups:
            if self.get(group, "id") not in ["job_submission", "config_batch"]:
                results.append(self.get(group, "id"))

        return results

    def create_job_groups(self, batch_jobs):
        # Subtle: in order to support dynamic batch jobs, we need to remove
        # the job_submission group and replace it with job-based groups

        orig_group = self.get_child(
            "group", {"id": "job_submission"},
            err_msg="Looks like job groups have already been created")
        orig_group_children = EnvBase.get_children(self,
                                                   root=orig_group,
                                                   no_validate=True)

        childnodes = []
        for child in reversed(orig_group_children):
            childnodes.append(self.copy(child))

        self.remove_child(orig_group)

        for name, jdict in batch_jobs:
            new_job_group = self.make_child("group", {"id": name})
            for field in jdict.keys():
                val = jdict[field]
                node = self.make_child("entry", {
                    "id": field,
                    "value": val
                },
                                       root=new_job_group)
                self.make_child("type", root=node, text="char")

            for child in childnodes:
                self.add_child(child, root=new_job_group)

    def cleanupnode(self, node):
        if self.get(node, "id") == "batch_system":
            fnode = self.get_child(name="file", root=node)
            self.remove_child(fnode, root=node)
            gnode = self.get_child(name="group", root=node)
            self.remove_child(gnode, root=node)
            vnode = self.get_optional_child(name="values", root=node)
            if vnode is not None:
                self.remove_child(vnode, root=node)
        else:
            node = EnvBase.cleanupnode(self, node)
        return node

    def set_batch_system(self, batchobj, batch_system_type=None):
        if batch_system_type is not None:
            self.set_batch_system_type(batch_system_type)

        if batchobj.batch_system_node is not None and batchobj.machine_node is not None:
            for node in batchobj.get_children(root=batchobj.machine_node,
                                              no_validate=True):
                oldnode = batchobj.get_optional_child(
                    self.name(node), root=batchobj.batch_system_node)
                if oldnode is not None and self.name(oldnode) != "directives":
                    logger.debug("Replacing {}".format(self.name(oldnode)))
                    batchobj.remove_child(oldnode,
                                          root=batchobj.batch_system_node)

        if batchobj.batch_system_node is not None:
            self.add_child(self.copy(batchobj.batch_system_node))
        if batchobj.machine_node is not None:
            self.add_child(self.copy(batchobj.machine_node))

    def make_batch_script(self, input_template, job, case):
        expect(os.path.exists(input_template),
               "input file '{}' does not exist".format(input_template))

        task_count = self.get_value("task_count", subgroup=job)
        overrides = {}
        if task_count is not None:
            overrides["total_tasks"] = int(task_count)
            overrides["num_nodes"] = int(
                math.ceil(float(task_count) / float(case.tasks_per_node)))

        overrides["pedocumentation"] = ""  # TODO?
        overrides["job_id"] = case.get_value("CASE") + os.path.splitext(job)[1]
        if "pleiades" in case.get_value("MACH"):
            # pleiades jobname needs to be limited to 15 chars
            overrides["job_id"] = overrides["job_id"][:15]

        overrides["batchdirectives"] = self.get_batch_directives(
            case, job, overrides=overrides)

        output_text = transform_vars(open(input_template, "r").read(),
                                     case=case,
                                     subgroup=job,
                                     overrides=overrides)
        output_name = get_batch_script_for_job(job)
        with open(output_name, "w") as fd:
            fd.write(output_text)
        os.chmod(
            output_name,
            os.stat(output_name).st_mode | stat.S_IXUSR | stat.S_IXGRP
            | stat.S_IXOTH)
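A rough sketch of the walltime reformatting performed by set_value above, assuming a format_time(out_fmt, in_fmt, value) helper with the argument order the code uses. This toy version relies on time.strptime and therefore rejects walltimes of 24 hours or more, which the real helper must handle.

import time

def format_time_sketch(out_fmt, in_fmt, value):
    # re-emit the wallclock string in the format the batch system expects
    return time.strftime(out_fmt, time.strptime(value, in_fmt))

# format_time_sketch("%H:%M", "%H:%M:%S", "01:30:00") -> "01:30"  (LSF style)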
Example #10
    ###############################################################################

    t1 = time.time()

    expect(not (sharedlib_only and model_only),
           "Contradiction: both sharedlib_only and model_only")
    logger.info("Building case in directory {}".format(caseroot))
    logger.info("sharedlib_only is {}".format(sharedlib_only))
    logger.info("model_only is {}".format(model_only))

    expect(os.path.isdir(caseroot),
           "'{}' is not a valid directory".format(caseroot))
    os.chdir(caseroot)

    expect(
        os.path.exists(get_batch_script_for_job("case.run")),
        "ERROR: must invoke case.setup script before calling build script ")

    cimeroot = case.get_value("CIMEROOT")

    comp_classes = case.get_values("COMP_CLASSES")

    check_lockedfiles(case)

    # Retrieve relevant case data
    # This environment variable gets set for cesm Make and
    # needs to be unset before building again.
    if "MODEL" in os.environ:
        del os.environ["MODEL"]
    build_threaded = case.get_build_threaded()
    casetools = case.get_value("CASETOOLS")
Example #11
        if task_count is not None:
            overrides["total_tasks"] = int(task_count)
            overrides["num_nodes"]   = int(math.ceil(float(task_count)/float(case.tasks_per_node)))
        else:
            task_count = case.get_value("TOTALPES")*int(case.thread_count)
        if int(task_count) < case.get_value("MAX_TASKS_PER_NODE"):
            overrides["max_tasks_per_node"] = int(task_count)

        overrides["job_id"] = case.get_value("CASE") + os.path.splitext(job)[1]
        if "pleiades" in case.get_value("MACH"):
            # pleiades jobname needs to be limited to 15 chars
            overrides["job_id"] = overrides["job_id"][:15]

        overrides["batchdirectives"] = self.get_batch_directives(case, job, overrides=overrides)
        output_text = transform_vars(open(input_template,"r").read(), case=case, subgroup=job, overrides=overrides)
        output_name = get_batch_script_for_job(job)

        with open(output_name, "w") as fd:
            fd.write(output_text)

        # make sure batch script is executable
        os.chmod(output_name, os.stat(output_name).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

    def set_job_defaults(self, batch_jobs, case):
        if self._batchtype is None:
            self._batchtype = self.get_batch_system_type()

        if self._batchtype == 'none':
            return

        for job, jsect in batch_jobs:
Example #12
    def _submit_single_job(self, case, job, dep_jobs=None, no_batch=False,
                           skip_pnl=False, mail_user=None, mail_type=None,
                           batch_args=None, dry_run=False):
        if not dry_run:
            logger.warning("Submit job {}".format(job))
        batch_system = self.get_value("BATCH_SYSTEM", subgroup=None)
        if batch_system is None or batch_system == "none" or no_batch:
            logger.info("Starting job script {}".format(job))
            function_name = job.replace(".", "_")
            if not dry_run:
                if "archive" not in function_name:
                    getattr(case, function_name)(skip_pnl=skip_pnl)
                else:
                    getattr(case, function_name)()

            return

        submitargs = self.get_submit_args(case, job)
        args_override = self.get_value("BATCH_COMMAND_FLAGS", subgroup=job)
        if args_override:
            submitargs = args_override

        if dep_jobs is not None and len(dep_jobs) > 0:
            logger.info("dependencies: {}".format(dep_jobs))
            dep_string = self.get_value("depend_string", subgroup=None)
            separator_string = self.get_value("depend_separator", subgroup=None)
            expect(separator_string is not None, "depend_separator string not defined")
            expect("jobid" in dep_string, "depend_string is missing jobid for prerequisite jobs")
            dep_ids_str = str(dep_jobs[0])
            for dep_id in dep_jobs[1:]:
                dep_ids_str += separator_string + str(dep_id)
            dep_string = dep_string.replace("jobid", dep_ids_str.strip()) # pylint: disable=maybe-no-member
            submitargs += " " + dep_string

        if batch_args is not None:
            submitargs += " " + batch_args

        cime_config = get_cime_config()

        if mail_user is None and cime_config.has_option("main", "MAIL_USER"):
            mail_user = cime_config.get("main", "MAIL_USER")

        if mail_user is not None:
            mail_user_flag = self.get_value('batch_mail_flag', subgroup=None)
            if mail_user_flag is not None:
                submitargs += " " + mail_user_flag + " " + mail_user

        if mail_type is None:
            if job == "case.test" and cime_config.has_option("create_test", "MAIL_TYPE"):
                mail_type = cime_config.get("create_test", "MAIL_TYPE")
            elif cime_config.has_option("main", "MAIL_TYPE"):
                mail_type = cime_config.get("main", "MAIL_TYPE")
            else:
                mail_type = self.get_value("batch_mail_default")

            if mail_type:
                mail_type = mail_type.split(",") # pylint: disable=no-member

        if mail_type:
            mail_type_flag = self.get_value("batch_mail_type_flag", subgroup=None)
            if mail_type_flag is not None:
                mail_type_args = []
                for indv_type in mail_type:
                    mail_type_arg = self.get_batch_mail_type(indv_type)
                    mail_type_args.append(mail_type_arg)

                if mail_type_flag == "-m":
                    # Hacky: PBS-type systems pass multiple mail types differently
                    submitargs += " {} {}".format(mail_type_flag, "".join(mail_type_args))
                else:
                    submitargs += " {} {}".format(mail_type_flag, " {} ".format(mail_type_flag).join(mail_type_args))
        batchsubmit = self.get_value("batch_submit", subgroup=None)
        expect(batchsubmit is not None,
               "Unable to determine the correct command for batch submission.")
        batchredirect = self.get_value("batch_redirect", subgroup=None)
        submitcmd = ''
        batch_env_flag = self.get_value("batch_env", subgroup=None)
        if batch_env_flag:
            sequence = (batchsubmit, submitargs, "skip_pnl", batchredirect, get_batch_script_for_job(job))
        else:
            sequence = (batchsubmit, submitargs, batchredirect, get_batch_script_for_job(job), "skip_pnl")

        for string in sequence:
            if string == "skip_pnl":
                if job in ['case.run', 'case.test'] and skip_pnl:
                    batch_env_flag = self.get_value("batch_env", subgroup=None)
                    if not batch_env_flag:
                        submitcmd += " --skip-preview-namelist "
                    else:
                        submitcmd += " {} ARGS_FOR_SCRIPT='--skip-preview-namelist' ".format(batch_env_flag)

            elif string is not None:
                submitcmd += string + " "

        if dry_run:
            return submitcmd
        else:
            logger.info("Submitting job script {}".format(submitcmd))
            output = run_cmd_no_fail(submitcmd, combine_output=True)
            jobid = self.get_job_id(output)
            logger.info("Submitted job id is {}".format(jobid))
            return jobid
Example #13
                     caseroot,
                     clean=False,
                     test_mode=False,
                     reset=False):
    ###############################################################################
    os.chdir(caseroot)

    # Check that $DIN_LOC_ROOT exists, and abort unless this is a namelist compare test
    din_loc_root = case.get_value("DIN_LOC_ROOT")
    testcase = case.get_value("TESTCASE")
    expect(not (not os.path.isdir(din_loc_root) and testcase != "SBN"),
           "inputdata root is not a directory: {}".format(din_loc_root))

    # Remove batch scripts
    if reset or clean:
        case_run, case_test = get_batch_script_for_job(
            "case.run"), get_batch_script_for_job("case.test")
        if os.path.exists(case_run):
            os.remove(case_run)

        if not test_mode:
            # rebuild the models (even on restart)
            case.set_value("BUILD_COMPLETE", False)

            # backup and then clean test script
            if os.path.exists(case_test):
                os.remove(case_test)
                logger.info(
                    "Successfully cleaned test script {}".format(case_test))

        logger.info("Successfully cleaned batch script case.run")
Example #14
            overrides["total_tasks"] = int(task_count)
            overrides["num_nodes"]   = int(math.ceil(float(task_count)/float(case.tasks_per_node)))
        else:
            task_count = case.get_value("TOTALPES")*int(case.thread_count)
        if int(task_count) < case.get_value("MAX_TASKS_PER_NODE"):
            overrides["max_tasks_per_node"] = int(task_count)

        overrides["job_id"] = case.get_value("CASE") + os.path.splitext(job)[1]
        if "pleiades" in case.get_value("MACH"):
            # pleiades jobname needs to be limited to 15 chars
            overrides["job_id"] = overrides["job_id"][:15]

        overrides["batchdirectives"] = self.get_batch_directives(case, job, overrides=overrides)
        overrides["mpirun"] = case.get_mpirun_cmd(job=job)
        output_text = transform_vars(open(input_template,"r").read(), case=case, subgroup=job, overrides=overrides)
        output_name = get_batch_script_for_job(job) if outfile is None else outfile
        logger.info("Creating file {}".format(output_name))
        with open(output_name, "w") as fd:
            fd.write(output_text)

        # make sure batch script is executable
        os.chmod(output_name, os.stat(output_name).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

    def set_job_defaults(self, batch_jobs, case):
        if self._batchtype is None:
            self._batchtype = self.get_batch_system_type()

        if self._batchtype == "none":
            return

        known_jobs = self.get_jobs()
Example #15
    def make_batch_script(self, input_template, job, case, outfile=None):
        expect(os.path.exists(input_template), "input file '{}' does not exist".format(input_template))
        overrides = self.get_job_overrides(job, case)
        ext = os.path.splitext(job)[-1]
        if len(ext) == 0:
            ext = job
        if ext.startswith('.'):
            ext = ext[1:]
        overrides["job_id"] = ext + '.' + case.get_value("CASE")
        if "pleiades" in case.get_value("MACH"):
            # pleiades jobname needs to be limited to 15 chars
            overrides["job_id"] = overrides["job_id"][:15]
        overrides["batchdirectives"] = self.get_batch_directives(case, job, overrides=overrides)
        output_text = transform_vars(open(input_template,"r").read(), case=case, subgroup=job, overrides=overrides)
        output_name = get_batch_script_for_job(job) if outfile is None else outfile
        logger.info("Creating file {}".format(output_name))
        with open(output_name, "w") as fd:
            fd.write(output_text)

        # make sure batch script is executable
        os.chmod(output_name, os.stat(output_name).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

    def set_job_defaults(self, batch_jobs, case):
        if self._batchtype is None:
            self._batchtype = self.get_batch_system_type()

        if self._batchtype == "none":
            return
        env_workflow = case.get_env('workflow')
        known_jobs = env_workflow.get_jobs()
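A standalone sketch of the job-id derivation above: the prefix comes from the job name's extension ("case.run" -> "run"), falling back to the whole name when there is no extension.

import os

def job_id_prefix(job):
    ext = os.path.splitext(job)[-1]
    if len(ext) == 0:
        ext = job
    return ext[1:] if ext.startswith('.') else ext

# job_id_prefix("case.run") -> "run"; job_id_prefix("run") -> "run"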
Example #16
def _case_setup_impl(case, caseroot, clean=False, test_mode=False, reset=False):
###############################################################################
    os.chdir(caseroot)

    # Check that $DIN_LOC_ROOT exists, and abort unless this is a namelist compare test
    din_loc_root = case.get_value("DIN_LOC_ROOT")
    testcase     = case.get_value("TESTCASE")
    expect(not (not os.path.isdir(din_loc_root) and testcase != "SBN"),
           "inputdata root is not a directory: {}".format(din_loc_root))

    # Remove batch scripts
    if reset or clean:
        # clean batch script
        batch_script = get_batch_script_for_job(case.get_primary_job())
        if os.path.exists(batch_script):
            os.remove(batch_script)
            logger.info("Successfully cleaned batch script {}".format(batch_script))

        if not test_mode:
            # rebuild the models (even on restart)
            case.set_value("BUILD_COMPLETE", False)

    if not clean:
        case.load_env()

        models = case.get_values("COMP_CLASSES")
        mach = case.get_value("MACH")
        compiler = case.get_value("COMPILER")
        debug = case.get_value("DEBUG")
        mpilib = case.get_value("MPILIB")
        sysos = case.get_value("OS")
        expect(mach is not None, "xml variable MACH is not set")

        # creates the Macros.make, Depends.compiler, Depends.machine, Depends.machine.compiler
        # and env_mach_specific.xml if they don't already exist.
        if not os.path.isfile("Macros.make") or not os.path.isfile("env_mach_specific.xml"):
            configure(Machines(machine=mach), caseroot, ["Makefile"], compiler, mpilib, debug, sysos)

        # Set tasks to 1 if mpi-serial library
        if mpilib == "mpi-serial":
            for vid, value in case:
                if vid.startswith("NTASKS") and value != 1:
                    case.set_value(vid, 1)

        # Check ninst.
        # In CIME there can be multiple instances of each component model (an ensemble); NINST is the number of instances of that component.
        multi_driver = case.get_value("MULTI_DRIVER")
        nthrds = 1
        for comp in models:
            ntasks = case.get_value("NTASKS_{}".format(comp))
            nthrds = max(nthrds, case.get_value("NTHRDS_{}".format(comp)))
            if comp == "CPL":
                continue
            ninst = case.get_value("NINST_{}".format(comp))
            if multi_driver:
                expect(case.get_value("NINST_LAYOUT_{}".format(comp)) == "concurrent",
                       "If multi_driver is TRUE, NINST_LAYOUT_{} must be concurrent".format(comp))
                case.set_value("NTASKS_PER_INST_{}".format(comp), ntasks)
            else:
                if ninst > ntasks:
                    if ntasks == 1:
                        case.set_value("NTASKS_{}".format(comp), ninst)
                        ntasks = ninst
                    else:
                        expect(False, "NINST_{} value {:d} greater than NTASKS_{} {:d}".format(comp, ninst, comp, ntasks))
                case.set_value("NTASKS_PER_INST_{}".format(comp), int(ntasks / ninst))
        if nthrds > 1:
            case.set_value("BUILD_THREADED",True)

        if os.path.exists(get_batch_script_for_job(case.get_primary_job())):
            logger.info("Machine/Decomp/Pes configuration has already been done ...skipping")

            case.initialize_derived_attributes()

            case.set_value("SMP_PRESENT", case.get_build_threaded())

        else:
            case.check_pelayouts_require_rebuild(models)

            unlock_file("env_build.xml")
            unlock_file("env_batch.xml")

            case.flush()
            case.check_lockedfiles()

            case.initialize_derived_attributes()

            cost_per_node = 16 if case.get_value("MACH") == "yellowstone" else case.get_value("MAX_MPITASKS_PER_NODE")
            case.set_value("COST_PES", case.num_nodes * cost_per_node)
            case.set_value("TOTALPES", case.total_tasks)
            case.set_value("SMP_PRESENT", case.get_build_threaded())

            # create batch files
            env_batch = case.get_env("batch")
            env_batch.make_all_batch_files(case)
            if get_model() == "e3sm" and not case.get_value("TEST"):
                input_batch_script = os.path.join(case.get_value("MACHDIR"), "template.case.run.sh")
                env_batch.make_batch_script(input_batch_script, "case.run", case, outfile=get_batch_script_for_job("case.run.sh"))

            # May need to select new batch settings if pelayout changed (e.g. problem is now too big for prev-selected queue)
            env_batch.set_job_defaults([(case.get_primary_job(), {})], case)
            case.schedule_rewrite(env_batch)

            # Make a copy of env_mach_pes.xml in order to be able
            # to check that it does not change once case.setup is invoked
            case.flush()
            logger.debug("at copy TOTALPES = {}".format(case.get_value("TOTALPES")))
            lock_file("env_mach_pes.xml")
            lock_file("env_batch.xml")

        # Create user_nl files for the required number of instances
        if not os.path.exists("user_nl_cpl"):
            logger.info("Creating user_nl_xxx files for components and cpl")

        # loop over models
        for model in models:
            comp = case.get_value("COMP_{}".format(model))
            logger.debug("Building {} usernl files".format(model))
            _build_usernl_files(case, model, comp)
            if comp == "cism":
                glcroot = case.get_value("COMP_ROOT_DIR_GLC")
                run_cmd_no_fail("{}/cime_config/cism.template {}".format(glcroot, caseroot))

        _build_usernl_files(case, "drv", "cpl")

        # Create needed directories for case
        case.create_dirs()

        logger.info("If an old case build already exists, might want to run \'case.build --clean\' before building")

        # Some tests need namelists created here (ERP) - so do this if we are in test mode
        if test_mode or get_model() == "e3sm":
            logger.info("Generating component namelists as part of setup")
            case.create_namelists()

        # Record env information
        env_module = case.get_env("mach_specific")
        env_module.make_env_mach_specific_file("sh", case)
        env_module.make_env_mach_specific_file("csh", case)
        env_module.save_all_env_info("software_environment.txt")
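The NINST/NTASKS reconciliation above reduces to a small rule; this sketch restates it outside the case object (function name hypothetical).

def tasks_per_instance(ntasks, ninst):
    # each instance needs at least one task, so ninst > ntasks is only legal
    # when ntasks == 1, in which case the task count is raised to ninst
    if ninst > ntasks:
        if ntasks != 1:
            raise ValueError("NINST {} greater than NTASKS {}".format(ninst, ntasks))
        ntasks = ninst
    return ntasks, int(ntasks / ninst)

# tasks_per_instance(8, 4) -> (8, 2); tasks_per_instance(1, 4) -> (4, 1)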
Example #17
                               case.total_tasks * case.thread_count)

            # May need to select new batch settings if pelayout changed (e.g. problem is now too big for prev-selected queue)
            env_batch = case.get_env("batch")
            env_batch.set_job_defaults([(case.get_primary_job(), {})], case)

            # create batch files
            env_batch.make_all_batch_files(case)
            if get_model() == "e3sm" and not case.get_value("TEST"):
                input_batch_script = os.path.join(case.get_value("MACHDIR"),
                                                  "template.case.run.sh")
                env_batch.make_batch_script(
                    input_batch_script,
                    "case.run",
                    case,
                    outfile=get_batch_script_for_job("case.run.sh"))

            # Make a copy of env_mach_pes.xml in order to be able
            # to check that it does not change once case.setup is invoked
            case.flush()
            logger.debug("at copy TOTALPES = {}".format(
                case.get_value("TOTALPES")))
            lock_file("env_mach_pes.xml")
            lock_file("env_batch.xml")

        # Create user_nl files for the required number of instances
        if not os.path.exists("user_nl_cpl"):
            logger.info("Creating user_nl_xxx files for components and cpl")

        # loop over models
        for model in models:
Example #18
        if int(task_count) < case.get_value("MAX_TASKS_PER_NODE"):
            overrides["max_tasks_per_node"] = int(task_count)

        overrides["job_id"] = case.get_value("CASE") + os.path.splitext(job)[1]
        if "pleiades" in case.get_value("MACH"):
            # pleiades jobname needs to be limited to 15 chars
            overrides["job_id"] = overrides["job_id"][:15]

        overrides["batchdirectives"] = self.get_batch_directives(
            case, job, overrides=overrides)
        overrides["mpirun"] = case.get_mpirun_cmd(job=job)
        output_text = transform_vars(open(input_template, "r").read(),
                                     case=case,
                                     subgroup=job,
                                     overrides=overrides)
        output_name = get_batch_script_for_job(
            job) if outfile is None else outfile
        logger.info("Creating file {}".format(output_name))
        with open(output_name, "w") as fd:
            fd.write(output_text)

        # make sure batch script is executable
        os.chmod(
            output_name,
            os.stat(output_name).st_mode | stat.S_IXUSR | stat.S_IXGRP
            | stat.S_IXOTH)

    def set_job_defaults(self, batch_jobs, case):
        if self._batchtype is None:
            self._batchtype = self.get_batch_system_type()

        if self._batchtype == 'none':