Example #1
def get_test_suite(suite, machine=None, compiler=None):
    ###############################################################################
    """
    Return a list of FULL test names for a suite.
    """
    expect(suite in _TEST_SUITES, "Unknown test suite: '%s'" % suite)
    machobj = Machines(machine=machine)
    machine = machobj.get_machine_name()

    if (compiler is None):
        compiler = machobj.get_default_compiler()
    expect(machobj.is_valid_compiler(compiler),
           "Compiler %s not valid for machine %s" % (compiler, machine))

    inherits_from, tests_raw = _TEST_SUITES[suite]
    tests = []
    for item in tests_raw:
        test_mod = None
        if (isinstance(item, str)):
            test_name = item
        else:
            expect(isinstance(item, tuple),
                   "Bad item type for item '%s'" % str(item))
            expect(
                len(item) in [2, 3],
                "Expected two or three items in item '%s'" % str(item))
            expect(isinstance(item[0], str),
                   "Expected string in first field of item '%s'" % str(item))
            expect(isinstance(item[1], str),
                   "Expected string in second field of item '%s'" % str(item))

            test_name = item[0]
            if (len(item) == 2):
                test_mod = item[1]
            else:
                expect(
                    type(item[2]) in [str, tuple],
                    "Expected string or tuple for third field of item '%s'" %
                    str(item))
                test_mod_machines = [item[2]] if isinstance(item[2],
                                                            str) else item[2]
                if (machine in test_mod_machines):
                    test_mod = item[1]

        tests.append(
            CIME.utils.get_full_test_name(test_name,
                                          machine=machine,
                                          compiler=compiler,
                                          testmod=test_mod))

    if (inherits_from is not None):
        inherited_tests = get_test_suite(inherits_from, machine, compiler)

        expect(
            len(set(tests) & set(inherited_tests)) == 0,
            "Tests %s defined in multiple suites" %
            ", ".join(set(tests) & set(inherited_tests)))
        tests.extend(inherited_tests)

    return tests
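The suite table consumed above maps a suite name to (inherits_from, tests_raw), where each raw test is either a plain string or a 2/3-tuple. A hypothetical entry and call, purely illustrative of the shapes this parser accepts (names and machines are made up, not taken from a real config):

# Hypothetical suite definition matching the shapes parsed above.
_TEST_SUITES = {
    "my_suite": (None,                                   # inherits from nothing
                 ("ERS.f19_g16_rx1.A",                    # plain test name
                  ("NCK.f19_g16_rx1.A", "somemod"),       # test name + testmod
                  ("SMS.T42_T42.S", "somemod",            # testmod applied only on listed machines
                   ("machineA", "machineB")))),
}

full_names = get_test_suite("my_suite")   # machine/compiler default to the local ones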
Example #2
def _main_func(options, work_dir):
    ###############################################################################
    """Construct machines html from an XML file."""

    # Initialize variables for the html template
    mach_dict = dict()
    model_version = options.version[0]

    # get the machine config file
    files = Files()
    config_file = files.get_value("MACHINES_SPEC_FILE")
    expect(os.path.isfile(config_file),
           "Cannot find config_file {} on disk".format(config_file))

    # instantiate a machines object and read XML values into a dictionary
    machines = Machines(config_file, machine="Query")
    mach_list = machines.list_available_machines()

    # get all the machine values loaded into the mach_dict
    mach_dict = machines.return_values()

    # initialize the support keys
    for machine in mach_list:
        mach_dict[(machine, 'support')] = "Unsupported"

    # loop through the list of supported machines and flag in the dictionary
    supported = options.supported[0].split(',')
    for machine in supported:
        mach_dict[(machine, 'support')] = "Scientific"

    # loop through the list of tested machines and flag in the dictionary
    tested = options.tested[0].split(',')
    for machine in tested:
        mach_dict[(machine, 'support')] = "Tested"

    # load up jinja template
    templateLoader = jinja2.FileSystemLoader(
        searchpath='{0}/templates'.format(work_dir))
    templateEnv = jinja2.Environment(loader=templateLoader)

    # TODO - get the cesm_version for the CIME root
    tmplFile = 'machdef2html.tmpl'
    template = templateEnv.get_template(tmplFile)
    templateVars = {
        'mach_list': mach_list,
        'mach_dict': mach_dict,
        'today': _now,
        'model_version': model_version
    }

    # render the template
    mach_tmpl = template.render(templateVars)

    # write the output file
    with open(options.htmlfile[0], 'w') as html:
        html.write(mach_tmpl)

    return 0
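For context, a minimal sketch of how the variables handed to the template could be consumed; the real templates/machdef2html.tmpl is not shown here, so the template body below is illustrative only:

import jinja2

# Illustrative template body; the actual machdef2html.tmpl may differ.
_SKETCH = """
<h1>Machines for {{ model_version }} ({{ today }})</h1>
<ul>
{% for machine in mach_list %}
  <li>{{ machine }}: {{ mach_dict[(machine, 'support')] }}</li>
{% endfor %}
</ul>
"""

html = jinja2.Template(_SKETCH).render(
    mach_list=["machineA"],
    mach_dict={("machineA", "support"): "Tested"},
    today="2024-01-01",
    model_version="cesm2_x")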
Example #3
def get_test_suite(suite,
                   machine=None,
                   compiler=None,
                   skip_inherit=False,
                   skip_tests=None):
    ###############################################################################
    """
    Return a list of FULL test names for a suite.
    """
    expect(suite in get_test_suites(),
           "Unknown test suite: '{}'".format(suite))
    machobj = Machines(machine=machine)
    machine = machobj.get_machine_name()

    if compiler is None:
        compiler = machobj.get_default_compiler()
    expect(
        machobj.is_valid_compiler(compiler),
        "Compiler {} not valid for machine {}".format(compiler, machine),
    )

    inherits_from, _, _, tests_raw = get_test_data(suite)
    tests = []
    for item in tests_raw:
        expect(
            isinstance(item, str),
            "Bad type of test {}, expected string".format(item),
        )

        test_mods = None
        test_components = item.split(".")
        expect(len(test_components) in [3, 4], "Bad test name {}".format(item))

        if len(test_components) == 4:
            test_name = ".".join(test_components[:-1])
            test_mods = test_components[-1]
        else:
            test_name = item
        if not skip_tests or test_name not in skip_tests:
            tests.append(
                CIME.utils.get_full_test_name(
                    test_name,
                    machine=machine,
                    compiler=compiler,
                    testmods_string=test_mods,
                ))

    if not skip_inherit:
        for inherits in inherits_from:
            inherited_tests = get_test_suite(inherits, machine, compiler)

            for inherited_test in inherited_tests:
                if inherited_test not in tests:
                    tests.append(inherited_test)

    return tests
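A call against this newer interface might look like the following; the suite, machine, compiler, and skipped test names are illustrative:

tests = get_test_suite("cime_developer",
                       machine="melvin",
                       compiler="gnu",
                       skip_inherit=False,
                       skip_tests=["ERS.f19_g16_rx1.A"])
for full_name in tests:
    print(full_name)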
Example #4
    def _case_two_setup(self):
        mach_name = self._case.get_value("MACH")
        mach_obj = Machines(machine=mach_name)
        if mach_obj.is_valid_MPIlib("mpi-serial"):
            self._case.set_value("MPILIB", "mpi-serial")
        else:
            logger.warning(
                "mpi-serial is not supported on machine '{}', "
                "so we have to fall back to default MPI and "
                "therefore very little is being tested".format(mach_name))

        if os.path.isfile("Macros"):
            os.remove("Macros")
Example #5
File: pea.py Project: bertinia/cime
    def _case_two_setup(self):
        mach_name = self._case.get_value("MACH")
        mach_obj = Machines(machine=mach_name)
        if mach_obj.is_valid_MPIlib("mpi-serial"):
            self._case.set_value("MPILIB","mpi-serial")
        else:
            logger.warning("mpi-serial is not supported on machine '{}', "
                           "so we have to fall back to default MPI and "
                           "therefore very little is being tested".format(mach_name))

        if os.path.isfile("Macros"):
            os.remove("Macros")
        self._case.case_setup(test_mode=True, reset=True)
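The same mpi-serial check can be performed outside of a test case; a minimal sketch, assuming the usual CIME import path for Machines (the machine name is illustrative):

from CIME.XML.machines import Machines  # import path assumed

mach_obj = Machines(machine="melvin")   # machine name illustrative
if mach_obj.is_valid_MPIlib("mpi-serial"):
    mpilib = "mpi-serial"
else:
    mpilib = None  # fall back to the machine's default MPI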
Example #6
File: case.py Project: erika72/cime
    def _create_caseroot_tools(self):
        machines_dir = os.path.abspath(self.get_value("MACHDIR"))
        toolsdir = os.path.join(self.get_value("CIMEROOT"),"scripts","Tools")
        # setup executable files in caseroot/
        exefiles = (os.path.join(toolsdir, "case.setup"),
                    os.path.join(toolsdir, "case.build"),
                    os.path.join(toolsdir, "case.submit"),
                    os.path.join(toolsdir, "preview_namelists"),
                    os.path.join(toolsdir, "check_input_data"),
                    os.path.join(toolsdir, "check_case"),
                    os.path.join(toolsdir, "archive_metadata.sh"),
                    os.path.join(toolsdir, "xmlchange"),
                    os.path.join(toolsdir, "xmlquery"))
        try:
            for exefile in exefiles:
                destfile = os.path.join(self._caseroot,os.path.basename(exefile))
                os.symlink(exefile, destfile)
        except Exception as e:
            logger.warning("FAILED to set up exefiles: %s" % str(e))

        # set up utility files in caseroot/Tools/
        toolfiles = (os.path.join(toolsdir, "check_lockedfiles"),
                     os.path.join(toolsdir, "lt_archive.sh"),
                     os.path.join(toolsdir, "getTiming"),
                     os.path.join(toolsdir, "save_provenance"),
                     os.path.join(machines_dir,"Makefile"),
                     os.path.join(machines_dir,"mkSrcfiles"),
                     os.path.join(machines_dir,"mkDepends"))

        for toolfile in toolfiles:
            destfile = os.path.join(self._caseroot,"Tools",os.path.basename(toolfile))
            expect(os.path.isfile(toolfile), "File %s does not exist" % toolfile)
            try:
                os.symlink(toolfile, destfile)
            except Exception as e:
                logger.warning("FAILED to set up toolfiles: %s %s %s" % (str(e), toolfile, destfile))

        # Create Macros file.
        machine = self.get_value("MACH")
        files = Files()
        # Use config_build if the environment variable is set, or if there is no
        # config_compilers file.
        if os.getenv("CIME_USE_CONFIG_BUILD") == "TRUE" or \
           files.get_value("COMPILERS_SPEC_FILE") is None:
            build_file = files.get_value("BUILD_SPEC_FILE")
            machobj = Machines(machine=machine, files=files)
            macro_maker = Build(machobj)
            macros_path = os.path.join(self._caseroot, "Macros")
            with open(macros_path, "w") as macros_file:
                macro_maker.write_macros('Makefile', build_file, macros_file)

        # Copy any system or compiler Depends files to the case.
        compiler = self.get_value("COMPILER")
        for dep in (machine, compiler):
            dfile = "Depends.%s"%dep
            if os.path.isfile(os.path.join(machines_dir,dfile)):
                shutil.copyfile(os.path.join(machines_dir,dfile), os.path.join(self._caseroot,dfile))
        dfile = "Depends.%s.%s"%(machine,compiler)
        if os.path.isfile(os.path.join(machines_dir,dfile)):
            shutil.copyfile(os.path.join(machines_dir,dfile), os.path.join(self._caseroot, dfile))
Example #7
    def list_machines(self):
        self.MachineList.Machobj = Machines()
        mach_list = self.MachineList.Machobj.list_available_machines()
        self.MachineList.addItems(mach_list)
        name = self.MachineList.Machobj.get_machine_name()
        if name is not None:
            self.MachineList.setCurrentIndex(mach_list.index(name))
            self.MachineSelect(name)
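Outside of the Qt widget, the same queries work on a bare Machines object; a minimal sketch, assuming the usual CIME import path:

from CIME.XML.machines import Machines  # import path assumed

machobj = Machines()                          # probes config_machines.xml for this host
mach_list = machobj.list_available_machines()
current = machobj.get_machine_name()          # may be None if the host is not recognized
print(current, mach_list)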
Example #8
def get_test_suite(suite, machine=None, compiler=None):
###############################################################################
    """
    Return a list of FULL test names for a suite.
    """
    expect(suite in _ALL_TESTS, "Unknown test suite: '{}'".format(suite))
    machobj = Machines(machine=machine)
    machine = machobj.get_machine_name()

    if(compiler is None):
        compiler = machobj.get_default_compiler()
    expect(machobj.is_valid_compiler(compiler),"Compiler {} not valid for machine {}".format(compiler,machine))

    inherits_from, _, tests_raw = _ALL_TESTS[suite]
    tests = []
    for item in tests_raw:
        test_mod = None
        if (isinstance(item, six.string_types)):
            test_name = item
        else:
            expect(isinstance(item, tuple), "Bad item type for item '{}'".format(str(item)))
            expect(len(item) in [2, 3], "Expected two or three items in item '{}'".format(str(item)))
            expect(isinstance(item[0], six.string_types), "Expected string in first field of item '{}'".format(str(item)))
            expect(isinstance(item[1], six.string_types), "Expected string in second field of item '{}'".format(str(item)))

            test_name = item[0]
            if (len(item) == 2):
                test_mod = item[1]
            else:
                expect(type(item[2]) in [six.string_types, tuple], "Expected string or tuple for third field of item '{}'".format(str(item)))
                test_mod_machines = [item[2]] if isinstance(item[2], six.string_types) else item[2]
                if (machine in test_mod_machines):
                    test_mod = item[1]

        tests.append(CIME.utils.get_full_test_name(test_name, machine=machine, compiler=compiler, testmod=test_mod))

    if (inherits_from is not None):
        inherits_from = [inherits_from] if isinstance(inherits_from, six.string_types) else inherits_from
        for inherits in inherits_from:
            inherited_tests = get_test_suite(inherits, machine, compiler)

            expect(len(set(tests) & set(inherited_tests)) == 0,
                   "Tests {} defined in multiple suites".format(", ".join(set(tests) & set(inherited_tests))))
            tests.extend(inherited_tests)

    return tests
Example #9
    def __init__(self, machine, compiler, cimeroot, caseroot, mpilib, debug=False):
        self._machine  = Machines(machine=machine)
        self._compiler = compiler
        self._cimeroot = cimeroot
        self._caseroot = caseroot
        self._mpilib   = mpilib
        self._debug    = debug

        self._module_system = self._machine.get_module_system_type()
Example #10
def run_model(case):
###############################################################################

    # Set OMP_NUM_THREADS
    tm = TaskMaker(case)
    num_threads = tm.thread_count
    os.environ["OMP_NUM_THREADS"] = str(num_threads)

    # Run the model
    logger.info("%s MODEL EXECUTION BEGINS HERE" %(time.strftime("%Y-%m-%d %H:%M:%S")))

    machine = Machines(machine=case.get_value("MACH"))
    cmd = machine.get_full_mpirun(tm, case, "case.run")
    cmd = case.get_resolved_value(cmd)

    logger.info("run command is %s " %cmd)
    rundir = case.get_value("RUNDIR")
    run_cmd_no_fail(cmd, from_dir=rundir)
    logger.info( "%s MODEL EXECUTION HAS FINISHED" %(time.strftime("%Y-%m-%d %H:%M:%S")))
Example #11
def find_all_supported_platforms():
###############################################################################
    """
    Returns a list of all ACME supported platforms as defined in the
    XML configuration file config_machines.xml in the ACME source
    tree. A platform is defined by a triple (machine name, compiler,
    mpi library).
    """
    machines = CIME.utils.get_machines()
    machobj = Machines()  # the target machine is selected per-iteration via set_machine below
    platform_set = set()

    for machine in machines:
        machobj.set_machine(machine)
        compilers, mpilibs = machobj.get_value("COMPILERS"), machobj.get_value("MPILIBS")
        for compiler in compilers:
            for mpilib in mpilibs:
                platform_set.add((machine, compiler, mpilib))

    return list(platform_set)
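With the lookup fixed as above, iterating the result is straightforward (output depends entirely on the local config_machines.xml):

for machine, compiler, mpilib in find_all_supported_platforms():
    print("{} / {} / {}".format(machine, compiler, mpilib))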
Example #12
def get_test_suite(suite, machine=None, compiler=None, skip_inherit=False):
###############################################################################
    """
    Return a list of FULL test names for a suite.
    """
    expect(suite in get_test_suites(), "Unknown test suite: '{}'".format(suite))
    machobj = Machines(machine=machine)
    machine = machobj.get_machine_name()

    if(compiler is None):
        compiler = machobj.get_default_compiler()
    expect(machobj.is_valid_compiler(compiler),"Compiler {} not valid for machine {}".format(compiler,machine))

    inherits_from, _, _, tests_raw = get_test_data(suite)
    tests = []
    for item in tests_raw:
        expect(isinstance(item, six.string_types), "Bad type of test {}, expected string".format(item))

        test_mod = None
        test_components = item.split(".")
        expect(len(test_components) in [3, 4], "Bad test name {}".format(item))

        if (len(test_components) == 4):
            test_name = ".".join(test_components[:-1])
            test_mod = test_components[-1]
        else:
            test_name = item

        tests.append(CIME.utils.get_full_test_name(test_name, machine=machine, compiler=compiler, testmod=test_mod))

    if not skip_inherit:
        for inherits in inherits_from:
            inherited_tests = get_test_suite(inherits, machine, compiler)

            for inherited_test in inherited_tests:
                if inherited_test not in tests:
                    tests.append(inherited_test)

    return tests
Example #13
class TestScheduler(object):
###############################################################################

    ###########################################################################
    def __init__(self, test_names, test_data=None,
                 no_run=False, no_build=False, no_setup=False, no_batch=None,
                 test_root=None, test_id=None,
                 machine_name=None, compiler=None,
                 baseline_root=None, baseline_cmp_name=None, baseline_gen_name=None,
                 clean=False, namelists_only=False,
                 project=None, parallel_jobs=None,
                 walltime=None, proc_pool=None,
                 use_existing=False, save_timing=False, queue=None,
                 allow_baseline_overwrite=False, output_root=None,
                 force_procs=None, force_threads=None, mpilib=None,
                 input_dir=None, pesfile=None, mail_user=None, mail_type=None):
    ###########################################################################
        self._cime_root       = CIME.utils.get_cime_root()
        self._cime_model      = get_model()
        self._save_timing     = save_timing
        self._queue           = queue
        self._test_data       = {} if test_data is None else test_data # Format:  {test_name -> {data_name -> data}}
        self._mpilib          = mpilib  # allow override of default mpilib
        self._completed_tests = 0
        self._input_dir       = input_dir
        self._pesfile         = pesfile
        self._allow_baseline_overwrite = allow_baseline_overwrite

        self._mail_user = mail_user
        self._mail_type = mail_type

        self._machobj = Machines(machine=machine_name)

        self._model_build_cost = 4

        # If user is forcing procs or threads, re-write test names to reflect this.
        if force_procs or force_threads:
            test_names = _translate_test_names_for_new_pecount(test_names, force_procs, force_threads)

        self._no_setup = no_setup
        self._no_build = no_build or no_setup or namelists_only
        self._no_run   = no_run or self._no_build
        self._output_root = output_root
        # Figure out what project to use
        if project is None:
            self._project = CIME.utils.get_project()
            if self._project is None:
                self._project = self._machobj.get_value("PROJECT")
        else:
            self._project = project

        # We will not use batch system if user asked for no_batch or if current
        # machine is not a batch machine
        self._no_batch = no_batch or not self._machobj.has_batch_system()
        expect(not (self._no_batch and self._queue is not None),
               "Does not make sense to request a queue without batch system")

        # Determine and resolve test_root
        if test_root is not None:
            self._test_root = test_root
        elif self._output_root is not None:
            self._test_root = self._output_root
        else:
            self._test_root = self._machobj.get_value("CIME_OUTPUT_ROOT")

        if self._project is not None:
            self._test_root = self._test_root.replace("$PROJECT", self._project)

        self._test_root = os.path.abspath(self._test_root)
        self._test_id   = test_id if test_id is not None else CIME.utils.get_timestamp()

        self._compiler = self._machobj.get_default_compiler() if compiler is None else compiler

        self._clean          = clean
        self._namelists_only = namelists_only

        self._walltime = walltime

        if parallel_jobs is None:
            self._parallel_jobs = min(len(test_names),
                                      self._machobj.get_value("MAX_MPITASKS_PER_NODE"))
        else:
            self._parallel_jobs = parallel_jobs

        self._baseline_cmp_name = baseline_cmp_name # Implies comparison should be done if not None
        self._baseline_gen_name = baseline_gen_name # Implies generation should be done if not None

        # Compute baseline_root
        self._baseline_root = baseline_root if baseline_root is not None \
                              else self._machobj.get_value("BASELINE_ROOT")

        if self._project is not None:
            self._baseline_root = self._baseline_root.replace("$PROJECT", self._project)

        self._baseline_root = os.path.abspath(self._baseline_root)

        if baseline_cmp_name or baseline_gen_name:
            if self._baseline_cmp_name:
                full_baseline_dir = os.path.join(self._baseline_root, self._baseline_cmp_name)
                expect(os.path.isdir(full_baseline_dir),
                       "Missing baseline comparison directory {}".format(full_baseline_dir))

            # the following ensures that an existing generate directory is not overwritten
            if self._baseline_gen_name:
                full_baseline_dir = os.path.join(self._baseline_root, self._baseline_gen_name)
                existing_baselines = []
                for test_name in test_names:
                    test_baseline = os.path.join(full_baseline_dir, test_name)
                    if os.path.isdir(test_baseline):
                        existing_baselines.append(test_baseline)

                expect(allow_baseline_overwrite or len(existing_baselines) == 0,
                       "Baseline directories already exist {}\n" \
                       "Use -o to avoid this error".format(existing_baselines))

        if self._cime_model == "e3sm":
            _order_tests_by_runtime(test_names, self._baseline_root)

        # This is the only data that multiple threads will simultaneously access
        # Each test has its own value and setting/retrieving items from a dict
        # is atomic, so this should be fine to use without mutex.
        # name -> (phase, status)
        self._tests = OrderedDict()
        for test_name in test_names:
            self._tests[test_name] = (TEST_START, TEST_PASS_STATUS)

        # Oversubscribe by 1/4
        if proc_pool is None:
            pes = int(self._machobj.get_value("MAX_TASKS_PER_NODE"))
            self._proc_pool = int(pes * 1.25)
        else:
            self._proc_pool = int(proc_pool)

        self._procs_avail = self._proc_pool

        # Setup phases
        self._phases = list(PHASES)
        if self._no_setup:
            self._phases.remove(SETUP_PHASE)
        if self._no_build:
            self._phases.remove(SHAREDLIB_BUILD_PHASE)
            self._phases.remove(MODEL_BUILD_PHASE)
        if self._no_run:
            self._phases.remove(RUN_PHASE)

        if use_existing:
            for test in self._tests:
                with TestStatus(self._get_test_dir(test)) as ts:
                    for phase, status in ts:
                        if phase in CORE_PHASES:
                            if status in [TEST_PEND_STATUS, TEST_FAIL_STATUS]:
                                if status == TEST_FAIL_STATUS:
                                    # Import for potential subsequent waits
                                    ts.set_status(phase, TEST_PEND_STATUS)

                                # We need to pick up here
                                break

                            else:
                                if phase != SUBMIT_PHASE:
                                    # Somewhat subtle. Create_test considers submit/run to be the run phase,
                                    # so don't try to update test status for a passed submit phase
                                    self._update_test_status(test, phase, TEST_PEND_STATUS)
                                    self._update_test_status(test, phase, status)

                                    if phase == RUN_PHASE:
                                        logger.info("Test {} passed and will not be re-run".format(test))

                logger.info("Using existing test directory {}".format(self._get_test_dir(test)))
        else:
            # None of the test directories should already exist.
            for test in self._tests:
                expect(not os.path.exists(self._get_test_dir(test)),
                       "Cannot create new case in directory '{}', it already exists."
                       " Pick a different test-id".format(self._get_test_dir(test)))
                logger.info("Creating test directory {}".format(self._get_test_dir(test)))

        # By the end of this constructor, this program should never hard abort,
        # instead, errors will be placed in the TestStatus files for the various
        # tests cases

    ###########################################################################
    def get_testnames(self):
    ###########################################################################
        return list(self._tests.keys())

    ###########################################################################
    def _log_output(self, test, output):
    ###########################################################################
        test_dir = self._get_test_dir(test)
        if not os.path.isdir(test_dir):
            # Note: creating this directory here can cause create_newcase to fail
            # if this runs before create_newcase does.
            os.makedirs(test_dir)
        append_testlog(output, caseroot=test_dir)

    ###########################################################################
    def _get_case_id(self, test):
    ###########################################################################
        baseline_action_code = ""
        if self._baseline_gen_name:
            baseline_action_code += "G"
        if self._baseline_cmp_name:
            baseline_action_code += "C"
        if len(baseline_action_code) > 0:
            return "{}.{}.{}".format(test, baseline_action_code, self._test_id)
        else:
            return "{}.{}".format(test, self._test_id)

    ###########################################################################
    def _get_test_dir(self, test):
    ###########################################################################
        return os.path.join(self._test_root, self._get_case_id(test))

    ###########################################################################
    def _get_test_data(self, test):
    ###########################################################################
        # Must be atomic
        return self._tests[test]

    ###########################################################################
    def _is_broken(self, test):
    ###########################################################################
        status = self._get_test_status(test)
        return status != TEST_PASS_STATUS and status != TEST_PEND_STATUS

    ###########################################################################
    def _work_remains(self, test):
    ###########################################################################
        test_phase, test_status = self._get_test_data(test)
        return (test_status == TEST_PASS_STATUS or test_status == TEST_PEND_STATUS) and\
            test_phase != self._phases[-1]

    ###########################################################################
    def _get_test_status(self, test, phase=None):
    ###########################################################################
        curr_phase, curr_status = self._get_test_data(test)
        if phase is None or phase == curr_phase:
            return curr_status
        else:
            expect(phase is None or self._phases.index(phase) < self._phases.index(curr_phase),
                   "Tried to see the future")
            # Assume all older phases PASSed
            return TEST_PASS_STATUS

    ###########################################################################
    def _get_test_phase(self, test):
    ###########################################################################
        return self._get_test_data(test)[0]

    ###########################################################################
    def _update_test_status(self, test, phase, status):
    ###########################################################################
        phase_idx = self._phases.index(phase)
        old_phase, old_status = self._get_test_data(test)

        if old_phase == phase:
            expect(old_status == TEST_PEND_STATUS,
                   "Only valid to transition from PEND to something else, found '{}' for phase '{}'".format(old_status, phase))
            expect(status != TEST_PEND_STATUS,
                   "Cannot transition from PEND -> PEND")
        else:
            expect(old_status == TEST_PASS_STATUS,
                   "Why did we move on to next phase when prior phase did not pass?")
            expect(status == TEST_PEND_STATUS,
                   "New phase should be set to pending status")
            expect(self._phases.index(old_phase) == phase_idx - 1,
                   "Skipped phase? {} {}".format(old_phase, phase_idx))

        # Must be atomic
        self._tests[test] = (phase, status)

    ###########################################################################
    def _shell_cmd_for_phase(self, test, cmd, phase, from_dir=None):
    ###########################################################################
        while True:
            rc, output, errput = run_cmd(cmd, from_dir=from_dir)
            if rc != 0:
                self._log_output(test,
                                 "{} FAILED for test '{}'.\nCommand: {}\nOutput: {}\n".
                                 format(phase, test, cmd,
                                        output.encode('utf-8') + b"\n" + errput.encode('utf-8')))
                # Temporary hack to get around odd file descriptor use by
                # buildnml scripts.
                if "bad interpreter" in output:
                    time.sleep(1)
                    continue
                else:
                    return False, errput
            else:
                # We don't want "RUN PASSED" in the TestStatus.log if the only thing that
                # succeeded was the submission.
                phase = "SUBMIT" if phase == RUN_PHASE else phase
                self._log_output(test,
                                 "{} PASSED for test '{}'.\nCommand: {}\nOutput: {}\n".
                                 format(phase, test, cmd,
                                        output.encode('utf-8') + b"\n" + errput.encode('utf-8')))
                return True, errput

    ###########################################################################
    def _create_newcase_phase(self, test):
    ###########################################################################
        test_dir = self._get_test_dir(test)

        _, case_opts, grid, compset,\
            machine, compiler, test_mods = CIME.utils.parse_test_name(test)

        create_newcase_cmd = "{} --case {} --res {} --compset {}"\
                             " --test".format(os.path.join(self._cime_root, "scripts", "create_newcase"),
                                              test_dir, grid, compset)
        if machine is not None:
            create_newcase_cmd += " --machine {}".format(machine)
        if compiler is not None:
            create_newcase_cmd += " --compiler {}".format(compiler)
        if self._project is not None:
            create_newcase_cmd += " --project {} ".format(self._project)
        if self._output_root is not None:
            create_newcase_cmd += " --output-root {} ".format(self._output_root)
        if self._input_dir is not None:
            create_newcase_cmd += " --input-dir {} ".format(self._input_dir)

        if self._pesfile is not None:
            create_newcase_cmd += " --pesfile {} ".format(self._pesfile)

        if test_mods is not None:
            files = Files()
            (component, modspath) = test_mods.split('/',1)

            testmods_dir = files.get_value("TESTS_MODS_DIR", {"component": component})
            test_mod_file = os.path.join(testmods_dir, component, modspath)
            if not os.path.exists(test_mod_file):
                error = "Missing testmod file '{}'".format(test_mod_file)
                self._log_output(test, error)
                return False, error

            create_newcase_cmd += " --user-mods-dir {}".format(test_mod_file)

        mpilib = None
        ninst = 1
        ncpl = 1
        if case_opts is not None:
            for case_opt in case_opts: # pylint: disable=not-an-iterable
                if case_opt.startswith('M'):
                    mpilib = case_opt[1:]
                    create_newcase_cmd += " --mpilib {}".format(mpilib)
                    logger.debug (" MPILIB set to {}".format(mpilib))
                elif case_opt.startswith('N'):
                    expect(ncpl == 1,"Cannot combine _C and _N options")
                    ninst = case_opt[1:]
                    create_newcase_cmd += " --ninst {}".format(ninst)
                    logger.debug (" NINST set to {}".format(ninst))
                elif case_opt.startswith('C'):
                    expect(ninst == 1,"Cannot combine _C and _N options")
                    ncpl = case_opt[1:]
                    create_newcase_cmd += " --ninst {} --multi-driver" .format(ncpl)
                    logger.debug (" NCPL set to {}" .format(ncpl))
                elif case_opt.startswith('P'):
                    pesize = case_opt[1:]
                    create_newcase_cmd += " --pecount {}".format(pesize)
                elif case_opt.startswith('V'):
                    driver = case_opt[1:]
                    create_newcase_cmd += " --driver {}".format(driver)

        # create_test mpilib option overrides default but not explicitly set case_opt mpilib
        if mpilib is None and self._mpilib is not None:
            create_newcase_cmd += " --mpilib {}".format(self._mpilib)
            logger.debug (" MPILIB set to {}".format(self._mpilib))

        if self._queue is not None:
            create_newcase_cmd += " --queue={}".format(self._queue)

        if self._walltime is not None:
            create_newcase_cmd += " --walltime {}".format(self._walltime)
        else:
            # model specific ways of setting time
            if self._cime_model == "e3sm":
                recommended_time = _get_time_est(test, self._baseline_root)

                if recommended_time is not None:
                    create_newcase_cmd += " --walltime {}".format(recommended_time)

            else:
                if test in self._test_data and "options" in self._test_data[test] and \
                        "wallclock" in self._test_data[test]['options']:
                    create_newcase_cmd += " --walltime {}".format(self._test_data[test]['options']['wallclock'])

        logger.debug("Calling create_newcase: " + create_newcase_cmd)
        return self._shell_cmd_for_phase(test, create_newcase_cmd, CREATE_NEWCASE_PHASE)

    ###########################################################################
    def _xml_phase(self, test):
    ###########################################################################
        test_case = CIME.utils.parse_test_name(test)[0]

        # Create, fill and write an envtest object
        test_dir = self._get_test_dir(test)
        envtest = EnvTest(test_dir)

        # Determine list of component classes that this coupler/driver knows how
        # to deal with. This list follows the same order as compset longnames follow.
        files = Files()
        drv_config_file = files.get_value("CONFIG_CPL_FILE")
        drv_comp = Component(drv_config_file, "CPL")
        envtest.add_elements_by_group(files, {}, "env_test.xml")
        envtest.add_elements_by_group(drv_comp, {}, "env_test.xml")
        envtest.set_value("TESTCASE", test_case)
        envtest.set_value("TEST_TESTID", self._test_id)
        envtest.set_value("CASEBASEID", test)
        if test in self._test_data and "options" in self._test_data[test] and \
                "memleak_tolerance" in self._test_data[test]['options']:
            envtest.set_value("TEST_MEMLEAK_TOLERANCE", self._test_data[test]['options']['memleak_tolerance'])

        test_argv = "-testname {} -testroot {}".format(test, self._test_root)
        if self._baseline_gen_name:
            test_argv += " -generate {}".format(self._baseline_gen_name)
            basegen_case_fullpath = os.path.join(self._baseline_root,self._baseline_gen_name, test)
            logger.debug("basegen_case is {}".format(basegen_case_fullpath))
            envtest.set_value("BASELINE_NAME_GEN", self._baseline_gen_name)
            envtest.set_value("BASEGEN_CASE", os.path.join(self._baseline_gen_name, test))
        if self._baseline_cmp_name:
            test_argv += " -compare {}".format(self._baseline_cmp_name)
            envtest.set_value("BASELINE_NAME_CMP", self._baseline_cmp_name)
            envtest.set_value("BASECMP_CASE", os.path.join(self._baseline_cmp_name, test))

        envtest.set_value("TEST_ARGV", test_argv)
        envtest.set_value("CLEANUP", self._clean)

        envtest.set_value("BASELINE_ROOT", self._baseline_root)
        envtest.set_value("GENERATE_BASELINE", self._baseline_gen_name is not None)
        envtest.set_value("COMPARE_BASELINE", self._baseline_cmp_name is not None)
        envtest.set_value("CCSM_CPRNC", self._machobj.get_value("CCSM_CPRNC", resolved=False))
        tput_tolerance = self._machobj.get_value("TEST_TPUT_TOLERANCE", resolved=False)
        envtest.set_value("TEST_TPUT_TOLERANCE", 0.25 if tput_tolerance is None else tput_tolerance)

        # Add the test instructions from config_test to env_test in the case
        config_test = Tests()
        testnode = config_test.get_test_node(test_case)
        envtest.add_test(testnode)
        # Determine the test_case from the test name
        test_case, case_opts = CIME.utils.parse_test_name(test)[:2]

        # Determine case_opts from the test_case
        if case_opts is not None:
            logger.debug("case_opts are {} ".format(case_opts))
            for opt in case_opts: # pylint: disable=not-an-iterable

                logger.debug("case_opt is {}".format(opt))
                if opt == 'D':
                    envtest.set_test_parameter("DEBUG", "TRUE")
                    logger.debug (" DEBUG set to TRUE")

                elif opt == 'E':
                    envtest.set_test_parameter("USE_ESMF_LIB", "TRUE")
                    logger.debug (" USE_ESMF_LIB set to TRUE")

                elif opt == 'CG':
                    envtest.set_test_parameter("CALENDAR", "GREGORIAN")
                    logger.debug (" CALENDAR set to {}".format(opt))

                elif opt.startswith('L'):
                    match =  re.match('L([A-Za-z])([0-9]*)', opt)
                    stop_option = {"y":"nyears", "m":"nmonths", "d":"ndays", "h":"nhours",
                                   "s":"nseconds", "n":"nsteps"}
                    opt = match.group(1)
                    envtest.set_test_parameter("STOP_OPTION",stop_option[opt])
                    opti = match.group(2)
                    envtest.set_test_parameter("STOP_N", opti)

                    logger.debug (" STOP_OPTION set to {}".format(stop_option[opt]))
                    logger.debug (" STOP_N      set to {}".format(opti))

                elif opt.startswith('R'):
                    # R option is for testing in PTS_MODE or Single Column Model
                    #  (SCM) mode
                    envtest.set_test_parameter("PTS_MODE", "TRUE")

                    # For PTS_MODE, compile with mpi-serial
                    envtest.set_test_parameter("MPILIB", "mpi-serial")

                elif (opt.startswith('I') or # Marker to distinguish tests with same name - ignored
                      opt.startswith('M') or # handled in create_newcase
                      opt.startswith('P') or # handled in create_newcase
                      opt.startswith('N') or # handled in create_newcase
                      opt.startswith('C') or # handled in create_newcase
                      opt.startswith('V')):  # handled in create_newcase
                    pass

                elif opt.startswith('IOP'):
                    logger.warning("IOP test option not yet implemented")
                else:
                    expect(False, "Could not parse option '{}' ".format(opt))

        envtest.write()
        lock_file("env_run.xml", caseroot=test_dir, newname="env_run.orig.xml")

        with Case(test_dir, read_only=False) as case:
            if self._output_root is None:
                self._output_root = case.get_value("CIME_OUTPUT_ROOT")
            # if we are running a single test we don't need sharedlibroot
            if len(self._tests) > 1 and self._cime_model != "e3sm":
                case.set_value("SHAREDLIBROOT",
                               os.path.join(self._output_root,
                                            "sharedlibroot.{}".format(self._test_id)))
            envtest.set_initial_values(case)
            case.set_value("TEST", True)
            case.set_value("SAVE_TIMING", self._save_timing)

            # Scale back build parallelism on systems with few cores
            if self._model_build_cost > self._proc_pool:
                case.set_value("GMAKE_J", self._proc_pool)
                self._model_build_cost = self._proc_pool

        return True, ""

    ###########################################################################
    def _setup_phase(self, test):
    ###########################################################################
        test_dir  = self._get_test_dir(test)
        rv = self._shell_cmd_for_phase(test, "./case.setup", SETUP_PHASE, from_dir=test_dir)

        # It's OK for this command to fail with baseline diffs but not catastrophically
        if rv[0]:
            cmdstat, output, _ = run_cmd("./case.cmpgen_namelists", combine_output=True, from_dir=test_dir)
            expect(cmdstat in [0, TESTS_FAILED_ERR_CODE], "Fatal error in case.cmpgen_namelists: {}".format(output))

        return rv

    ###########################################################################
    def _sharedlib_build_phase(self, test):
    ###########################################################################
        test_dir = self._get_test_dir(test)
        return self._shell_cmd_for_phase(test, "./case.build --sharedlib-only", SHAREDLIB_BUILD_PHASE, from_dir=test_dir)

    ###########################################################################
    def _model_build_phase(self, test):
    ###########################################################################
        test_dir = self._get_test_dir(test)
        return self._shell_cmd_for_phase(test, "./case.build --model-only", MODEL_BUILD_PHASE, from_dir=test_dir)

    ###########################################################################
    def _run_phase(self, test):
    ###########################################################################
        test_dir = self._get_test_dir(test)

        cmd = "./case.submit --skip-preview-namelist"
        if self._no_batch:
            cmd += " --no-batch"
        if self._mail_user:
            cmd += " --mail-user={}".format(self._mail_user)
        if self._mail_type:
            cmd += " -M={}".format(",".join(self._mail_type))

        return self._shell_cmd_for_phase(test, cmd, RUN_PHASE, from_dir=test_dir)

    ###########################################################################
    def _run_catch_exceptions(self, test, phase, run):
    ###########################################################################
        try:
            return run(test)
        except (SystemExit, Exception) as e:
            exc_tb = sys.exc_info()[2]
            errput = "Test '{}' failed in phase '{}' with exception '{}'\n".format(test, phase, str(e))
            errput += ''.join(traceback.format_tb(exc_tb))
            self._log_output(test, errput)
            return False, errput

    ###########################################################################
    def _get_procs_needed(self, test, phase, threads_in_flight=None, no_batch=False):
    ###########################################################################
        if phase == RUN_PHASE and (self._no_batch or no_batch):
            test_dir = self._get_test_dir(test)
            total_pes = int(run_cmd_no_fail("./xmlquery TOTALPES --value", from_dir=test_dir))
            threads = eval(run_cmd_no_fail("./xmlquery NTHRDS --value", from_dir=test_dir))
            max_threads = 0
            for item in threads:
                _, comp_threads = item.split(":")
                comp_threads = int(comp_threads)
                if comp_threads > max_threads:
                    max_threads = comp_threads

            max_cores = total_pes * max_threads
            return max_cores

        elif (phase == SHAREDLIB_BUILD_PHASE):
            if self._cime_model == "cesm":
                # Will force serialization of sharedlib builds
                # TODO - instead of serializing, compute all library configs needed and build
                # them all in parallel
                for _, _, running_phase in threads_in_flight.values():
                    if (running_phase == SHAREDLIB_BUILD_PHASE):
                        return self._proc_pool + 1

            return 1
        elif (phase == MODEL_BUILD_PHASE):
            # Model builds now happen in parallel
            return self._model_build_cost
        else:
            return 1

    ###########################################################################
    def _wait_for_something_to_finish(self, threads_in_flight):
    ###########################################################################
        expect(len(threads_in_flight) <= self._parallel_jobs, "Oversubscribed?")
        finished_tests = []
        while not finished_tests:
            for test, thread_info in threads_in_flight.items():
                if not thread_info[0].is_alive():
                    finished_tests.append((test, thread_info[1]))

            if not finished_tests:
                time.sleep(0.2)

        for finished_test, procs_needed in finished_tests:
            self._procs_avail += procs_needed
            del threads_in_flight[finished_test]

    ###########################################################################
    def _update_test_status_file(self, test, test_phase, status):
    ###########################################################################
        """
        In general, test_scheduler should not be responsible for updating
        the TestStatus file, but there are a few cases where it has to.
        """
        test_dir = self._get_test_dir(test)
        with TestStatus(test_dir=test_dir, test_name=test) as ts:
            ts.set_status(test_phase, status)

    ###########################################################################
    def _consumer(self, test, test_phase, phase_method):
    ###########################################################################
        before_time = time.time()
        success, errors = self._run_catch_exceptions(test, test_phase, phase_method)
        elapsed_time = time.time() - before_time
        status  = (TEST_PEND_STATUS if test_phase == RUN_PHASE and not \
                   self._no_batch else TEST_PASS_STATUS) if success else TEST_FAIL_STATUS

        if status != TEST_PEND_STATUS:
            self._update_test_status(test, test_phase, status)

        if not self._work_remains(test):
            self._completed_tests += 1
            total = len(self._tests)
            status_str = "Finished {} for test {} in {:f} seconds ({}). [COMPLETED {:d} of {:d}]".format(test_phase, test, elapsed_time, status, self._completed_tests, total)
        else:
            status_str = "Finished {} for test {} in {:f} seconds ({})".format(test_phase, test, elapsed_time, status)

        if not success:
            status_str += "\n    Case dir: {}\n".format(self._get_test_dir(test))
            status_str += "    Errors were:\n        {}\n".format("\n        ".join(errors.splitlines()))

        logger.info(status_str)

        if test_phase in [CREATE_NEWCASE_PHASE, XML_PHASE]:
            # These are the phases for which TestScheduler is responsible for
            # updating the TestStatus file
            self._update_test_status_file(test, test_phase, status)

        if test_phase == XML_PHASE:
            append_status("Case Created using: "+" ".join(sys.argv), "README.case", caseroot=self._get_test_dir(test))

        # On batch systems, we want to immediately submit to the queue, because
        # it's very cheap to submit and will get us a better spot in line
        if (success and not self._no_run and not self._no_batch and test_phase == MODEL_BUILD_PHASE):
            logger.info("Starting {} for test {} with 1 proc on interactive node and {:d} procs on compute nodes".format(RUN_PHASE, test, self._get_procs_needed(test, RUN_PHASE, no_batch=True)))
            self._update_test_status(test, RUN_PHASE, TEST_PEND_STATUS)
            self._consumer(test, RUN_PHASE, self._run_phase)

    ###########################################################################
    def _producer(self):
    ###########################################################################
        threads_in_flight = {} # test-name -> (thread, procs, phase)
        while True:
            work_to_do = False
            num_threads_launched_this_iteration = 0
            for test in self._tests:
                logger.debug("test_name: " + test)

                if self._work_remains(test):
                    work_to_do = True

                    # If we have no workers available, immediately break out of loop so we can wait
                    if len(threads_in_flight) == self._parallel_jobs:
                        break

                    if test not in threads_in_flight:
                        test_phase, test_status = self._get_test_data(test)
                        expect(test_status != TEST_PEND_STATUS, test)
                        next_phase = self._phases[self._phases.index(test_phase) + 1]
                        procs_needed = self._get_procs_needed(test, next_phase, threads_in_flight)

                        if procs_needed <= self._procs_avail:
                            self._procs_avail -= procs_needed

                            # Necessary to print this way when multiple threads printing
                            logger.info("Starting {} for test {} with {:d} procs".format(next_phase, test, procs_needed))

                            self._update_test_status(test, next_phase, TEST_PEND_STATUS)
                            new_thread = threading.Thread(target=self._consumer,
                                args=(test, next_phase, getattr(self, "_{}_phase".format(next_phase.lower())) ))
                            threads_in_flight[test] = (new_thread, procs_needed, next_phase)
                            new_thread.start()
                            num_threads_launched_this_iteration += 1

                            logger.debug("  Current workload:")
                            total_procs = 0
                            for the_test, the_data in six.iteritems(threads_in_flight):
                                logger.debug("    {}: {} -> {}".format(the_test, the_data[2], the_data[1]))
                                total_procs += the_data[1]

                            logger.debug("    Total procs in use: {}".format(total_procs))
                        else:
                            if not threads_in_flight:
                                msg = "Phase '{}' for test '{}' required more processors, {:d}, than this machine can provide, {:d}".format(next_phase, test, procs_needed, self._procs_avail)
                                logger.warning(msg)
                                self._update_test_status(test, next_phase, TEST_PEND_STATUS)
                                self._update_test_status(test, next_phase, TEST_FAIL_STATUS)
                                self._log_output(test, msg)
                                if next_phase == RUN_PHASE:
                                    self._update_test_status_file(test, SUBMIT_PHASE, TEST_PASS_STATUS)
                                    self._update_test_status_file(test, next_phase, TEST_FAIL_STATUS)
                                else:
                                    self._update_test_status_file(test, next_phase, TEST_FAIL_STATUS)
                                num_threads_launched_this_iteration += 1

            if not work_to_do:
                break

            if num_threads_launched_this_iteration == 0:
                # No free resources, wait for something in flight to finish
                self._wait_for_something_to_finish(threads_in_flight)

        for unfinished_thread, _, _ in threads_in_flight.values():
            unfinished_thread.join()

    ###########################################################################
    def _setup_cs_files(self):
    ###########################################################################
        try:
            python_libs_root = CIME.utils.get_python_libs_root()
            template_file = os.path.join(python_libs_root, "cs.status.template")
            template = open(template_file, "r").read()
            template = template.replace("<PATH>",
                                        os.path.join(self._cime_root,"scripts","Tools")).replace\
                                        ("<TESTID>", self._test_id).replace\
                                        ("<TESTROOT>", self._test_root)
            if not os.path.exists(self._test_root):
                os.makedirs(self._test_root)
            cs_status_file = os.path.join(self._test_root, "cs.status.{}".format(self._test_id))
            with open(cs_status_file, "w") as fd:
                fd.write(template)
            os.chmod(cs_status_file, os.stat(cs_status_file).st_mode | stat.S_IXUSR | stat.S_IXGRP)

            template_file = os.path.join(python_libs_root, "cs.submit.template")
            template = open(template_file, "r").read()
            setup_cmd = "./case.setup" if self._no_setup else ":"
            build_cmd = "./case.build" if self._no_build else ":"
            test_cmd  = "./case.submit"
            template = template.replace("<SETUP_CMD>", setup_cmd).\
                       replace("<BUILD_CMD>", build_cmd).\
                       replace("<RUN_CMD>", test_cmd).\
                       replace("<TESTID>", self._test_id)

            if self._no_run:
                cs_submit_file = os.path.join(self._test_root, "cs.submit.{}".format(self._test_id))
                with open(cs_submit_file, "w") as fd:
                    fd.write(template)
                os.chmod(cs_submit_file,
                         os.stat(cs_submit_file).st_mode | stat.S_IXUSR | stat.S_IXGRP)

            if self._cime_model == "cesm":
                template_file = os.path.join(python_libs_root, "testreporter.template")
                template = open(template_file, "r").read()
                template = template.replace("<PATH>",
                                            os.path.join(self._cime_root, "scripts", "Tools"))
                testreporter_file = os.path.join(self._test_root, "testreporter")
                with open(testreporter_file, "w") as fd:
                    fd.write(template)
                os.chmod(testreporter_file, os.stat(testreporter_file).st_mode
                         | stat.S_IXUSR | stat.S_IXGRP)

        except Exception as e:
            logger.warning("FAILED to set up cs files: {}".format(str(e)))

    ###########################################################################
    def run_tests(self, wait=False,
                  wait_check_throughput=False,
                  wait_check_memory=False,
                  wait_ignore_namelists=False,
                  wait_ignore_memleak=False):
    ###########################################################################
        """
        Main API for this class.

        Return True if all tests passed.
        """
        start_time = time.time()

        # Tell user what will be run
        logger.info( "RUNNING TESTS:")
        for test in self._tests:
            logger.info( "  {}".format(test))

        # Setup cs files
        self._setup_cs_files()

        GenericXML.DISABLE_CACHING = True
        self._producer()
        GenericXML.DISABLE_CACHING = False

        expect(threading.active_count() == 1, "Leftover threads?")

        wait_handles_report = False
        if not self._no_run and not self._no_batch:
            if wait:
                logger.info("Waiting for tests to finish")
                rv = wait_for_tests(glob.glob(os.path.join(self._test_root, "*{}/TestStatus".format(self._test_id))),
                                    check_throughput=wait_check_throughput,
                                    check_memory=wait_check_memory,
                                    ignore_namelists=wait_ignore_namelists,
                                    ignore_memleak=wait_ignore_memleak)
                wait_handles_report = True
            else:
                logger.info("Due to presence of batch system, create_test will exit before tests are complete.\n" \
                            "To force create_test to wait for full completion, use --wait")

        # Return True if all tests passed from our point of view
        if not wait_handles_report:
            logger.info( "At test-scheduler close, state is:")
            rv = True
            for test in self._tests:
                phase, status = self._get_test_data(test)

                # Give highest priority to fails in test scheduler
                if status not in [TEST_PASS_STATUS, TEST_PEND_STATUS]:
                    logger.info( "{} {} (phase {})".format(status, test, phase))
                    rv = False

                else:
                    # Be cautious about telling the user that the test passed. This
                    # status should match what they would see on the dashboard. Our
                    # self._test_states does not include comparison fail information,
                    # so we need to parse test status.
                    ts = TestStatus(self._get_test_dir(test))
                    nlfail = ts.get_status(NAMELIST_PHASE) == TEST_FAIL_STATUS
                    ts_status = ts.get_overall_test_status(ignore_namelists=True, check_memory=False, check_throughput=False)

                    if ts_status not in [TEST_PASS_STATUS, TEST_PEND_STATUS]:
                        logger.info( "{} {} (phase {})".format(ts_status, test, phase))
                        rv = False
                    elif nlfail:
                        logger.info( "{} {} (but otherwise OK) {}".format(NAMELIST_FAIL_STATUS, test, phase))
                        rv = False
                    else:
                        logger.info("{} {} {}".format(status, test, phase))

                logger.info( "    Case dir: {}".format(self._get_test_dir(test)))

        logger.info( "test-scheduler took {} seconds".format(time.time() - start_time))

        return rv
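A minimal driver for the scheduler above might look like the sketch below. The class name and constructor arguments shown (TestScheduler, test_names, no_run) are assumptions based on how run_tests is used here, not a verbatim CIME API, and the test names are placeholders.

# Illustrative sketch only; constructor signature and test names are assumed.
from CIME.test_scheduler import TestScheduler

test_names = ["SMS.f19_g16.X", "ERS.f19_g16.X"]
scheduler = TestScheduler(test_names, no_run=False)

# run_tests returns True only if every test passed (with wait=True on batch
# machines, the result comes from wait_for_tests on the finished jobs).
success = scheduler.run_tests(wait=True)
print("all tests passed" if success else "some tests failed")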
Ejemplo n.º 14
0
def _main():
    output, build_dir, build_optimized, clean,\
        cmake_args, compiler, enable_genf90, machine, machines_dir,\
        make_j, use_mpi, mpilib, mpirun_command, test_spec_dir, ctest_args,\
        use_openmp, xml_test_list, verbose \
        = parse_command_line(sys.argv)

    #=================================================
    # Find directory and file paths.
    #=================================================
    suite_specs = []
    # TODO: this violates cime policy of direct access to xml
    # should be moved to CIME/XML
    if xml_test_list is not None:
        test_xml_tree = ElementTree()
        test_xml_tree.parse(xml_test_list)
        known_paths = {
            "here": os.path.abspath(os.path.dirname(xml_test_list)),
        }
        suite_specs.extend(suites_from_xml(test_xml_tree, known_paths))
    if test_spec_dir is not None:
        suite_specs.append(
            TestSuiteSpec("__command_line_test__", ["__command_line_test__"],
                          [os.path.abspath(test_spec_dir)]))

    if machines_dir is not None:
        machines_file = os.path.join(machines_dir, "config_machines.xml")
        machobj = Machines(infile=machines_file, machine=machine)
    else:
        machobj = Machines(machine=machine)

    # Create build directory if necessary.
    build_dir = os.path.abspath(build_dir)

    if not os.path.isdir(build_dir):
        os.mkdir(build_dir)

    # Switch to the build directory.
    os.chdir(build_dir)
    if clean:
        pwd_contents = os.listdir(os.getcwd())
        # Clear CMake cache.
        for file_ in pwd_contents:
            if file_ in ("Macros.cmake", "env_mach_specific.xml") \
                    or file_.startswith('Depends') or file_.startswith(".env_mach_specific"):
                os.remove(file_)

    #=================================================
    # Functions to perform various stages of build.
    #=================================================

    if not use_mpi:
        mpilib = "mpi-serial"
    elif mpilib is None:
        mpilib = machobj.get_default_MPIlib()
        logger.info("Using mpilib: {}".format(mpilib))

    if compiler is None:
        compiler = machobj.get_default_compiler()
        logger.info("Compiler is {}".format(compiler))

    compilerobj = Compilers(machobj, compiler=compiler, mpilib=mpilib)

    pfunit_path = find_pfunit(compilerobj,
                              mpilib=mpilib,
                              use_openmp=use_openmp)

    debug = not build_optimized
    os_ = machobj.get_value("OS")

    # Create the environment, and the Macros.cmake file
    #
    #
    configure(machobj,
              build_dir, ["CMake"],
              compiler,
              mpilib,
              debug,
              os_,
              unit_testing=True)
    machspecific = EnvMachSpecific(build_dir, unit_testing=True)

    fake_case = FakeCase(compiler, mpilib, debug)
    machspecific.load_env(fake_case)
    os.environ["OS"] = os_
    os.environ["COMPILER"] = compiler
    os.environ["DEBUG"] = stringify_bool(debug)
    os.environ["MPILIB"] = mpilib
    if use_openmp:
        os.environ["compile_threaded"] = "true"
    else:
        os.environ["compile_threaded"] = "false"

    os.environ["UNIT_TEST_HOST"] = socket.gethostname()
    if "NETCDF_PATH" in os.environ and not "NETCDF" in os.environ:
        # The CMake Netcdf find utility that we use (from pio2) seems to key off
        # of the environment variable NETCDF, but not NETCDF_PATH
        logger.info("Setting NETCDF environment variable: {}".format(
            os.environ["NETCDF_PATH"]))
        os.environ["NETCDF"] = os.environ["NETCDF_PATH"]

    if not use_mpi:
        mpirun_command = ""
    elif mpirun_command is None:
        mpi_attribs = {
            "compiler": compiler,
            "mpilib": mpilib,
            "threaded": use_openmp,
            "unit_testing": True
        }

        # We can get away with specifying case=None since we're using exe_only=True
        mpirun_command, _ = machspecific.get_mpirun(None,
                                                    mpi_attribs,
                                                    None,
                                                    exe_only=True)
        mpirun_command = machspecific.get_resolved_value(mpirun_command)
        logger.info("mpirun command is '{}'".format(mpirun_command))


    #=================================================
    # Run tests.
    #=================================================

    for spec in suite_specs:
        os.chdir(build_dir)
        if os.path.isdir(spec.name):
            if clean:
                rmtree(spec.name)

        if not os.path.isdir(spec.name):
            os.mkdir(spec.name)

        for label, directory in spec:
            os.chdir(os.path.join(build_dir, spec.name))
            if not os.path.isdir(label):
                os.mkdir(label)

            os.chdir(label)

            name = spec.name + "/" + label

            if not os.path.islink("Macros.cmake"):
                os.symlink(os.path.join(build_dir, "Macros.cmake"),
                           "Macros.cmake")
            use_mpiserial = not use_mpi
            cmake_stage(name,
                        directory,
                        build_optimized,
                        use_mpiserial,
                        mpirun_command,
                        output,
                        pfunit_path,
                        verbose=verbose,
                        enable_genf90=enable_genf90,
                        cmake_args=cmake_args)
            make_stage(name, output, make_j, clean=clean, verbose=verbose)

    for spec in suite_specs:
        os.chdir(os.path.join(build_dir, spec.name))
        for label, directory in spec:

            name = spec.name + "/" + label

            output.print_header("Running CTest tests for " + name + ".")

            ctest_command = ["ctest", "--output-on-failure"]

            if verbose:
                ctest_command.append("-VV")

            if ctest_args is not None:
                ctest_command.extend(ctest_args.split(" "))

            run_cmd_no_fail(" ".join(ctest_command),
                            from_dir=label,
                            arg_stdout=None,
                            arg_stderr=subprocess.STDOUT)
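The final loop above simply shells out to ctest from each test's build directory. A rough standalone equivalent of that step, with a hypothetical build path, is:

# Minimal sketch of the per-directory ctest invocation (path is hypothetical).
import subprocess

ctest_command = ["ctest", "--output-on-failure", "-VV"]
subprocess.run(ctest_command, cwd="build/driver_tests/unit", check=True)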
Ejemplo n.º 15
0
def _main():
    output, build_dir, build_optimized, clean,\
        cmake_args, compiler, enable_genf90, machine, machines_dir,\
        make_j, use_mpi, mpilib, mpirun_command, test_spec_dir, ctest_args,\
        use_openmp, xml_test_list, verbose \
        = parse_command_line(sys.argv)

    #=================================================
    # Find directory and file paths.
    #=================================================
    suite_specs = []
    # TODO: this violates cime policy of direct access to xml
    # should be moved to CIME/XML
    if xml_test_list is not None:
        test_xml_tree = ElementTree()
        test_xml_tree.parse(xml_test_list)
        known_paths = {
            "here": os.path.abspath(os.path.dirname(xml_test_list)),
            }
        suite_specs.extend(suites_from_xml(test_xml_tree, known_paths))
    if test_spec_dir is not None:
        suite_specs.append(
            TestSuiteSpec("__command_line_test__",
                          ["__command_line_test__"],
                          [os.path.abspath(test_spec_dir)])
            )


    if machines_dir is not None:
        machines_file = os.path.join(machines_dir, "config_machines.xml")
        machobj = Machines(infile=machines_file, machine=machine)
    else:
        machobj = Machines(machine=machine)

    # Create build directory if necessary.
    build_dir = os.path.abspath(build_dir)

    if not os.path.isdir(build_dir):
        os.mkdir(build_dir)

    # Switch to the build directory.
    os.chdir(build_dir)

    #=================================================
    # Functions to perform various stages of build.
    #=================================================

    if not use_mpi:
        mpilib = "mpi-serial"
    elif mpilib is None:
        mpilib = machobj.get_default_MPIlib()
        logger.info("Using mpilib: {}".format(mpilib))

    if compiler is None:
        compiler = machobj.get_default_compiler()
        logger.info("Compiler is {}".format(compiler))

    compilerobj = Compilers(machobj, compiler=compiler, mpilib=mpilib)

    pfunit_path = find_pfunit(compilerobj, mpilib=mpilib, use_openmp=use_openmp)

    debug = not build_optimized
    os_ = machobj.get_value("OS")

    # Create the environment, and the Macros.cmake file
    #
    #
    configure(machobj, build_dir, ["CMake"], compiler, mpilib, debug, os_,
              unit_testing=True)
    machspecific = EnvMachSpecific(build_dir, unit_testing=True)

    fake_case = FakeCase(compiler, mpilib, debug)
    machspecific.load_env(fake_case)
    os.environ["OS"] = os_
    os.environ["COMPILER"] = compiler
    os.environ["DEBUG"] = stringify_bool(debug)
    os.environ["MPILIB"] = mpilib
    if use_openmp:
        os.environ["compile_threaded"] = "true"
    else:
        os.environ["compile_threaded"] = "false"

    os.environ["UNIT_TEST_HOST"] = socket.gethostname()
    if "NETCDF_PATH" in os.environ and not "NETCDF" in os.environ:
        # The CMake Netcdf find utility that we use (from pio2) seems to key off
        # of the environment variable NETCDF, but not NETCDF_PATH
        logger.info("Setting NETCDF environment variable: {}".format(os.environ["NETCDF_PATH"]))
        os.environ["NETCDF"] = os.environ["NETCDF_PATH"]

    if not use_mpi:
        mpirun_command = ""
    elif mpirun_command is None:
        mpi_attribs = {
            "compiler" : compiler,
            "mpilib"   : mpilib,
            "threaded" : use_openmp,
            "unit_testing" : True
        }

        # We can get away with specifying case=None since we're using exe_only=True
        mpirun_command, _ = machspecific.get_mpirun(case=None, attribs=mpi_attribs, exe_only=True)
        mpirun_command = machspecific.get_resolved_value(mpirun_command)
        logger.info("mpirun command is '{}'".format(mpirun_command))

    #=================================================
    # Run tests.
    #=================================================

    for spec in suite_specs:
        os.chdir(build_dir)
        if os.path.isdir(spec.name):
            if clean:
                rmtree(spec.name)

        if not os.path.isdir(spec.name):
            os.mkdir(spec.name)

        for label, directory in spec:
            os.chdir(os.path.join(build_dir,spec.name))
            if not os.path.isdir(label):
                os.mkdir(label)

            os.chdir(label)

            name = spec.name+"/"+label

            if not os.path.islink("Macros.cmake"):
                os.symlink(os.path.join(build_dir,"Macros.cmake"), "Macros.cmake")
            use_mpiserial = not use_mpi
            cmake_stage(name, directory, build_optimized, use_mpiserial, mpirun_command, output, pfunit_path, verbose=verbose,
                        enable_genf90=enable_genf90, cmake_args=cmake_args)
            make_stage(name, output, make_j, clean=clean, verbose=verbose)


    for spec in suite_specs:
        os.chdir(os.path.join(build_dir,spec.name))
        for label, directory in spec:

            name = spec.name+"/"+label

            output.print_header("Running CTest tests for "+name+".")

            ctest_command = ["ctest", "--output-on-failure"]

            if verbose:
                ctest_command.append("-VV")

            if ctest_args is not None:
                ctest_command.extend(ctest_args.split(" "))

            run_cmd_no_fail(" ".join(ctest_command), from_dir=label, arg_stdout=None, arg_stderr=subprocess.STDOUT)
Ejemplo n.º 16
0
def parse_command_line(args, description):
    ###############################################################################
    help_str = """
    Solve a Mixed Integer Linear Program to find a PE layout that minimizes
    the wall-clock time per model day.
    """
    parser = argparse.ArgumentParser(
        usage=help_str,
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    CIME.utils.setup_standard_logging_options(parser)

    parser.add_argument('--test-id',
                        default=DEFAULT_TESTID,
                        help='test-id to use for all timing runs')

    parser.add_argument(
        "-r",
        "--test-root",
        help="Where test cases were created."
        " Will default to output root as defined in the config_machines file")

    parser.add_argument('--timing-dir',
                        help='alternative to using casename '
                        'to find timing data, instead read all files in'
                        ' this directory')

    parser.add_argument('--blocksize',
                        help='default minimum size of blocks to assign to all '
                        'components. Components can be assigned different '
                        'blocksizes using --blocksize-XXX. Default 1',
                        type=int)

    for c in COMPONENT_LIST:
        parser.add_argument('--blocksize-%s' % c.lower(),
                            help='minimum blocksize for component %s, if '
                            'different from --blocksize',
                            type=int)

    parser.add_argument('--total-tasks',
                        type=int,
                        help='Number of pes available for assignment')

    #-------------------SK added this- July-2020-------------------------------------------------
    parser.add_argument(
        '--optimizer',
        type=int,
        help='choose between the two Mixed Integer Linear Solvers: '
        '1-PuLP_COIN-CBC, 2-PuLP_GLPK'
    )
    #------------------------------------------------------------------------------------

    parser.add_argument(
        "--layout",
        help="name of layout to solve (default selected internally)")

    parser.add_argument("--graph-models",
                        action="store_true",
                        help="plot cost v. ntasks models. requires matplotlib")

    parser.add_argument("--print-models",
                        action="store_true",
                        help="print all costs and ntasks")

    parser.add_argument("--pe-output", help="write pe layout to file")

    parser.add_argument('--json-output', help="write MILP data to .json file")

    parser.add_argument('--json-input',
                        help="solve using data from .json file")

    args = CIME.utils.parse_args_and_handle_standard_logging_options(
        args, parser)
    expect(args.total_tasks is not None or args.json_input is not None,
           "--total-tasks or --json-input option must be set")

    #------------------------SK added this-July 2020-----------------
    expect(args.optimizer is not None, "--optimizer must be set")
    #----------------------------------------------------
    blocksizes = {}
    for c in COMPONENT_LIST:
        attrib = 'blocksize_%s' % c.lower()
        if getattr(args, attrib) is not None:
            blocksizes[c] = getattr(args, attrib)
        elif args.blocksize is not None:
            blocksizes[c] = args.blocksize
    for c in COMPONENT_LIST:
        # dict.get avoids a KeyError when no blocksize was supplied for a component
        print("{} {}".format(c, blocksizes.get(c)))
    test_root = args.test_root
    if test_root is None:
        machobj = Machines()
        test_root = machobj.get_value("CIME_OUTPUT_ROOT")


    # SK added the args.optimizer to the below line
    return (args.test_id, test_root, args.timing_dir, blocksizes,
            args.total_tasks, args.optimizer, args.layout, args.graph_models,
            args.print_models, args.pe_output, args.json_output,
            args.json_input)
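The blocksize options above resolve with a simple precedence: a component-specific --blocksize-XXX flag wins, otherwise the global --blocksize applies, otherwise the component is left out of the dictionary. A small sketch of that rule in isolation (the component list and the parsed args object are stand-ins):

# Illustrative only; not part of the script above.
def resolve_blocksizes(args, component_list):
    blocksizes = {}
    for comp in component_list:
        specific = getattr(args, "blocksize_{}".format(comp.lower()), None)
        if specific is not None:
            blocksizes[comp] = specific        # per-component flag wins
        elif args.blocksize is not None:
            blocksizes[comp] = args.blocksize  # fall back to the global flag
    return blocksizes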
Ejemplo n.º 17
0
        if not non_local:
            case.load_env()

        models = case.get_values("COMP_CLASSES")
        mach = case.get_value("MACH")
        compiler = case.get_value("COMPILER")
        debug = case.get_value("DEBUG")
        mpilib = case.get_value("MPILIB")
        sysos = case.get_value("OS")
        comp_interface = case.get_value("COMP_INTERFACE")
        expect(mach is not None, "xml variable MACH is not set")

        # creates the Macros.make, Depends.compiler, Depends.machine, Depends.machine.compiler
        # and env_mach_specific.xml if they don't already exist.
        if not os.path.isfile("Macros.make") or not os.path.isfile("env_mach_specific.xml"):
            configure(Machines(machine=mach), caseroot, ["Makefile"], compiler, mpilib, debug, comp_interface, sysos)

        # Set tasks to 1 if mpi-serial library
        if mpilib == "mpi-serial":
            for vid, value in case:
                if vid.startswith("NTASKS") and value != 1:
                    case.set_value(vid, 1)

        # Check ninst.
        # In CIME there can be multiple instances of each component model (an ensemble); NINST is the number of instances of that component.
        comp_interface = case.get_value("COMP_INTERFACE")
        if comp_interface == "nuopc":
            ninst  = case.get_value("NINST")

        multi_driver = case.get_value("MULTI_DRIVER")
Ejemplo n.º 18
0
import CIME.wait_for_tests
from CIME.utils import expect
from CIME.XML.machines import Machines

import os, shutil, glob, signal, logging

_MACHINE = Machines()


###############################################################################
def cleanup_queue(set_of_jobs_we_created):
    ###############################################################################
    """
    Delete all jobs left in the queue
    """
    current_jobs = set(CIME.utils.get_my_queued_jobs())
    jobs_to_delete = set_of_jobs_we_created & current_jobs

    if (jobs_to_delete):
        logging.warning(
            "Found leftover batch jobs that need to be deleted: %s" %
            ", ".join(jobs_to_delete))
        success = CIME.utils.delete_jobs(jobs_to_delete)
        if not success:
            logging.warning("FAILED to clean up leftover jobs!")


###############################################################################
def jenkins_generic_job(generate_baselines, submit_to_cdash, no_batch,
                        baseline_name, arg_cdash_build_name, cdash_project,
                        arg_test_suite, cdash_build_group, baseline_compare,
Ejemplo n.º 19
0
File: case.py Project: ekluzek/cime
    def configure(self, compset_name, grid_name, machine_name=None,
                  project=None, pecount=None, compiler=None, mpilib=None,
                  user_compset=False, pesfile=None,
                  user_grid=False, gridfile=None, ninst=1, test=False,
                  walltime=None, queue=None):

        #--------------------------------------------
        # compset, pesfile, and compset components
        #--------------------------------------------
        self._set_compset_and_pesfile(compset_name, user_compset=user_compset, pesfile=pesfile)

        self._components = self.get_compset_components()
        #FIXME - if --user-compset is True then need to determine that
        #all of the compset settings are valid

        #--------------------------------------------
        # grid
        #--------------------------------------------
        if user_grid is True and gridfile is not None:
            self.set_value("GRIDS_SPEC_FILE", gridfile)
        grids = Grids(gridfile)

        gridinfo = grids.get_grid_info(name=grid_name, compset=self._compsetname)

        self._gridname = gridinfo["GRID"]
        for key,value in gridinfo.items():
            logger.debug("Set grid %s %s"%(key,value))
            self.set_lookup_value(key,value)

        #--------------------------------------------
        # component config data
        #--------------------------------------------
        self._get_component_config_data()

        self.get_compset_var_settings()

        #--------------------------------------------
        # machine
        #--------------------------------------------
        # set machine values in env_xxx files
        machobj = Machines(machine=machine_name)
        machine_name = machobj.get_machine_name()
        self.set_value("MACH",machine_name)
        nodenames = machobj.get_node_names()
        nodenames =  [x for x in nodenames if
                      '_system' not in x and '_variables' not in x and 'mpirun' not in x and\
                      'COMPILER' not in x and 'MPILIB' not in x]

        for nodename in nodenames:
            value = machobj.get_value(nodename, resolved=False)
            type_str = self.get_type_info(nodename)
            if type_str is not None:
                logger.debug("machine nodename %s value %s"%(nodename, value))
                self.set_value(nodename, convert_to_type(value, type_str, nodename))

        if compiler is None:
            compiler = machobj.get_default_compiler()
        else:
            expect(machobj.is_valid_compiler(compiler),
                   "compiler %s is not supported on machine %s" %(compiler, machine_name))

        self.set_value("COMPILER",compiler)

        if mpilib is None:
            mpilib = machobj.get_default_MPIlib({"compiler":compiler})
        else:
            expect(machobj.is_valid_MPIlib(mpilib, {"compiler":compiler}),
                   "MPIlib %s is not supported on machine %s" %(mpilib, machine_name))
        self.set_value("MPILIB",mpilib)

        machdir = machobj.get_machines_dir()
        self.set_value("MACHDIR", machdir)

        # Create env_mach_specific settings from machine info.
        env_mach_specific_obj = self.get_env("mach_specific")
        env_mach_specific_obj.populate(machobj)
        self.schedule_rewrite(env_mach_specific_obj)

        #--------------------------------------------
        # pe layout
        #--------------------------------------------
        match1 = re.match('([0-9]+)x([0-9]+)', "" if pecount is None else pecount)
        match2 = re.match('([0-9]+)', "" if pecount is None else pecount)
        pes_ntasks = {}
        pes_nthrds = {}
        pes_rootpe = {}
        if match1:
            opti_tasks = match1.group(1)
            opti_thrds = match1.group(2)
        elif match2:
            opti_tasks = match2.group(1)
            opti_thrds = 1

        other = {}
        if match1 or match2:
            for component_class in self._component_classes:
                if component_class == "DRV":
                    component_class = "CPL"
                string = "NTASKS_" + component_class
                pes_ntasks[string] = opti_tasks
                string = "NTHRDS_" + component_class
                pes_nthrds[string] = opti_thrds
                string = "ROOTPE_" + component_class
                pes_rootpe[string] = 0
        else:
            pesobj = Pes(self._pesfile)

            pes_ntasks, pes_nthrds, pes_rootpe, other = pesobj.find_pes_layout(self._gridname, self._compsetname,
                                                                    machine_name, pesize_opts=pecount)

        mach_pes_obj = self.get_env("mach_pes")
        totaltasks = {}
        # Since other items may include PES_PER_NODE we need to do this first.
        # We can get rid of this code when all of the perl is removed.
        for key, value in other.items():
            self.set_value(key, value)
        pes_per_node = self.get_value("PES_PER_NODE")
        for key, value in pes_ntasks.items():
            totaltasks[key[-3:]] = int(value)
            mach_pes_obj.set_value(key,int(value), pes_per_node=pes_per_node)
        for key, value in pes_rootpe.items():
            totaltasks[key[-3:]] += int(value)
            mach_pes_obj.set_value(key,int(value), pes_per_node=pes_per_node)
        for key, value in pes_nthrds.items():
            totaltasks[key[-3:]] *= int(value)
            mach_pes_obj.set_value(key,int(value), pes_per_node=pes_per_node)

        maxval = 1
        if mpilib != "mpi-serial":
            for key, val in totaltasks.items():
                if val < 0:
                    val = -1*val*pes_per_node
                if val > maxval:
                    maxval = val

        # Make sure that every component has been accounted for;
        # set nthrds and ntasks to 1 otherwise. Also set the ninst values here.
        for compclass in self._component_classes:
            if compclass == "DRV":
                continue
            key = "NINST_%s"%compclass
            mach_pes_obj.set_value(key, ninst)
            key = "NTASKS_%s"%compclass
            if key not in pes_ntasks:
                mach_pes_obj.set_value(key,1)
            key = "NTHRDS_%s"%compclass
            if key not in pes_nthrds:
                mach_pes_obj.set_value(key,1)

        # FIXME - this is a short term fix for dealing with the restriction that
        # CISM1 cannot run on multiple cores
        if "CISM1" in self._compsetname:
            mach_pes_obj.set_value("NTASKS_GLC",1)
            mach_pes_obj.set_value("NTHRDS_GLC",1)

        #--------------------------------------------
        # batch system
        #--------------------------------------------
        batch_system_type = machobj.get_value("BATCH_SYSTEM")
        batch = Batch(batch_system=batch_system_type, machine=machine_name)
        bjobs = batch.get_batch_jobs()
        env_batch = self.get_env("batch")
        env_batch.set_batch_system(batch, batch_system_type=batch_system_type)
        env_batch.create_job_groups(bjobs)
        env_batch.set_job_defaults(bjobs, pesize=maxval, walltime=walltime, force_queue=queue)
        self.schedule_rewrite(env_batch)

        self.set_value("COMPSET",self._compsetname)

        self._set_pio_xml()
        logger.info(" Compset is: %s " %self._compsetname)
        logger.info(" Grid is: %s " %self._gridname )
        logger.info(" Components in compset are: %s " %self._components)

        # Set project id
        if project is None:
            project = get_project(machobj)
        if project is not None:
            self.set_value("PROJECT", project)
        elif machobj.get_value("PROJECT_REQUIRED"):
            expect(project is not None, "PROJECT_REQUIRED is true but no project found")

        # Overwriting an existing exeroot or rundir can cause problems
        exeroot = self.get_value("EXEROOT")
        rundir = self.get_value("RUNDIR")
        for wdir in (exeroot, rundir):
            logging.debug("wdir is %s"%wdir)
            if os.path.exists(wdir):
                expect(not test, "Directory %s already exists, aborting test"% wdir)
                response = raw_input("\nDirectory %s already exists, (r)eplace, (a)bort, or (u)se existing?"% wdir)
                if response.startswith("r"):
                    shutil.rmtree(wdir)
                else:
                    expect(response.startswith("u"), "Aborting by user request")

        # miscellaneous settings
        if self.get_value("RUN_TYPE") == 'hybrid':
            self.set_value("GET_REFCASE", True)

        # Turn on short term archiving as cesm default setting
        model = get_model()
        if model == "cesm" and not test:
            self.set_value("DOUT_S",True)
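configure is normally invoked by create_newcase on a freshly created case; a minimal direct call matching the signature above might look like the sketch below, where the case root, compset, grid, machine, compiler, mpilib, and pecount values are all placeholders.

# Hypothetical values throughout; shown only to illustrate the signature above.
with Case("/path/to/caseroot", read_only=False) as case:
    case.configure("B1850", "f09_g16",
                   machine_name="cheyenne",
                   compiler="intel",
                   mpilib="mpt",
                   pecount="128x1")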
Ejemplo n.º 20
0
def configure_tests(timeout, no_fortran_run, fast, no_batch, no_cmake,
                    no_teardown, machine, compiler, mpilib, test_root,
                    **kwargs):
    config = CIME.utils.get_cime_config()

    if timeout:
        BaseTestCase.GLOBAL_TIMEOUT = str(timeout)

    BaseTestCase.NO_FORTRAN_RUN = no_fortran_run or False
    BaseTestCase.FAST_ONLY = fast or no_fortran_run
    BaseTestCase.NO_BATCH = no_batch or False
    BaseTestCase.NO_CMAKE = no_cmake or False
    BaseTestCase.NO_TEARDOWN = no_teardown or False

    # make sure we have default values
    MACHINE = None
    TEST_COMPILER = None
    TEST_MPILIB = None

    if machine is not None:
        MACHINE = Machines(machine=machine)
        os.environ["CIME_MACHINE"] = machine
    elif "CIME_MACHINE" in os.environ:
        MACHINE = Machines(machine=os.environ["CIME_MACHINE"])
    elif config.has_option("create_test", "MACHINE"):
        MACHINE = Machines(machine=config.get("create_test", "MACHINE"))
    elif config.has_option("main", "MACHINE"):
        MACHINE = Machines(machine=config.get("main", "MACHINE"))
    else:
        MACHINE = Machines()

    BaseTestCase.MACHINE = MACHINE

    if compiler is not None:
        TEST_COMPILER = compiler
    elif config.has_option("create_test", "COMPILER"):
        TEST_COMPILER = config.get("create_test", "COMPILER")
    elif config.has_option("main", "COMPILER"):
        TEST_COMPILER = config.get("main", "COMPILER")

    BaseTestCase.TEST_COMPILER = TEST_COMPILER

    if mpilib is not None:
        TEST_MPILIB = mpilib
    elif config.has_option("create_test", "MPILIB"):
        TEST_MPILIB = config.get("create_test", "MPILIB")
    elif config.has_option("main", "MPILIB"):
        TEST_MPILIB = config.get("main", "MPILIB")

    BaseTestCase.TEST_MPILIB = TEST_MPILIB

    if test_root is not None:
        TEST_ROOT = test_root
    elif config.has_option("create_test", "TEST_ROOT"):
        TEST_ROOT = config.get("create_test", "TEST_ROOT")
    else:
        TEST_ROOT = os.path.join(
            MACHINE.get_value("CIME_OUTPUT_ROOT"),
            "scripts_regression_test.%s" % CIME.utils.get_timestamp(),
        )

    BaseTestCase.TEST_ROOT = TEST_ROOT

    write_provenance_info(MACHINE, TEST_COMPILER, TEST_MPILIB, TEST_ROOT)

    atexit.register(functools.partial(cleanup, TEST_ROOT))
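Each setting above is resolved with the same precedence: an explicit argument first, then (for the machine) an environment variable, then a [create_test] or [main] entry in the CIME config file, then a computed default. The helper below restates that lookup order on its own; it is a sketch, not code from this module.

# Generic precedence helper mirroring the pattern above (illustrative only).
import os

def resolve_setting(explicit, env_var, config, option, fallback=None):
    if explicit is not None:
        return explicit
    if env_var is not None and env_var in os.environ:
        return os.environ[env_var]
    for section in ("create_test", "main"):
        if config.has_option(section, option):
            return config.get(section, option)
    return fallback

# e.g. compiler = resolve_setting(compiler, None, config, "COMPILER")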
Ejemplo n.º 21
0
def single_submit_impl(
    machine_name, test_id, proc_pool, _, args, job_cost_map, wall_time, test_root
):
    ###############################################################################
    mach = Machines(machine=machine_name)
    expect(
        mach.has_batch_system(),
        "Single submit does not make sense on non-batch machine '%s'"
        % mach.get_machine_name(),
    )

    machine_name = mach.get_machine_name()

    #
    # Compute arg list for second call to create_test
    #
    new_args = list(args)
    new_args.remove("--single-submit")
    new_args.append("--no-batch")
    new_args.append("--use-existing")
    no_arg_is_a_test_id_arg = True
    no_arg_is_a_proc_pool_arg = True
    no_arg_is_a_machine_arg = True
    for arg in new_args:
        if arg == "-t" or arg.startswith("--test-id"):
            no_arg_is_a_test_id_arg = False
        elif arg.startswith("--proc-pool"):
            no_arg_is_a_proc_pool_arg = False
        elif arg == "-m" or arg.startswith("--machine"):
            no_arg_is_a_machine_arg = False

    if no_arg_is_a_test_id_arg:
        new_args.append("-t %s" % test_id)
    if no_arg_is_a_proc_pool_arg:
        new_args.append("--proc-pool %d" % proc_pool)
    if no_arg_is_a_machine_arg:
        new_args.append("-m %s" % machine_name)

    #
    # Resolve batch directives manually. There is currently no other way
    # to do this without making a Case object. Make a throwaway case object
    # to help us here.
    #
    testcase_dirs = glob.glob("%s/*%s*/TestStatus" % (test_root, test_id))
    expect(testcase_dirs, "No test case dirs found!?")
    first_case = os.path.abspath(os.path.dirname(testcase_dirs[0]))
    with Case(first_case, read_only=False) as case:
        env_batch = case.get_env("batch")

        submit_cmd = env_batch.get_value("batch_submit", subgroup=None)
        submit_args = env_batch.get_submit_args(case, "case.test")

    tasks_per_node = mach.get_value("MAX_MPITASKS_PER_NODE")
    num_nodes = int(math.ceil(float(proc_pool) / tasks_per_node))
    if wall_time is None:
        wall_time = compute_total_time(job_cost_map, proc_pool)
        wall_time_bab = convert_to_babylonian_time(int(wall_time))
    else:
        wall_time_bab = wall_time

    queue = env_batch.select_best_queue(num_nodes, proc_pool, walltime=wall_time_bab)
    wall_time_max_bab = env_batch.get_queue_specs(queue)[3]
    if wall_time_max_bab is not None:
        wall_time_max = convert_to_seconds(wall_time_max_bab)
        if wall_time_max < wall_time:
            wall_time = wall_time_max
            wall_time_bab = convert_to_babylonian_time(wall_time)

    overrides = {
        "job_id": "create_test_single_submit_%s" % test_id,
        "num_nodes": num_nodes,
        "tasks_per_node": tasks_per_node,
        "totaltasks": tasks_per_node * num_nodes,
        "job_wallclock_time": wall_time_bab,
        "job_queue": env_batch.text(queue),
    }

    directives = env_batch.get_batch_directives(case, "case.test", overrides=overrides)

    #
    # Make simple submit script and submit
    #

    script = "#! /bin/bash\n"
    script += "\n%s" % directives
    script += "\n"
    script += "cd %s\n" % os.getcwd()
    script += "%s %s\n" % (__file__, " ".join(new_args))

    submit_cmd = "%s %s" % (submit_cmd, submit_args)
    logger.info("Script:\n%s" % script)

    run_cmd_no_fail(
        submit_cmd, input_str=script, arg_stdout=None, arg_stderr=None, verbose=True
    )
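For reference, the wrapper piped to the batch submit command is plain bash: the resolved directives, a cd back to the invocation directory, and a re-run of this script with --no-batch and --use-existing. On a Slurm-like machine it might render roughly as below; every value shown is made up.

# Hypothetical rendering of the generated script (directives vary by machine).
example_script = """#! /bin/bash
#SBATCH --job-name=create_test_single_submit_20240101_abc123
#SBATCH --nodes=4
#SBATCH --time=02:30:00

cd /path/where/create_test/was/run
./create_test -t 20240101_abc123 --no-batch --use-existing --proc-pool 512 -m mymachine
"""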
Ejemplo n.º 22
0
def parse_command_line(args, description):
    ###############################################################################

    parser = argparse.ArgumentParser(
        description=description, formatter_class=RawTextHelpFormatter
    )

    model = CIME.utils.get_model()

    CIME.utils.setup_standard_logging_options(parser)

    config = get_cime_config()

    parser.add_argument(
        "--no-run", action="store_true", help="Do not run generated tests"
    )

    parser.add_argument(
        "--no-build",
        action="store_true",
        help="Do not build generated tests, implies --no-run",
    )

    parser.add_argument(
        "--no-setup",
        action="store_true",
        help="Do not setup generated tests, implies --no-build and --no-run",
    )

    parser.add_argument(
        "-u",
        "--use-existing",
        action="store_true",
        help="Use pre-existing case directories; they will pick up at the "
        "\nlatest PEND state or re-run the first failed state. Requires test-id",
    )

    default = get_default_setting(config, "SAVE_TIMING", False, check_main=False)

    parser.add_argument(
        "--save-timing",
        action="store_true",
        default=default,
        help="Enable archiving of performance data.",
    )

    parser.add_argument(
        "--no-batch",
        action="store_true",
        help="Do not submit jobs to batch system, run locally."
        "\nIf false, this will default to machine setting.",
    )

    parser.add_argument(
        "--single-exe",
        action="store_true",
        default=False,
        help="Use a single build for all cases. This can "
        "\ndrastically improve test throughput but is currently use-at-your-own risk."
        "\nIt's up to the user to ensure that all cases are build-compatible."
        "\nE3SM tests belonging to a suite with share enabled will always share exes.",
    )

    default = get_default_setting(config, "SINGLE_SUBMIT", False, check_main=False)

    parser.add_argument(
        "--single-submit",
        action="store_true",
        default=default,
        help="Use a single interactive allocation to run all the tests. This can "
        "\ndrastically reduce queue waiting but only makes sense on batch machines.",
    )

    default = get_default_setting(config, "TEST_ROOT", None, check_main=False)

    parser.add_argument(
        "-r",
        "--test-root",
        default=default,
        help="Where test cases will be created. The default is output root"
        "\nas defined in the config_machines file",
    )

    default = get_default_setting(config, "OUTPUT_ROOT", None, check_main=False)

    parser.add_argument(
        "--output-root", default=default, help="Where the case output is written."
    )

    default = get_default_setting(config, "BASELINE_ROOT", None, check_main=False)

    parser.add_argument(
        "--baseline-root",
        default=default,
        help="Specifies a root directory for baseline datasets that will "
        "\nbe used for Bit-for-bit generate and/or compare testing.",
    )

    default = get_default_setting(config, "CLEAN", False, check_main=False)

    parser.add_argument(
        "--clean",
        action="store_true",
        default=default,
        help="Specifies if tests should be cleaned after run. If set, all object"
        "\nexecutables and data files will be removed after the tests are run.",
    )

    default = get_default_setting(config, "MACHINE", None, check_main=True)

    parser.add_argument(
        "-m",
        "--machine",
        default=default,
        help="The machine for creating and building tests. This machine must be defined"
        "\nin the config_machines.xml file for the given model. The default is to "
        "\nmatch the name of the machine in the test name or the name of the "
        "\nmachine this script is run on to the NODENAME_REGEX field in "
        "\nconfig_machines.xml. WARNING: This option is highly unsafe and should "
        "\nonly be used if you are an expert.",
    )

    default = get_default_setting(config, "MPILIB", None, check_main=True)

    parser.add_argument(
        "--mpilib",
        default=default,
        help="Specify the mpilib. To see list of supported MPI libraries for each machine, "
        "\ninvoke ./query_config. The default is the first listing.",
    )

    if model in ["cesm", "ufs"]:
        parser.add_argument(
            "-c",
            "--compare",
            help="While testing, compare baselines against the given compare directory. ",
        )

        parser.add_argument(
            "-g",
            "--generate",
            help="While testing, generate baselines in the given generate directory. "
            "\nNOTE: this can also be done after the fact with bless_test_results",
        )

        parser.add_argument(
            "--xml-machine",
            help="Use this machine key in the lookup in testlist.xml. "
            "\nThe default is all if any --xml- argument is used.",
        )

        parser.add_argument(
            "--xml-compiler",
            help="Use this compiler key in the lookup in testlist.xml. "
            "\nThe default is all if any --xml- argument is used.",
        )

        parser.add_argument(
            "--xml-category",
            help="Use this category key in the lookup in testlist.xml. "
            "\nThe default is all if any --xml- argument is used.",
        )

        parser.add_argument(
            "--xml-testlist",
            help="Use this testlist to look up tests. The default is specified in config_files.xml",
        )

        parser.add_argument(
            "--xml-driver",
            choices=("mct", "nuopc", "moab"),
            help="Override driver specified in tests and use this one.",
        )

        parser.add_argument(
            "testargs",
            nargs="*",
            help="Tests to run. Testname form is TEST.GRID.COMPSET[.MACHINE_COMPILER]",
        )

    else:

        parser.add_argument(
            "testargs",
            nargs="+",
            help="Tests or test suites to run."
            " Testname form is TEST.GRID.COMPSET[.MACHINE_COMPILER]",
        )

        parser.add_argument(
            "-b",
            "--baseline-name",
            help="If comparing or generating baselines, use this directory under baseline root. "
            "\nDefault will be current branch name.",
        )

        parser.add_argument(
            "-c",
            "--compare",
            action="store_true",
            help="While testing, compare baselines",
        )

        parser.add_argument(
            "-g",
            "--generate",
            action="store_true",
            help="While testing, generate baselines. "
            "\nNOTE: this can also be done after the fact with bless_test_results",
        )

    default = get_default_setting(config, "COMPILER", None, check_main=True)

    parser.add_argument(
        "--compiler",
        default=default,
        help="Compiler for building cime. Default will be the name in the "
        "\nTestname or the default defined for the machine.",
    )

    parser.add_argument(
        "-n",
        "--namelists-only",
        action="store_true",
        help="Only perform namelist actions for tests",
    )

    parser.add_argument(
        "-p",
        "--project",
        help="Specify a project id for the case (optional)."
        "\nUsed for accounting and directory permissions when on a batch system."
        "\nThe default is user or machine specified by PROJECT."
        "\nAccounting (only) may be overridden by user or machine specified CHARGE_ACCOUNT.",
    )

    parser.add_argument(
        "-t",
        "--test-id",
        help="Specify an 'id' for the test. This is simply a string that is appended "
        "\nto the end of a test name. If no test-id is specified, a time stamp plus a "
        "\nrandom string will be used (ensuring a high probability of uniqueness). "
        "\nIf a test-id is specified, it is the user's responsibility to ensure that "
        "\neach run of create_test uses a unique test-id. WARNING: problems will occur "
        "\nif you use the same test-id twice on the same file system, even if the test "
        "\nlists are completely different.",
    )

    default = get_default_setting(config, "PARALLEL_JOBS", None, check_main=False)

    parser.add_argument(
        "-j",
        "--parallel-jobs",
        type=int,
        default=default,
        help="Number of tasks create_test should perform simultaneously. The default "
        "\n is min(num_cores, num_tests).",
    )

    default = get_default_setting(config, "PROC_POOL", None, check_main=False)

    parser.add_argument(
        "--proc-pool",
        type=int,
        default=default,
        help="The size of the processor pool that create_test can use. The default is "
        "\nMAX_MPITASKS_PER_NODE + 25 percent.",
    )

    default = os.getenv("CIME_GLOBAL_WALLTIME")
    if default is None:
        default = get_default_setting(config, "WALLTIME", None, check_main=True)

    parser.add_argument(
        "--walltime",
        default=default,
        help="Set the wallclock limit for all tests in the suite. "
        "\nUse the variable CIME_GLOBAL_WALLTIME to set this for all tests.",
    )

    default = get_default_setting(config, "JOB_QUEUE", None, check_main=True)

    parser.add_argument(
        "-q",
        "--queue",
        default=default,
        help="Force batch system to use a certain queue",
    )

    parser.add_argument(
        "-f", "--testfile", help="A file containing an ascii list of tests to run"
    )

    default = get_default_setting(
        config, "ALLOW_BASELINE_OVERWRITE", False, check_main=False
    )

    parser.add_argument(
        "-o",
        "--allow-baseline-overwrite",
        action="store_true",
        default=default,
        help="If the --generate option is given, then an attempt to overwrite "
        "\nan existing baseline directory will raise an error. WARNING: Specifying this "
        "\noption will allow existing baseline directories to be silently overwritten.",
    )

    default = get_default_setting(config, "WAIT", False, check_main=False)

    parser.add_argument(
        "--wait",
        action="store_true",
        default=default,
        help="On batch systems, wait for submitted jobs to complete",
    )

    default = get_default_setting(config, "ALLOW_PNL", False, check_main=False)

    parser.add_argument(
        "--allow-pnl",
        action="store_true",
        default=default,
        help="Do not pass skip-pnl to case.submit",
    )

    parser.add_argument(
        "--check-throughput",
        action="store_true",
        help="Fail if throughput check fails. Requires --wait on batch systems",
    )

    parser.add_argument(
        "--check-memory",
        action="store_true",
        help="Fail if memory check fails. Requires --wait on batch systems",
    )

    parser.add_argument(
        "--ignore-namelists",
        action="store_true",
        help="Do not fail if there are namelist diffs",
    )

    parser.add_argument(
        "--ignore-memleak", action="store_true", help="Do not fail if there's a memleak"
    )

    default = get_default_setting(config, "FORCE_PROCS", None, check_main=False)

    parser.add_argument(
        "--force-procs",
        type=int,
        default=default,
        help="Force all tests to run with this number of processors",
    )

    default = get_default_setting(config, "FORCE_THREADS", None, check_main=False)

    parser.add_argument(
        "--force-threads",
        type=int,
        default=default,
        help="Force all tests to run with this number of threads",
    )

    default = get_default_setting(config, "INPUT_DIR", None, check_main=True)

    parser.add_argument(
        "-i",
        "--input-dir",
        default=default,
        help="Use a non-default location for input files",
    )

    default = get_default_setting(config, "PESFILE", None, check_main=True)

    parser.add_argument(
        "--pesfile",
        default=default,
        help="Full pathname of an optional pes specification file. The file"
        "\ncan follow either the config_pes.xml or the env_mach_pes.xml format.",
    )

    default = get_default_setting(config, "RETRY", 0, check_main=False)

    parser.add_argument(
        "--retry",
        type=int,
        default=default,
        help="Automatically retry failed tests. >0 implies --wait",
    )

    parser.add_argument(
        "-N",
        "--non-local",
        action="store_true",
        help="Use when you've requested a machine that you aren't on. "
        "Will reduce errors for missing directories etc.",
    )

    if config and config.has_option("main", "workflow"):
        workflow_default = config.get("main", "workflow")
    else:
        workflow_default = "default"

    parser.add_argument(
        "--workflow",
        default=workflow_default,
        help="A workflow from config_workflow.xml to apply to this case. ",
    )

    parser.add_argument(
        "--chksum", action="store_true", help="Verifies input data checksums."
    )

    srcroot_default = utils.get_src_root()

    parser.add_argument(
        "--srcroot",
        default=srcroot_default,
        help="Alternative pathname for source root directory. "
        f"The default is {srcroot_default}",
    )

    CIME.utils.add_mail_type_args(parser)

    args = CIME.utils.parse_args_and_handle_standard_logging_options(args, parser)

    CIME.utils.resolve_mail_type_args(args)

    # generate and compare flags may not point to the same directory
    if model in ["cesm", "ufs"]:
        if args.generate is not None:
            expect(
                not (args.generate == args.compare),
                "Cannot generate and compare baselines at the same time",
            )

        if args.xml_testlist is not None:
            expect(
                not (
                    args.xml_machine is None
                    and args.xml_compiler is None
                    and args.xml_category is None
                ),
                "If an xml-testlist is present at least one of --xml-machine, "
                "--xml-compiler, --xml-category must also be present",
            )

    else:
        expect(
            not (
                args.baseline_name is not None
                and (not args.compare and not args.generate)
            ),
            "Provided baseline name but did not specify compare or generate",
        )
        expect(
            not (args.compare and args.generate),
            "Tried to compare and generate at same time",
        )

    expect(
        not (args.namelists_only and not (args.generate or args.compare)),
        "Must provide either --compare or --generate with --namelists-only",
    )

    if args.retry > 0:
        args.wait = True

    if args.parallel_jobs is not None:
        expect(
            args.parallel_jobs > 0,
            "Invalid value for parallel_jobs: %d" % args.parallel_jobs,
        )

    if args.use_existing:
        expect(args.test_id is not None, "Must provide test-id of pre-existing cases")

    if args.no_setup:
        args.no_build = True

    if args.no_build:
        args.no_run = True

    # Namelist-only forces some other options:
    if args.namelists_only:
        expect(not args.no_setup, "Cannot compare namelists without setup")
        args.no_build = True
        args.no_run = True
        args.no_batch = True

    expect(
        not (args.non_local and not args.no_build), "Cannot build on non-local machine"
    )

    if args.single_submit:
        expect(
            not args.no_run,
            "Doesn't make sense to request single-submit if no-run is on",
        )
        args.no_build = True
        args.no_run = True
        args.no_batch = True

    if args.test_id is None:
        args.test_id = "%s_%s" % (CIME.utils.get_timestamp(), CIME.utils.id_generator())
    else:
        expect(
            CIME.utils.check_name(args.test_id, additional_chars="."),
            "invalid test-id argument provided",
        )

    if args.testfile is not None:
        with open(args.testfile, "r") as fd:
            args.testargs.extend(
                [
                    line.strip()
                    for line in fd.read().splitlines()
                    if line.strip() and not line.startswith("#")
                ]
            )

    # Propagate `srcroot` to `GenericXML` to resolve $SRCROOT
    # See call to `Machines` below
    utils.GLOBAL["SRCROOT"] = args.srcroot

    # Compute list of fully-resolved test_names
    test_extra_data = {}
    if model in ["cesm", "ufs"]:
        machine_name = args.xml_machine if args.machine is None else args.machine

        # If it's still unclear what machine to use, look at test names
        if machine_name is None:
            for test in args.testargs:
                testsplit = CIME.utils.parse_test_name(test)
                if testsplit[4] is not None:
                    if machine_name is None:
                        machine_name = testsplit[4]
                    else:
                        expect(
                            machine_name == testsplit[4],
                            "ambiguity in machine, please use the --machine option",
                        )

        mach_obj = Machines(machine=machine_name)
        if args.testargs:
            args.compiler = (
                mach_obj.get_default_compiler()
                if args.compiler is None
                else args.compiler
            )
            test_names = get_tests.get_full_test_names(
                args.testargs, mach_obj.get_machine_name(), args.compiler
            )
        else:
            expect(
                not (
                    args.xml_machine is None
                    and args.xml_compiler is None
                    and args.xml_category is None
                    and args.xml_testlist is None
                ),
                "At least one of --xml-machine, --xml-testlist, "
                "--xml-compiler, --xml-category or a valid test name must be provided.",
            )

            test_data = get_tests_from_xml(
                xml_machine=args.xml_machine,
                xml_category=args.xml_category,
                xml_compiler=args.xml_compiler,
                xml_testlist=args.xml_testlist,
                machine=machine_name,
                compiler=args.compiler,
                driver=args.xml_driver,
            )
            test_names = [item["name"] for item in test_data]
            for test_datum in test_data:
                test_extra_data[test_datum["name"]] = test_datum

        logger.info("Testnames: %s" % test_names)
    else:
        if args.machine is None:
            args.machine = get_tests.infer_machine_name_from_tests(args.testargs)

        mach_obj = Machines(machine=args.machine)
        args.compiler = (
            mach_obj.get_default_compiler() if args.compiler is None else args.compiler
        )

        test_names = get_tests.get_full_test_names(
            args.testargs, mach_obj.get_machine_name(), args.compiler
        )

    expect(
        mach_obj.is_valid_compiler(args.compiler),
        "Compiler %s not valid for machine %s"
        % (args.compiler, mach_obj.get_machine_name()),
    )

    if not args.wait and mach_obj.has_batch_system() and not args.no_batch:
        expect(
            not args.check_throughput,
            "Makes no sense to use --check-throughput without --wait",
        )
        expect(
            not args.check_memory, "Makes no sense to use --check-memory without --wait"
        )

    # Normalize compare/generate between the models
    baseline_cmp_name = None
    baseline_gen_name = None
    if args.compare or args.generate:
        if model in ["cesm", "ufs"]:
            if args.compare is not None:
                baseline_cmp_name = args.compare
            if args.generate is not None:
                baseline_gen_name = args.generate
        else:
            baseline_name = (
                args.baseline_name
                if args.baseline_name
                else CIME.utils.get_current_branch(repo=CIME.utils.get_cime_root())
            )
            expect(
                baseline_name is not None,
                "Could not determine baseline name from branch, please use -b option",
            )
            if args.compare:
                baseline_cmp_name = baseline_name
            elif args.generate:
                baseline_gen_name = baseline_name

    if args.input_dir is not None:
        args.input_dir = os.path.abspath(args.input_dir)

    # sanity check
    for name in test_names:
        dot_count = name.count(".")
        expect(dot_count > 1 and dot_count <= 4, "Invalid test name, '{}'".format(name))

    # for e3sm, sort by walltime
    if model == "e3sm":
        if args.walltime is None:
            # Longest tests should run first
            test_names.sort(key=get_tests.key_test_time, reverse=True)
        else:
            test_names.sort()

    return (
        test_names,
        test_extra_data,
        args.compiler,
        mach_obj.get_machine_name(),
        args.no_run,
        args.no_build,
        args.no_setup,
        args.no_batch,
        args.test_root,
        args.baseline_root,
        args.clean,
        baseline_cmp_name,
        baseline_gen_name,
        args.namelists_only,
        args.project,
        args.test_id,
        args.parallel_jobs,
        args.walltime,
        args.single_submit,
        args.proc_pool,
        args.use_existing,
        args.save_timing,
        args.queue,
        args.allow_baseline_overwrite,
        args.output_root,
        args.wait,
        args.force_procs,
        args.force_threads,
        args.mpilib,
        args.input_dir,
        args.pesfile,
        args.retry,
        args.mail_user,
        args.mail_type,
        args.check_throughput,
        args.check_memory,
        args.ignore_namelists,
        args.ignore_memleak,
        args.allow_pnl,
        args.non_local,
        args.single_exe,
        args.workflow,
        args.chksum,
    )
Example #23
    def _compare_baseline(self):
        with self._test_status:
            if int(self._case.get_value("RESUBMIT")) > 0:
                # This is here because the comparison is run for each submission
                # and we only want to compare once the whole run is finished. We
                # need to return a pass here to continue the submission process.
                self._test_status.set_status(CIME.test_status.BASELINE_PHASE,
                                             CIME.test_status.TEST_PASS_STATUS)
                return

            self._test_status.set_status(CIME.test_status.BASELINE_PHASE,
                                         CIME.test_status.TEST_FAIL_STATUS)

            run_dir = self._case.get_value("RUNDIR")
            case_name = self._case.get_value("CASE")
            base_dir = os.path.join(self._case.get_value("BASELINE_ROOT"),
                                    self._case.get_value("BASECMP_CASE"))

            test_name = "{}".format(case_name.split('.')[-1])
            evv_config = {
                test_name: {
                    "module": os.path.join(evv_lib_dir, "extensions", "ks.py"),
                    "test-case": "Test",
                    "test-dir": run_dir,
                    "ref-case": "Baseline",
                    "ref-dir": base_dir,
                    "var-set": "default",
                    "ninst": NINST,
                    "critical": 13
                }
            }

            json_file = os.path.join(run_dir, '.'.join([case_name, 'json']))
            with open(json_file, 'w') as config_file:
                json.dump(evv_config, config_file, indent=4)

            evv_out_dir = os.path.join(run_dir, '.'.join([case_name, 'evv']))
            evv(['-e', json_file, '-o', evv_out_dir])
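            # Sketch of the roughly equivalent command line for the in-process
            # evv() call above (assumes the evv4esm console script is on PATH):
            #   evv -e <case_name>.json -o <case_name>.evv
            # The pass/fail summary is read back below from index.json in that
            # output directory.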

            with open(os.path.join(evv_out_dir, 'index.json')) as evv_f:
                evv_status = json.load(evv_f)

            comments = ""
            for evv_elem in evv_status['Data']['Elements']:
                if evv_elem['Type'] == 'ValSummary' \
                        and evv_elem['TableTitle'] == 'Kolmogorov-Smirnov test':
                    comments = "; ".join("{}: {}".format(key, val) for key, val
                                         in evv_elem['Data'][test_name][''].items())
                    if evv_elem['Data'][test_name]['']['Test status'].lower() == 'pass':
                        self._test_status.set_status(CIME.test_status.BASELINE_PHASE,
                                                     CIME.test_status.TEST_PASS_STATUS)
                    break

            status = self._test_status.get_status(CIME.test_status.BASELINE_PHASE)
            mach_name = self._case.get_value("MACH")
            mach_obj = Machines(machine=mach_name)
            htmlroot = CIME.utils.get_htmlroot(mach_obj)
            urlroot = CIME.utils.get_urlroot(mach_obj)
            if htmlroot is not None:
                with CIME.utils.SharedArea():
                    dir_util.copy_tree(evv_out_dir, os.path.join(htmlroot, 'evv', case_name), preserve_mode=False)
                if urlroot is None:
                    urlroot = "[{}_URL]".format(mach_name.capitalize())
                viewing = "{}/evv/{}/index.html".format(urlroot, case_name)
            else:
                viewing = "{}\n" \
                          "    EVV viewing instructions can be found at: " \
                          "        https://github.com/E3SM-Project/E3SM/blob/master/cime/scripts/" \
                          "climate_reproducibility/README.md#test-passfail-and-extended-output" \
                          "".format(evv_out_dir)

            comments = "{} {} for test '{}'.\n" \
                       "    {}\n" \
                       "    EVV results can be viewed at:\n" \
                       "        {}".format(CIME.test_status.BASELINE_PHASE, status, test_name, comments, viewing)

            CIME.utils.append_testlog(comments, self._orig_caseroot)
Example #25
class TestScheduler(object):
###############################################################################

    ###########################################################################
    def __init__(self, test_names, test_data=None,
                 no_run=False, no_build=False, no_setup=False, no_batch=None,
                 test_root=None, test_id=None,
                 machine_name=None, compiler=None,
                 baseline_root=None, baseline_cmp_name=None, baseline_gen_name=None,
                 clean=False, namelists_only=False,
                 project=None, parallel_jobs=None,
                 walltime=None, proc_pool=None,
                 use_existing=False, save_timing=False, queue=None, allow_baseline_overwrite=False):
    ###########################################################################
        self._cime_root  = CIME.utils.get_cime_root()
        self._cime_model = CIME.utils.get_model()
        self._allow_baseline_overwrite  = allow_baseline_overwrite
        self._save_timing = save_timing
        self._queue       = queue
        self._test_data   = {} if test_data is None else test_data # Format:  {test_name -> {data_name -> data}}

        self._machobj = Machines(machine=machine_name)

        self._no_setup = no_setup
        self._no_build = no_build or no_setup or namelists_only
        self._no_run   = no_run or self._no_build

        # Figure out what project to use
        if project is None:
            self._project = CIME.utils.get_project()
            if self._project is None:
                self._project = self._machobj.get_value("PROJECT")
        else:
            self._project = project

        # We will not use batch system if user asked for no_batch or if current
        # machine is not a batch machine
        self._no_batch = no_batch or not self._machobj.has_batch_system()
        expect(not (self._no_batch and self._queue is not None),
               "Does not make sense to request a queue without batch system")

        # Determine and resolve test_root
        self._test_root = self._machobj.get_value("CESMSCRATCHROOT") if test_root is None else test_root
        if self._project is not None:
            self._test_root = self._test_root.replace("$PROJECT", self._project)

        self._test_root = os.path.abspath(self._test_root)
        self._test_id   = test_id if test_id is not None else CIME.utils.get_timestamp()

        self._compiler = self._machobj.get_default_compiler() if compiler is None else compiler

        self._clean          = clean
        self._namelists_only = namelists_only

        self._walltime = walltime

        if parallel_jobs is None:
            self._parallel_jobs = min(len(test_names),
                                      int(self._machobj.get_value("MAX_TASKS_PER_NODE")))
        else:
            self._parallel_jobs = parallel_jobs

        self._baseline_cmp_name = baseline_cmp_name # Implies comparison should be done if not None
        self._baseline_gen_name = baseline_gen_name # Implies generation should be done if not None

        if baseline_cmp_name or baseline_gen_name:
            # Compute baseline_root
            self._baseline_root = baseline_root if baseline_root is not None \
                else self._machobj.get_value("CCSM_BASELINE")

            if self._project is not None:
                self._baseline_root = self._baseline_root.replace("$PROJECT", self._project)

            self._baseline_root = os.path.abspath(self._baseline_root)

            if self._baseline_cmp_name:
                full_baseline_dir = os.path.join(self._baseline_root, self._baseline_cmp_name)
                expect(os.path.isdir(full_baseline_dir),
                       "Missing baseline comparison directory %s" % full_baseline_dir)

            # the following ensures that an existing generate directory is not overwritten
            if self._baseline_gen_name:
                full_baseline_dir = os.path.join(self._baseline_root, self._baseline_gen_name)
                existing_baselines = []
                for test_name in test_names:
                    test_baseline = os.path.join(full_baseline_dir, test_name)
                    if os.path.isdir(test_baseline):
                        existing_baselines.append(test_baseline)
                expect(allow_baseline_overwrite or len(existing_baselines) == 0,
                       "Baseline directories already exist %s\n"
                       "Use --allow_baseline_overwrite to avoid this error" % existing_baselines)
        else:
            self._baseline_root = None

        # This is the only data that multiple threads will simultaneously access.
        # Each test has its own value, and setting/retrieving items from a dict
        # is atomic, so this should be fine to use without a mutex.
        # Since the namelist phase can fail without aborting later phases, we
        # need some extra state to remember tests that had namelist problems.
        # name -> (phase, status, has_namelist_problem)
        self._tests = {}
        for test_name in test_names:
            self._tests[test_name] = (TEST_START, TEST_PASS_STATUS, False)

        # Oversubscribe by 1/4
        if proc_pool is None:
            pes = int(self._machobj.get_value("PES_PER_NODE"))
            self._proc_pool = int(pes * 1.25)
        else:
            self._proc_pool = int(proc_pool)

        self._procs_avail = self._proc_pool

        # Setup phases
        self._phases = list(PHASES)
        if self._no_setup:
            self._phases.remove(SETUP_PHASE)
        if self._no_build:
            self._phases.remove(SHAREDLIB_BUILD_PHASE)
            self._phases.remove(MODEL_BUILD_PHASE)
        if self._no_run:
            self._phases.remove(RUN_PHASE)
        if not self._baseline_cmp_name and not self._baseline_gen_name:
            self._phases.remove(NAMELIST_PHASE)

        if use_existing:
            for test in self._tests:
                ts = TestStatus(self._get_test_dir(test))
                for phase, status in ts:
                    if phase in CORE_PHASES:
                        if status in [TEST_PEND_STATUS, TEST_FAIL_STATUS]:
                            # We need to pick up here
                            break
                        else:
                            self._update_test_status(test, phase, TEST_PEND_STATUS)
                            self._update_test_status(test, phase, status)
        else:
            # None of the test directories should already exist.
            for test in self._tests:
                expect(not os.path.exists(self._get_test_dir(test)),
                       "Cannot create new case in directory '%s', it already exists."
                       " Pick a different test-id" % self._get_test_dir(test))

        # By the end of this constructor, this program should never hard abort;
        # instead, errors will be placed in the TestStatus files for the various
        # test cases

    ###########################################################################
    def _log_output(self, test, output):
    ###########################################################################
        test_dir = self._get_test_dir(test)
        if not os.path.isdir(test_dir):
            # Note: creating this directory here could cause create_newcase to
            # fail if this is run before create_newcase.
            os.makedirs(test_dir)
        append_status(output, caseroot=test_dir, sfile="TestStatus.log")

    ###########################################################################
    def _get_case_id(self, test):
    ###########################################################################
        baseline_action_code = ""
        if self._baseline_gen_name:
            baseline_action_code += "G"
        if self._baseline_cmp_name:
            baseline_action_code += "C"
        if len(baseline_action_code) > 0:
            return "%s.%s.%s" % (test, baseline_action_code, self._test_id)
        else:
            return "%s.%s" % (test, self._test_id)

    ###########################################################################
    def _get_test_dir(self, test):
    ###########################################################################
        return os.path.join(self._test_root, self._get_case_id(test))

    ###########################################################################
    def _get_test_data(self, test):
    ###########################################################################
        # Must be atomic
        return self._tests[test]

    ###########################################################################
    def _is_broken(self, test):
    ###########################################################################
        status = self._get_test_status(test)
        return status not in CONTINUE and status != TEST_PEND_STATUS

    ###########################################################################
    def _work_remains(self, test):
    ###########################################################################
        test_phase, test_status, _ = self._get_test_data(test)
        return (test_status in CONTINUE or test_status == TEST_PEND_STATUS) and\
            test_phase != self._phases[-1]

    ###########################################################################
    def _get_test_status(self, test, phase=None):
    ###########################################################################
        curr_phase, curr_status, nl_fail = self._get_test_data(test)
        if phase == NAMELIST_PHASE and nl_fail:
            return NAMELIST_FAIL_STATUS
        elif phase is None or phase == curr_phase:
            return curr_status
        else:
            expect(phase is None or self._phases.index(phase) < self._phases.index(curr_phase),
                   "Tried to see the future")
            # Assume all older phases PASSed
            return TEST_PASS_STATUS

    ###########################################################################
    def _get_test_phase(self, test):
    ###########################################################################
        return self._get_test_data(test)[0]

    ###########################################################################
    def _update_test_status(self, test, phase, status):
    ###########################################################################
        phase_idx = self._phases.index(phase)
        old_phase, old_status, old_nl_fail = self._get_test_data(test)

        if old_phase == phase:
            expect(old_status == TEST_PEND_STATUS,
                   "Only valid to transition from PEND to something else, found '%s' for phase '%s'" %
                   (old_status, phase))
            expect(status != TEST_PEND_STATUS,
                   "Cannot transition from PEND -> PEND")
        else:
            expect(old_status in CONTINUE,
                   "Why did we move on to next phase when prior phase did not pass?")
            expect(status == TEST_PEND_STATUS,
                   "New phase should be set to pending status")
            expect(self._phases.index(old_phase) == phase_idx - 1,
                   "Skipped phase? %s %s"%(old_phase, phase_idx))
        # Must be atomic
        self._tests[test] = (phase, status, old_nl_fail)
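        # Illustration: for a given test the only legal sequence per phase is
        #   _update_test_status(test, phase, TEST_PEND_STATUS)   # enter a new phase
        #   _update_test_status(test, phase, <final status>)     # resolve that phase
        # Any other transition (skipping a phase, PEND -> PEND, or re-resolving
        # an already-resolved phase) trips one of the expect() checks above.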

    ###########################################################################
    def _test_has_nl_problem(self, test):
    ###########################################################################
        curr_phase, curr_status, _ = self._get_test_data(test)
        expect(curr_phase == NAMELIST_PHASE, "Setting namelist status outside of namelist phase?")
        # Must be atomic
        self._tests[test] = (curr_phase, curr_status, True)

    ###########################################################################
    def _shell_cmd_for_phase(self, test, cmd, phase, from_dir=None):
    ###########################################################################
        while True:
            rc, output, errput = run_cmd(cmd, from_dir=from_dir)
            if rc != 0:
                self._log_output(test,
                                 "%s FAILED for test '%s'.\nCommand: %s\nOutput: %s\n\nErrput: %s" %
                                 (phase, test, cmd, output, errput))
                # Temporary hack to get around odd file descriptor use by
                # buildnml scripts.
                if "bad interpreter" in errput:
                    time.sleep(1)
                    continue
                else:
                    break
            else:
                # We don't want "RUN PASSED" in the TestStatus.log if the only thing that
                # succeeded was the submission.
                if phase != RUN_PHASE or self._no_batch:
                    self._log_output(test,
                                     "%s PASSED for test '%s'.\nCommand: %s\nOutput: %s\n\nErrput: %s" %
                                     (phase, test, cmd, output, errput))
                break

        return rc == 0

    ###########################################################################
    def _create_newcase_phase(self, test):
    ###########################################################################
        test_dir = self._get_test_dir(test)

        _, case_opts, grid, compset,\
            machine, compiler, test_mods = CIME.utils.parse_test_name(test)

        create_newcase_cmd = "%s --case %s --res %s --mach %s --compiler %s --compset %s"\
                               " --test" % \
                              (os.path.join(self._cime_root, "scripts", "create_newcase"),
                               test_dir, grid, machine, compiler, compset)
        if self._project is not None:
            create_newcase_cmd += " --project %s " % self._project

        if test_mods is not None:
            files = Files()
            (component,modspath) = test_mods.split('/',1)
            testmods_dir = files.get_value("TESTS_MODS_DIR", {"component": component})

            test_mod_file = os.path.join(testmods_dir, component, modspath)
            if not os.path.exists(test_mod_file):
                self._log_output(test, "Missing testmod file '%s'" % test_mod_file)
                return False
            create_newcase_cmd += " --user-mods-dir %s" % test_mod_file

        if case_opts is not None:
            for case_opt in case_opts: # pylint: disable=not-an-iterable
                if case_opt.startswith('M'):
                    mpilib = case_opt[1:]
                    create_newcase_cmd += " --mpilib %s" % mpilib
                    logger.debug (" MPILIB set to %s" % mpilib)
                if case_opt.startswith('N'):
                    ninst = case_opt[1:]
                    create_newcase_cmd += " --ninst %s" %ninst
                    logger.debug (" NINST set to %s" % ninst)
                if case_opt.startswith('P'):
                    pesize = case_opt[1:]
                    create_newcase_cmd += " --pecount %s"%pesize

        if self._queue is not None:
            create_newcase_cmd += " --queue=%s" % self._queue

        if self._walltime is not None:
            create_newcase_cmd += " --walltime %s" % self._walltime
        elif test in self._test_data and "options" in self._test_data[test] and \
                "wallclock" in self._test_data[test]['options']:
            create_newcase_cmd += " --walltime %s" % self._test_data[test]['options']['wallclock']

        logger.debug("Calling create_newcase: " + create_newcase_cmd)
        return self._shell_cmd_for_phase(test, create_newcase_cmd, CREATE_NEWCASE_PHASE)

    ###########################################################################
    def _xml_phase(self, test):
    ###########################################################################
        test_case = CIME.utils.parse_test_name(test)[0]

        # Create, fill and write an envtest object
        test_dir = self._get_test_dir(test)
        envtest = EnvTest(test_dir)

        # Determine list of component classes that this coupler/driver knows how
        # to deal with. This list follows the same order as compset longnames follow.
        files = Files()
        drv_config_file = files.get_value("CONFIG_DRV_FILE")
        drv_comp = Component(drv_config_file)
        envtest.add_elements_by_group(files, {}, "env_test.xml")
        envtest.add_elements_by_group(drv_comp, {}, "env_test.xml")
        envtest.set_value("TESTCASE", test_case)
        envtest.set_value("TEST_TESTID", self._test_id)
        envtest.set_value("CASEBASEID", test)
        if test in self._test_data and "options" in self._test_data[test] and \
                "memleak_tolerance" in self._test_data[test]['options']:
            envtest.set_value("TEST_MEMLEAK_TOLERANCE", self._test_data[test]['options']['memleak_tolerance'])

        test_argv = "-testname %s -testroot %s" % (test, self._test_root)
        if self._baseline_gen_name:
            test_argv += " -generate %s" % self._baseline_gen_name
            basegen_case_fullpath = os.path.join(self._baseline_root,self._baseline_gen_name, test)
            logger.debug("basegen_case is %s"%basegen_case_fullpath)
            envtest.set_value("BASELINE_NAME_GEN", self._baseline_gen_name)
            envtest.set_value("BASEGEN_CASE", os.path.join(self._baseline_gen_name, test))
        if self._baseline_cmp_name:
            test_argv += " -compare %s" % self._baseline_cmp_name
            envtest.set_value("BASELINE_NAME_CMP", self._baseline_cmp_name)
            envtest.set_value("BASECMP_CASE", os.path.join(self._baseline_cmp_name, test))

        envtest.set_value("TEST_ARGV", test_argv)
        envtest.set_value("CLEANUP", self._clean)

        if self._baseline_gen_name or self._baseline_cmp_name:
            envtest.set_value("BASELINE_ROOT", self._baseline_root)
        envtest.set_value("GENERATE_BASELINE", self._baseline_gen_name is not None)
        envtest.set_value("COMPARE_BASELINE", self._baseline_cmp_name is not None)
        envtest.set_value("CCSM_CPRNC", self._machobj.get_value("CCSM_CPRNC", resolved=False))

        # Add the test instructions from config_test to env_test in the case
        config_test = Tests()
        testnode = config_test.get_test_node(test_case)
        envtest.add_test(testnode)

        # Determine the test_case from the test name
        test_case, case_opts = CIME.utils.parse_test_name(test)[:2]

        # Determine case_opts from the test_case
        if case_opts is not None:
            logger.debug("case_opts are %s " %case_opts)
            for opt in case_opts:

                logger.debug("case_opt is %s" %opt)
                if opt == 'D':
                    envtest.set_test_parameter("DEBUG", "TRUE")
                    logger.debug (" DEBUG set to TRUE")

                elif opt == 'E':
                    envtest.set_test_parameter("USE_ESMF_LIB", "TRUE")
                    envtest.set_test_parameter("COMP_INTERFACE", "ESMF")
                    logger.debug (" USE_ESMF_LIB set to TRUE")
                    logger.debug (" COMP_INTERFACE set to ESMF")

                elif opt == 'CG':
                    envtest.set_test_parameter("CALENDAR", "GREGORIAN")
                    logger.debug (" CALENDAR set to %s" %opt)

                elif opt.startswith('L'):
                    match =  re.match('L([A-Za-z])([0-9]*)', opt)
                    stop_option = {"y":"nyears", "m":"nmonths", "d":"ndays", "h":"nhours",
                                   "s":"nseconds", "n":"nsteps"}
                    opt = match.group(1)
                    envtest.set_test_parameter("STOP_OPTION",stop_option[opt])
                    opti = match.group(2)
                    envtest.set_test_parameter("STOP_N", opti)
                    logger.debug (" STOP_OPTION set to %s" %stop_option[opt])
                    logger.debug (" STOP_N      set to %s" %opti)

                elif opt.startswith('M'):
                    # M option handled by create newcase
                    continue

                elif opt.startswith('P'):
                    # P option handled by create newcase
                    continue

                elif opt.startswith('N'):
                    # handled in create_newcase
                    continue
                elif opt.startswith('IOP'):
                    logger.warning("IOP test option not yet implemented")
                else:
                    expect(False, "Could not parse option '%s' " %opt)

        envtest.write()
        lockedfiles = os.path.join(test_dir, "LockedFiles")
        if not os.path.exists(lockedfiles):
            os.mkdir(lockedfiles)
        shutil.copy(os.path.join(test_dir,"env_run.xml"),
                    os.path.join(lockedfiles, "env_run.orig.xml"))

        with Case(test_dir, read_only=False) as case:
            case.set_value("SHAREDLIBROOT",
                           os.path.join(self._test_root,
                                        "sharedlibroot.%s"%self._test_id))
            envtest.set_initial_values(case)
            if self._save_timing:
                case.set_value("SAVE_TIMING", True)

        return True

    ###########################################################################
    def _setup_phase(self, test):
    ###########################################################################
        test_dir  = self._get_test_dir(test)

        return self._shell_cmd_for_phase(test, "./case.setup", SETUP_PHASE, from_dir=test_dir)

    ###########################################################################
    def _nlcomp_phase(self, test):
    ###########################################################################
        test_dir       = self._get_test_dir(test)
        casedoc_dir    = os.path.join(test_dir, "CaseDocs")
        compare_nl     = os.path.join(CIME.utils.get_scripts_root(), "Tools", "compare_namelists")
        simple_compare = os.path.join(CIME.utils.get_scripts_root(), "Tools", "simple_compare")

        if self._baseline_cmp_name:
            has_fails         = False
            baseline_dir      = os.path.join(self._baseline_root, self._baseline_cmp_name, test)
            baseline_casedocs = os.path.join(baseline_dir, "CaseDocs")

            # Start off by comparing everything in CaseDocs except a few arbitrary files (ugh!)
            # TODO: Namelist files should have consistent suffix
            all_items_to_compare = [item for item in glob.glob("%s/*" % casedoc_dir)\
                                    if "README" not in os.path.basename(item)\
                                    and not item.endswith("doc")\
                                    and not item.endswith("prescribed")\
                                    and not os.path.basename(item).startswith(".")] + \
                                    glob.glob("%s/*user_nl*" % test_dir)
            for item in all_items_to_compare:
                baseline_counterpart = os.path.join(baseline_casedocs \
                                                    if os.path.dirname(item).endswith("CaseDocs") \
                                                    else baseline_dir,os.path.basename(item))
                if not os.path.exists(baseline_counterpart):
                    self._log_output(test, "Missing baseline namelist '%s'" % baseline_counterpart)
                    has_fails = True
                else:
                    if CIME.compare_namelists.is_namelist_file(item):
                        rc, output, _  = run_cmd("%s %s %s -c %s 2>&1" %
                                                 (compare_nl, baseline_counterpart, item, test))
                    else:
                        rc, output, _  = run_cmd("%s %s %s -c %s 2>&1" %
                                                 (simple_compare, baseline_counterpart, item, test))

                    if rc != 0:
                        has_fails = True
                        self._log_output(test, output)

            if has_fails:
                self._test_has_nl_problem(test)

        if self._baseline_gen_name:
            baseline_dir      = os.path.join(self._baseline_root, self._baseline_gen_name, test)
            baseline_casedocs = os.path.join(baseline_dir, "CaseDocs")
            if not os.path.isdir(baseline_dir):
                os.makedirs(baseline_dir, stat.S_IRWXU | stat.S_IRWXG | stat.S_IXOTH | stat.S_IROTH)

            if os.path.isdir(baseline_casedocs):
                shutil.rmtree(baseline_casedocs)

            shutil.copytree(casedoc_dir, baseline_casedocs)
            os.chmod(baseline_casedocs, stat.S_IRWXU | stat.S_IRWXG | stat.S_IXOTH | stat.S_IROTH)
            for item in glob.glob("%s/*" % baseline_casedocs):
                os.chmod(item, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP)

            for item in glob.glob(os.path.join(test_dir, "user_nl*")):
                preexisting_baseline = os.path.join(baseline_dir, os.path.basename(item))
                if (os.path.exists(preexisting_baseline)):
                    os.remove(preexisting_baseline)
                shutil.copy2(item, baseline_dir)
                os.chmod(preexisting_baseline, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP)

        # Always mark as passed unless we hit an exception
        return True

    ###########################################################################
    def _sharedlib_build_phase(self, test):
    ###########################################################################
        test_dir = self._get_test_dir(test)
        return self._shell_cmd_for_phase(test, "./case.build --sharedlib-only", SHAREDLIB_BUILD_PHASE, from_dir=test_dir)

    ###########################################################################
    def _model_build_phase(self, test):
    ###########################################################################
        test_dir = self._get_test_dir(test)
        return self._shell_cmd_for_phase(test, "./case.build --model-only", MODEL_BUILD_PHASE, from_dir=test_dir)

    ###########################################################################
    def _run_phase(self, test):
    ###########################################################################
        test_dir = self._get_test_dir(test)
        if self._no_batch:
            cmd = "./case.submit --no-batch"
        else:
            cmd = "./case.submit "

        return self._shell_cmd_for_phase(test, cmd, RUN_PHASE, from_dir=test_dir)

    ###########################################################################
    def _run_catch_exceptions(self, test, phase, run):
    ###########################################################################
        try:
            return run(test)
        except (SystemExit, Exception) as e:
            exc_tb = sys.exc_info()[2]
            errput = "Test '%s' failed in phase '%s' with exception '%s'" % (test, phase, str(e))
            self._log_output(test, errput)
            logger.warning("Caught exception: %s" % str(e))
            traceback.print_tb(exc_tb)
            return False

    ###########################################################################
    def _get_procs_needed(self, test, phase, threads_in_flight=None, no_batch=False):
    ###########################################################################
        if phase == RUN_PHASE and (self._no_batch or no_batch):
            test_dir = self._get_test_dir(test)
            out = run_cmd_no_fail("./xmlquery TOTALPES -value", from_dir=test_dir)
            return int(out)
        elif (phase == SHAREDLIB_BUILD_PHASE):
            # Will force serialization of sharedlib builds
            # TODO - instead of serializing, compute all library configs needed and build
            # them all in parallel
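            # Illustration of the mechanism: returning self._proc_pool + 1
            # below asks for more processors than the whole pool, so the
            # producer can never start this sharedlib build while another one
            # is already in flight; once none is running, the cost drops to 1.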
            for _, _, running_phase in threads_in_flight.values():
                if (running_phase == SHAREDLIB_BUILD_PHASE):
                    return self._proc_pool + 1

            return 1
        elif (phase == MODEL_BUILD_PHASE):
            # Model builds now happen in parallel
            return 4
        else:
            return 1

    ###########################################################################
    def _wait_for_something_to_finish(self, threads_in_flight):
    ###########################################################################
        expect(len(threads_in_flight) <= self._parallel_jobs, "Oversubscribed?")
        finished_tests = []
        while not finished_tests:
            for test, thread_info in threads_in_flight.items():
                if not thread_info[0].is_alive():
                    finished_tests.append((test, thread_info[1]))

            if not finished_tests:
                time.sleep(0.2)

        for finished_test, procs_needed in finished_tests:
            self._procs_avail += procs_needed
            del threads_in_flight[finished_test]

    ###########################################################################
    def _update_test_status_file(self, test, test_phase, status):
    ###########################################################################
        """
        In general, test_scheduler should not be responsible for updating
        the TestStatus file, but there are a few cases where it has to.
        """
        test_dir = self._get_test_dir(test)
        with TestStatus(test_dir=test_dir, test_name=test) as ts:
            ts.set_status(test_phase, status)

    ###########################################################################
    def _consumer(self, test, test_phase, phase_method):
    ###########################################################################
        before_time = time.time()
        success = self._run_catch_exceptions(test, test_phase, phase_method)
        elapsed_time = time.time() - before_time
        status  = (TEST_PEND_STATUS if test_phase == RUN_PHASE and not \
                   self._no_batch else TEST_PASS_STATUS) if success else TEST_FAIL_STATUS

        if status != TEST_PEND_STATUS:
            self._update_test_status(test, test_phase, status)

        status_str = "Finished %s for test %s in %f seconds (%s)" %\
                     (test_phase, test, elapsed_time, status)
        if not success:
            status_str += "    Case dir: %s" % self._get_test_dir(test)
        logger.info(status_str)

        if test_phase in [CREATE_NEWCASE_PHASE, XML_PHASE, NAMELIST_PHASE]:
            # These are the phases for which TestScheduler is responsible for
            # updating the TestStatus file
            nl_problem = self._get_test_data(test)[2]
            status = TEST_FAIL_STATUS if nl_problem and test_phase == NAMELIST_PHASE else status
            self._update_test_status_file(test, test_phase, status)

        # On batch systems, we want to immediately submit to the queue, because
        # it's very cheap to submit and will get us a better spot in line
        if (success and not self._no_run and not self._no_batch and test_phase == MODEL_BUILD_PHASE):
            logger.info("Starting %s for test %s with 1 proc on interactive node and %d procs on compute nodes" %
                        (RUN_PHASE, test, self._get_procs_needed(test, RUN_PHASE, no_batch=True)))
            self._update_test_status(test, RUN_PHASE, TEST_PEND_STATUS)
            self._consumer(test, RUN_PHASE, self._run_phase)

    ###########################################################################
    def _producer(self):
    ###########################################################################
        threads_in_flight = {} # test-name -> (thread, procs, phase)
        while True:
            work_to_do = False
            num_threads_launched_this_iteration = 0
            for test in self._tests:
                logger.debug("test_name: " + test)
                # If we have no workers available, immediately wait
                if len(threads_in_flight) == self._parallel_jobs:
                    self._wait_for_something_to_finish(threads_in_flight)

                if self._work_remains(test):
                    work_to_do = True
                    if test not in threads_in_flight:
                        test_phase, test_status, _ = self._get_test_data(test)
                        expect(test_status != TEST_PEND_STATUS, test)
                        next_phase = self._phases[self._phases.index(test_phase) + 1]
                        procs_needed = self._get_procs_needed(test, next_phase, threads_in_flight)

                        if procs_needed <= self._procs_avail:
                            self._procs_avail -= procs_needed

                            # Necessary to print this way when multiple threads printing
                            logger.info("Starting %s for test %s with %d procs" %
                                        (next_phase, test, procs_needed))

                            self._update_test_status(test, next_phase, TEST_PEND_STATUS)
                            new_thread = threading.Thread(target=self._consumer,
                                args=(test, next_phase, getattr(self, "_%s_phase" % next_phase.lower())) )
                            threads_in_flight[test] = (new_thread, procs_needed, next_phase)
                            new_thread.start()
                            num_threads_launched_this_iteration += 1
                        else:
                            if not threads_in_flight:
                                msg = "Phase '%s' for test '%s' required more processors, %d, than this machine can provide, %d" % \
                                    (next_phase, test, procs_needed, self._procs_avail)
                                logger.warning(msg)
                                self._update_test_status(test, next_phase, TEST_PEND_STATUS)
                                self._update_test_status(test, next_phase, TEST_FAIL_STATUS)
                                self._log_output(test, msg)
                                self._update_test_status_file(test, next_phase, TEST_FAIL_STATUS)
                                num_threads_launched_this_iteration += 1

            if not work_to_do:
                break

            if num_threads_launched_this_iteration == 0:
                # No free resources, wait for something in flight to finish
                self._wait_for_something_to_finish(threads_in_flight)

        for unfinished_thread, _, _ in threads_in_flight.values():
            unfinished_thread.join()

    ###########################################################################
    def _setup_cs_files(self):
    ###########################################################################
        try:
            python_libs_root = CIME.utils.get_python_libs_root()
            template_file = os.path.join(python_libs_root, "cs.status.template")
            template = open(template_file, "r").read()
            template = template.replace("<PATH>",
                                        os.path.join(self._cime_root,"scripts","Tools")).replace\
                                        ("<TESTID>", self._test_id)
            cs_status_file = os.path.join(self._test_root, "cs.status.%s" % self._test_id)
            with open(cs_status_file, "w") as fd:
                fd.write(template)
            os.chmod(cs_status_file, os.stat(cs_status_file).st_mode | stat.S_IXUSR | stat.S_IXGRP)

            template_file = os.path.join(python_libs_root, "cs.submit.template")
            template = open(template_file, "r").read()
            setup_cmd = "./case.setup" if self._no_setup else ":"
            build_cmd = "./case.build" if self._no_build else ":"
            test_cmd  = "./case.submit"
            template = template.replace("<SETUP_CMD>", setup_cmd).\
                       replace("<BUILD_CMD>", build_cmd).\
                       replace("<RUN_CMD>", test_cmd).\
                       replace("<TESTID>", self._test_id)

            if self._no_run:
                cs_submit_file = os.path.join(self._test_root, "cs.submit.%s" % self._test_id)
                with open(cs_submit_file, "w") as fd:
                    fd.write(template)
                os.chmod(cs_submit_file,
                         os.stat(cs_submit_file).st_mode | stat.S_IXUSR | stat.S_IXGRP)

            if CIME.utils.get_model() == "cesm":
                testreporter =  os.path.join(self._test_root,"testreporter.pl")
                shutil.copy(os.path.join(self._cime_root,"scripts","Testing","testreporter.pl"),
                            testreporter)
                os.chmod(testreporter, os.stat(testreporter).st_mode | stat.S_IXUSR | stat.S_IXGRP)

        except Exception as e:
            logger.warning("FAILED to set up cs files: %s" % str(e))

    ###########################################################################
    def run_tests(self):
    ###########################################################################
        """
        Main API for this class.

        Return True if all tests passed.
        """
        start_time = time.time()

        # Tell user what will be run
        logger.info( "RUNNING TESTS:")
        for test in self._tests:
            logger.info( "  %s"% test)

        # TODO - documentation

        self._producer()

        expect(threading.active_count() == 1, "Leftover threads?")

        # Setup cs files
        self._setup_cs_files()

        # Return True if all tests passed from our point of view
        logger.info( "At test-scheduler close, state is:")
        rv = True
        for test in self._tests:
            phase, status, nl_fail = self._get_test_data(test)

            if status == TEST_PASS_STATUS and phase == RUN_PHASE:
                # Be cautious about telling the user that the test passed. This
                # status should match what they would see on the dashboard. Our
                # self._tests data does not include comparison-fail information,
                # so we need to parse the TestStatus file.
                ts = TestStatus(self._get_test_dir(test))
                status = ts.get_overall_test_status()

            if status not in [TEST_PASS_STATUS, TEST_PEND_STATUS]:
                logger.info( "%s %s (phase %s)" % (status, test, phase))
                rv = False

            elif nl_fail:
                logger.info( "%s %s (but otherwise OK)" % (NAMELIST_FAIL_STATUS, test))
                rv = False

            else:
                logger.info("%s %s %s" % (status, test, phase))

            logger.info( "    Case dir: %s" % self._get_test_dir(test))

        logger.info( "test-scheduler took %s seconds"% (time.time() - start_time))

        return rv
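A minimal usage sketch for the scheduler above. The test names, machine, compiler, baseline name, and test id below are hypothetical; real callers (for example the create_test driver) pass many more of the constructor options shown earlier.

import sys

# Hypothetical inputs: any valid full test names for the target machine work here.
test_names = ["ERS.f19_g16.B1850.melvin_gnu", "SMS_D.f09_g16.X.melvin_gnu"]

scheduler = TestScheduler(test_names,
                          machine_name="melvin",      # hypothetical machine
                          compiler="gnu",
                          baseline_gen_name="my-baselines",
                          test_id="mytestid")

# run_tests() returns True only if every test passed from the scheduler's
# point of view (a namelist-only failure also counts as a failure).
all_passed = scheduler.run_tests()
sys.exit(0 if all_passed else 1)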
Example #26
    def _compare_baseline(self):
        with self._test_status as ts:
            ts.set_status(CIME.test_status.BASELINE_PHASE,
                          CIME.test_status.TEST_FAIL_STATUS)

            run_dir = self._case.get_value("RUNDIR")
            case_name = self._case.get_value("CASE")
            base_dir = os.path.join(self._case.get_value("BASELINE_ROOT"),
                                    self._case.get_value("BASECMP_CASE"))

            test_name = "{}".format(case_name.split('.')[-1])
            evv_config = {
                test_name: {
                    "module": os.path.join(evv_lib_dir, "extensions",
                                           "tsc.py"),
                    "test-case": case_name,
                    "test-dir": run_dir,
                    "ref-case": "Baseline",
                    "ref-dir": base_dir,
                    "time-slice": [OUT_FREQ, SIM_LENGTH],
                    "inspect-times": INSPECT_AT,
                    "variables": VAR_LIST,
                    "p-threshold": P_THRESHOLD,
                }
            }

            json_file = os.path.join(run_dir, '.'.join([case_name, 'json']))
            with open(json_file, 'w') as config_file:
                json.dump(evv_config, config_file, indent=4)

            evv_out_dir = os.path.join(run_dir, '.'.join([case_name, 'evv']))
            evv(['-e', json_file, '-o', evv_out_dir])

            with open(os.path.join(evv_out_dir, 'index.json'), 'r') as evv_f:
                evv_status = json.load(evv_f)

            comments = ""
            for evv_elem in evv_status['Data']['Elements']:
                if evv_elem['Type'] == 'ValSummary' \
                        and evv_elem['TableTitle'] == 'Time step convergence test':
                    comments = "; ".join("{}: {}".format(key, val)
                                         for key, val in evv_elem['Data']
                                         [test_name][''].items())
                    if evv_elem['Data'][test_name]['']['Test status'].lower(
                    ) == 'pass':
                        self._test_status.set_status(
                            CIME.test_status.BASELINE_PHASE,
                            CIME.test_status.TEST_PASS_STATUS)
                    break

            status = self._test_status.get_status(
                CIME.test_status.BASELINE_PHASE)
            mach_name = self._case.get_value("MACH")
            mach_obj = Machines(machine=mach_name)
            htmlroot = CIME.utils.get_htmlroot(mach_obj)
            urlroot = CIME.utils.get_urlroot(mach_obj)
            if htmlroot is not None:
                with CIME.utils.SharedArea():
                    dir_util.copy_tree(evv_out_dir,
                                       os.path.join(htmlroot, 'evv',
                                                    case_name),
                                       preserve_mode=False)
                if urlroot is None:
                    urlroot = "[{}_URL]".format(mach_name.capitalize())
                viewing = "{}/evv/{}/index.html".format(urlroot, case_name)
            else:
                viewing = "{}\n" \
                          "    EVV viewing instructions can be found at: " \
                          "        https://github.com/E3SM-Project/E3SM/blob/master/cime/scripts/" \
                          "climate_reproducibility/README.md#test-passfail-and-extended-output" \
                          "".format(evv_out_dir)

            comments = "{} {} for test '{}'.\n" \
                       "    {}\n" \
                       "    EVV results can be viewed at:\n" \
                       "        {}".format(CIME.test_status.BASELINE_PHASE, status, test_name, comments, viewing)

            CIME.utils.append_testlog(comments, self._orig_caseroot)
Example #27
    def __init__(self, test_names,
                 no_run=False, no_build=False, no_batch=None,
                 test_root=None, test_id=None,
                 machine_name=None,compiler=None,
                 baseline_root=None, baseline_name=None,
                 clean=False,compare=False, generate=False, namelists_only=False,
                 project=None, parallel_jobs=None,
                 xml_machine=None, xml_compiler=None, xml_category=None,xml_testlist=None):
    ###########################################################################
        self._cime_root = CIME.utils.get_cime_root()
        self._cime_model = CIME.utils.get_model()
        # needed for perl interface
        os.environ["CIMEROOT"] = self._cime_root
        self._machobj   = Machines(machine=machine_name)
        machine_name    = self._machobj.get_machine_name()

        self._no_build = no_build      if not namelists_only else True
        self._no_run   = no_run        if not self._no_build else True

        # Figure out what project to use
        if (project is None):
            self._project = CIME.utils.get_project()
            if (self._project is None):
                self._project = self._machobj.get_value("PROJECT")
        else:
            self._project = project

        # We will not use batch system if user asked for no_batch or if current
        # machine is not a batch machine
        self._no_batch = no_batch or not self._machobj.has_batch_system()

        self._test_root = test_root if test_root is not None else self._machobj.get_value("CESMSCRATCHROOT")
        if (self._project is not None):
            self._test_root = self._test_root.replace("$PROJECT", self._project)
        self._test_root = os.path.abspath(self._test_root)

        self._test_id = test_id if test_id is not None else CIME.utils.get_utc_timestamp()

        self._compiler = compiler if compiler is not None else self._machobj.get_default_compiler()
        expect(self._machobj.is_valid_compiler(self._compiler),
               "Compiler %s not valid for machine %s" % (self._compiler,machine_name))

        self._clean          = clean
        self._namelists_only = namelists_only

        # Extra data associated with tests, do not modify after construction
        # test_name -> test_data
        #   test_data: name -> value
        self._test_data      = {}

        # If xml options are provided get tests from xml file, otherwise use acme dictionary
        if(not test_names and (xml_machine is not None or xml_category is not None or xml_compiler is not None or xml_testlist is not None)):
            test_data = CIME.test_utils.get_tests_from_xml(xml_machine, xml_category, xml_compiler, xml_testlist, machine_name, compiler)
            test_names = [item["name"] for item in test_data]
            for test_datum in test_data:
                self._test_data[test_datum["name"]] = test_datum
        else:
            expect(len(test_names) > 0, "No tests to run")
            test_names = update_acme_tests.get_full_test_names(test_names, machine_name, self._compiler)

        if (parallel_jobs is None):
            self._parallel_jobs  = min(len(test_names), int(self._machobj.get_value("MAX_TASKS_PER_NODE")))
        else:
            self._parallel_jobs = parallel_jobs

        self._baseline_cmp_name = None
        self._baseline_gen_name = None
        self._compare = False
        self._generate = False
        if (compare or generate):
            # Figure out what baseline name to use
            if (baseline_name is None):
                if(compare is not None and isinstance(compare,str)):
                    self._baseline_cmp_name = compare
                    self._compare = True
                if(generate is not None and isinstance(generate,str)):
                    self._baseline_gen_name = generate
                    self._generate = True
                branch_name = CIME.utils.get_current_branch(repo=self._cime_root)
                expect(branch_name is not None, "Could not determine baseline name from branch, please use -b option")
                if(self._compare and self._baseline_cmp_name is None):
                    self._baseline_cmp_name = os.path.join(self._compiler, branch_name)
                if(self._generate and self._baseline_gen_name is None):
                    self._baseline_gen_name = os.path.join(self._compiler, branch_name)
            else:
                if(compare):
                    self._compare = True
                    self._baseline_cmp_name  = baseline_name
                    if (not self._baseline_cmp_name.startswith("%s/" % self._compiler)):
                        self._baseline_cmp_name = os.path.join(self._compiler, self._baseline_cmp_name)
                if(generate):
                    self._generate = True
                    self._baseline_gen_name  = baseline_name
                    if (not self._baseline_gen_name.startswith("%s/" % self._compiler)):
                        self._baseline_gen_name = os.path.join(self._compiler, self._baseline_gen_name)

            # Compute baseline_root
            self._baseline_root = baseline_root if baseline_root is not None else self._machobj.get_value("CCSM_BASELINE")
            if (self._project is not None):
                self._baseline_root = self._baseline_root.replace("$PROJECT", self._project)
            self._baseline_root = os.path.abspath(self._baseline_root)

            if (self._compare):
                full_baseline_dir = os.path.join(self._baseline_root, self._baseline_cmp_name)
                expect(os.path.isdir(full_baseline_dir),
                       "Missing baseline comparison directory %s" % full_baseline_dir)
        else:
            self._baseline_root = None
        # This is the only data that multiple threads will simultaneously access.
        # Each test has its own value, and setting/retrieving items from a dict
        # is atomic, so this should be fine to use without a mutex.
        # Since the namelist phase can fail without aborting later phases, we
        # need some extra state to remember tests that had namelist problems.
        # name -> (phase, status, has_namelist_problem)
        self._tests = {}
        for test_name in test_names:
            self._tests[test_name] = (INITIAL_PHASE, TEST_PASS_STATUS, False)

        # Oversubscribe by 1/4
        pes = int(self._machobj.get_value("PES_PER_NODE"))
        self._proc_pool = int(pes * 1.25)

        # Setup phases
        self._phases = list(PHASES)
        if (no_build):
            self._phases.remove(BUILD_PHASE)
        if (no_run):
            self._phases.remove(RUN_PHASE)
        if (not self._compare and not self._generate):
            self._phases.remove(NAMELIST_PHASE)

        # None of the test directories should already exist.
        for test in self._tests:
            expect(not os.path.exists(self._get_test_dir(test)),
                   "Cannot create new case in directory '%s', it already exists. Pick a different test-id" % self._get_test_dir(test))
Ejemplo n.º 28
0
class EnvModule(object):

    # TODO - write env_mach_specific files into case

    # Public API

    def __init__(self, machine, compiler, cimeroot, caseroot, mpilib, debug=False):
        self._machine  = Machines(machine=machine)
        self._compiler = compiler
        self._cimeroot = cimeroot
        self._caseroot = caseroot
        self._mpilib   = mpilib
        self._debug    = debug

        self._module_system = self._machine.get_module_system_type()

    def load_env_for_case(self):
        mach_specific = EnvMachSpecific(caseroot=self._caseroot)

        module_nodes = mach_specific.get_node("modules")
        env_nodes    = mach_specific.get_node("environment_variables")

        if (module_nodes is not None):
            modules_to_load = self._compute_module_actions(module_nodes)
            self.load_modules(modules_to_load)
        if (env_nodes is not None):
            envs_to_set = self._compute_env_actions(env_nodes)
            self.load_envs(envs_to_set)

    def load_modules(self, modules_to_load):
        if (self._module_system == "module"):
            self._load_module_modules(modules_to_load)
        elif (self._module_system == "soft"):
            self._load_soft_modules(modules_to_load)
        elif (self._module_system == "dotkit"):
            self._load_dotkit_modules(modules_to_load)
        elif (self._module_system == "none"):
            self._load_none_modules(modules_to_load)
        else:
            expect(False, "Unhandled module system '%s'" % self._module_system)

    def load_envs(self, envs_to_set):
        for env_name, env_value in envs_to_set:
            # Let bash do the work on evaluating and resolving env_value
            os.environ[env_name] = run_cmd("echo %s" % env_value)

    # Private API

    def _compute_module_actions(self, module_nodes):
        return self._compute_actions(module_nodes, "command")

    def _compute_env_actions(self, env_nodes):
        return self._compute_actions(env_nodes, "env")

    def _compute_actions(self, nodes, child_tag):
        result = [] # list of tuples ("name", "argument")

        for node in nodes:
            if (self._match_attribs(node.attrib)):
                for child in node:
                    expect(child.tag == child_tag, "Expected %s element" % child_tag)
                    result.append( (child.get("name"), child.text) )

        return result

    def _match_attribs(self, attribs):
        if ("compiler" in attribs and
            not self._match(self._compiler, attribs["compiler"])):
            return False
        elif ("mpilib" in attribs and
            not self._match(self._mpilib, attribs["mpilib"])):
            return False
        elif ("debug" in attribs and
            not self._match("TRUE" if self._debug else "FALSE", attribs["debug"].upper())):
            return False

        return True

    def _match(self, my_value, xml_value):
        if (xml_value.startswith("!")):
            return my_value != xml_value[1:]
        else:
            return my_value == xml_value

    def _load_module_modules(self, modules_to_load):
        python_mod_cmd = self._machine.get_module_system_cmd_path("python")
        for action, argument in modules_to_load:
            cmd = "%s %s %s" % (python_mod_cmd, action, argument)
            py_module_code = run_cmd(cmd)
            exec(py_module_code)

    def _load_soft_modules(self, modules_to_load):
        expect(False, "Not yet implemented")

    def _load_dotkit_modules(self, modules_to_load):
        expect(False, "Not yet implemented")

    def _load_none_modules(self, modules_to_load):
        expect(False, "Not yet implemented")
Ejemplo n.º 29
0
def load_balancing_submit(compset, res, pesfile, mpilib, compiler, project,
                          machine, extra_options_file, test_id, force_purge,
                          test_root):
    ################################################################################
    # Read in list of pes from given file
    if not os.access(pesfile, os.R_OK):
        logger.critical('ERROR: File %s not found', pesfile)
        raise SystemExit(1)
    logger.info('Reading XML file %s. Searching for pesize entries:', pesfile)
    try:
        pesobj = Pes(pesfile)
    except ParseError:
        logger.critical('ERROR: File %s not parseable', pesfile)
        raise SystemExit(1)

    pesize_list = []
    for node in pesobj.get_nodes('pes'):
        pesize = node.get('pesize')
        if not pesize:
            logger.critical('No pesize for pes node in file %s', pesfile)
        if pesize in pesize_list:
            logger.critical('pesize %s duplicated in file %s', pesize, pesfile)
        pesize_list.append(pesize)

    if not pesize_list:
        logger.critical('ERROR: No grid entries found in pes file %s', pesfile)
        raise SystemExit(1)

    machobj = Machines(machine=machine)
    if test_root is None:
        test_root = machobj.get_value("CIME_OUTPUT_ROOT")
    if machine is None:
        machine = machobj.get_machine_name()
        print "machine is {}".format(machine)
    if compiler is None:
        compiler = machobj.get_default_compiler()
        print "compiler is {}".format(compiler)
    if mpilib is None:
        mpilib = machobj.get_default_MPIlib({"compiler": compiler})

    test_names = []
    for i in range(len(pesize_list)):
        test_names.append(
            get_full_test_name("PFS_I{}".format(i),
                               grid=res,
                               compset=compset,
                               machine=machine,
                               compiler=compiler))
        casedir = os.path.join(test_root, test_names[-1] + "." + test_id)
        print "casedir is {}".format(casedir)
        if os.path.isdir(casedir):
            if force_purge:
                logger.info('Removing directory %s', casedir)
                shutil.rmtree(casedir)
            else:
                expect(
                    False,
                    "casedir {} already exists, use the --force-purge option, --test-root or"
                    " --test-id options".format(casedir))

    tests = TestScheduler(test_names,
                          no_setup=True,
                          compiler=compiler,
                          machine_name=machine,
                          mpilib=mpilib,
                          test_root=test_root,
                          test_id=test_id,
                          project=project)
    success = tests.run_tests(wait=True)
    expect(success, "Error in creating cases")
    testnames = []
    for test in tests.get_testnames():
        testname = os.path.join(test_root, test + "." + test_id)
        testnames.append(testname)
        logger.info("test is {}".format(testname))
        with Case(testname) as case:
            pes_ntasks, pes_nthrds, pes_rootpe, _ = \
                                                    pesobj.find_pes_layout('any', 'any', 'any', pesize_opts=pesize_list.pop(0))
            for key in pes_ntasks:
                case.set_value(key, pes_ntasks[key])
            for key in pes_nthrds:
                case.set_value(key, pes_nthrds[key])
            for key in pes_rootpe:
                case.set_value(key, pes_rootpe[key])
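For orientation, a hedged sketch of the per-pesize test names this routine builds with get_full_test_name (the helper used above); the grid, compset, machine, and compiler values are placeholders, and the exact string layout is whatever get_full_test_name produces:

for i in range(3):
    name = get_full_test_name("PFS_I{}".format(i),
                              grid="f19_g16", compset="A",
                              machine="anymachine", compiler="gnu")
    print(name)   # e.g. PFS_I0.f19_g16.A.anymachine_gnu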
Ejemplo n.º 30
0
def query_cime(machine, param):
    ###############################################################################
    mach_obj = Machines(machine=machine)
    return mach_obj.get_value(param)
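A minimal usage sketch for query_cime; the machine name here is a placeholder, and CIME_OUTPUT_ROOT is a parameter that appears elsewhere in these examples:

output_root = query_cime("anymachine", "CIME_OUTPUT_ROOT")   # hypothetical machine name
print(output_root)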
Ejemplo n.º 31
0
    os.chdir(caseroot)

    non_local = case.get_value("NONLOCAL")

    models = case.get_values("COMP_CLASSES")
    mach = case.get_value("MACH")
    compiler = case.get_value("COMPILER")
    debug = case.get_value("DEBUG")
    mpilib = case.get_value("MPILIB")
    sysos = case.get_value("OS")
    comp_interface = case.get_value("COMP_INTERFACE")
    extra_machines_dir = case.get_value("EXTRA_MACHDIR")

    expect(mach is not None, "xml variable MACH is not set")

    mach_obj = Machines(machine=mach, extra_machines_dir=extra_machines_dir)

    # Check that $DIN_LOC_ROOT exists or can be created:
    if not non_local:
        din_loc_root = case.get_value("DIN_LOC_ROOT")
        testcase = case.get_value("TESTCASE")

        if not os.path.isdir(din_loc_root):
            try:
                os.makedirs(din_loc_root)
            except OSError as e:
                if e.errno == errno.EACCES:
                    logger.info("Invalid permissions to create {}".format(
                        din_loc_root))

        expect(
Ejemplo n.º 32
0
def create_cdash_xml(results,
                     cdash_build_name,
                     cdash_project,
                     cdash_build_group,
                     force_log_upload=False):
    ###############################################################################

    #
    # Create dart config file
    #

    current_time = time.time()

    utc_time_tuple = time.gmtime(current_time)
    cdash_timestamp = time.strftime("%H:%M:%S", utc_time_tuple)

    hostname = Machines().get_machine_name()
    if hostname is None:
        hostname = socket.gethostname().split(".")[0]
        logging.warning(
            "Could not convert hostname '{}' into an E3SM machine name".format(
                hostname))

    for drop_method in ["https", "http"]:
        dart_config = """
SourceDirectory: {0}
BuildDirectory: {0}

# Site is something like machine.domain, i.e. pragmatic.crd
Site: {1}

# Build name is osname-revision-compiler, i.e. Linux-2.4.2-2smp-c++
BuildName: {2}

# Submission information
IsCDash: TRUE
CDashVersion:
QueryCDashVersion:
DropSite: my.cdash.org
DropLocation: /submit.php?project={3}
DropSiteUser:
DropSitePassword:
DropSiteMode:
DropMethod: {6}
TriggerSite:
ScpCommand: {4}

# Dashboard start time
NightlyStartTime: {5} UTC

UseLaunchers:
CurlOptions: CURLOPT_SSL_VERIFYPEER_OFF;CURLOPT_SSL_VERIFYHOST_OFF
""".format(
            os.getcwd(),
            hostname,
            cdash_build_name,
            cdash_project,
            find_executable("scp"),
            cdash_timestamp,
            drop_method,
        )

        with open("DartConfiguration.tcl", "w") as dart_fd:
            dart_fd.write(dart_config)

        utc_time = time.strftime("%Y%m%d-%H%M", utc_time_tuple)
        testing_dir = os.path.join("Testing", utc_time)
        if os.path.isdir(testing_dir):
            shutil.rmtree(testing_dir)

        os.makedirs(os.path.join("Testing", utc_time))

        # Make tag file
        with open("Testing/TAG", "w") as tag_fd:
            tag_fd.write("{}\n{}\n".format(utc_time, cdash_build_group))

        create_cdash_xml_fakes(
            results,
            cdash_build_name,
            cdash_build_group,
            utc_time,
            current_time,
            hostname,
        )

        create_cdash_upload_xml(
            results,
            cdash_build_name,
            cdash_build_group,
            utc_time,
            hostname,
            force_log_upload,
        )

        stat, out, _ = run_cmd("ctest -VV -D NightlySubmit",
                               combine_output=True)
        if stat != 0:
            logging.warning("ctest upload drop method {} FAILED:\n{}".format(
                drop_method, out))
        else:
            logging.info("Upload SUCCESS:\n{}".format(out))
            return

    expect(False, "All cdash upload attempts failed")
Ejemplo n.º 33
0
            "Macros.cmake"
        ]
        for file_to_clean in files_to_clean:
            if os.path.exists(file_to_clean) and not (keep and file_to_clean
                                                      in keep):
                os.remove(file_to_clean)
                logger.info("Successfully cleaned {}".format(file_to_clean))

        if not test_mode:
            # rebuild the models (even on restart)
            case.set_value("BUILD_COMPLETE", False)

        # Cannot leave case in bad state (missing env_mach_specific.xml)
        if clean and not os.path.isfile("env_mach_specific.xml"):
            case.flush()
            configure(Machines(machine=mach,
                               extra_machines_dir=extra_machines_dir),
                      caseroot, ["Makefile"],
                      compiler,
                      mpilib,
                      debug,
                      comp_interface,
                      sysos,
                      noenv=True,
                      extra_machines_dir=extra_machines_dir)
            case.read_xml()

    if not clean:
        if not non_local:
            case.load_env()

        # creates the Macros.make, Depends.compiler, Depends.machine, Depends.machine.compiler
Ejemplo n.º 34
0
def create_cdash_xml(results, cdash_build_name, cdash_project,
                     cdash_build_group):
    ###############################################################################

    #
    # Create dart config file
    #

    current_time = time.time()

    utc_time_tuple = time.gmtime(current_time)
    cdash_timestamp = time.strftime("%H:%M:%S", utc_time_tuple)

    hostname = Machines().get_machine_name()
    if (hostname is None):
        hostname = socket.gethostname().split(".")[0]
        logging.warning(
            "Could not convert hostname '{}' into an ACME machine name".format(
                hostname))

    dart_config = \
"""
SourceDirectory: {0}
BuildDirectory: {0}

# Site is something like machine.domain, i.e. pragmatic.crd
Site: {1}

# Build name is osname-revision-compiler, i.e. Linux-2.4.2-2smp-c++
BuildName: {2}

# Submission information
IsCDash: TRUE
CDashVersion:
QueryCDashVersion:
DropSite: my.cdash.org
DropLocation: /submit.php?project={3}
DropSiteUser:
DropSitePassword:
DropSiteMode:
DropMethod: http
TriggerSite:
ScpCommand: {4}

# Dashboard start time
NightlyStartTime: {5} UTC
""".format(os.getcwd(), hostname, cdash_build_name, cdash_project,
           distutils.spawn.find_executable("scp"), cdash_timestamp)

    with open("DartConfiguration.tcl", "w") as dart_fd:
        dart_fd.write(dart_config)

    utc_time = time.strftime('%Y%m%d-%H%M', utc_time_tuple)
    os.makedirs(os.path.join("Testing", utc_time))

    # Make tag file
    with open("Testing/TAG", "w") as tag_fd:
        tag_fd.write("{}\n{}\n".format(utc_time, cdash_build_group))

    create_cdash_test_xml(results, cdash_build_name, cdash_build_group,
                          utc_time, current_time, hostname)

    create_cdash_upload_xml(results, cdash_build_name, cdash_build_group,
                            utc_time, hostname)

    CIME.utils.run_cmd_no_fail("ctest -VV -D NightlySubmit", verbose=True)
Ejemplo n.º 35
0
    def _compare_baseline(self):
        with self._test_status:
            if int(self._case.get_value("RESUBMIT")) > 0:
                # This is here because the comparison is run for each submission
                # and we only want to compare once the whole run is finished. We
                # need to return a pass here to continue the submission process.
                self._test_status.set_status(
                    CIME.test_status.BASELINE_PHASE, CIME.test_status.TEST_PASS_STATUS
                )
                return

            self._test_status.set_status(
                CIME.test_status.BASELINE_PHASE, CIME.test_status.TEST_FAIL_STATUS
            )

            run_dir = self._case.get_value("RUNDIR")
            case_name = self._case.get_value("CASE")
            base_dir = os.path.join(
                self._case.get_value("BASELINE_ROOT"),
                self._case.get_value("BASECMP_CASE"),
            )

            test_name = "{}".format(case_name.split(".")[-1])
            evv_config = {
                test_name: {
                    "module": os.path.join(evv_lib_dir, "extensions", "ks.py"),
                    "test-case": "Test",
                    "test-dir": run_dir,
                    "ref-case": "Baseline",
                    "ref-dir": base_dir,
                    "var-set": "default",
                    "ninst": NINST,
                    "critical": 13,
                    "component": self.component,
                }
            }

            json_file = os.path.join(run_dir, ".".join([case_name, "json"]))
            with open(json_file, "w") as config_file:
                json.dump(evv_config, config_file, indent=4)

            evv_out_dir = os.path.join(run_dir, ".".join([case_name, "evv"]))
            evv(["-e", json_file, "-o", evv_out_dir])

            with open(os.path.join(evv_out_dir, "index.json")) as evv_f:
                evv_status = json.load(evv_f)

            comments = ""
            for evv_ele in evv_status["Page"]["elements"]:
                if "Table" in evv_ele:
                    comments = "; ".join(
                        "{}: {}".format(key, val[0])
                        for key, val in evv_ele["Table"]["data"].items()
                    )
                    if evv_ele["Table"]["data"]["Test status"][0].lower() == "pass":
                        self._test_status.set_status(
                            CIME.test_status.BASELINE_PHASE,
                            CIME.test_status.TEST_PASS_STATUS,
                        )
                    break

            status = self._test_status.get_status(CIME.test_status.BASELINE_PHASE)
            mach_name = self._case.get_value("MACH")
            mach_obj = Machines(machine=mach_name)
            htmlroot = CIME.utils.get_htmlroot(mach_obj)
            urlroot = CIME.utils.get_urlroot(mach_obj)
            if htmlroot is not None:
                with CIME.utils.SharedArea():
                    dir_util.copy_tree(
                        evv_out_dir,
                        os.path.join(htmlroot, "evv", case_name),
                        preserve_mode=False,
                    )
                if urlroot is None:
                    urlroot = "[{}_URL]".format(mach_name.capitalize())
                viewing = "{}/evv/{}/index.html".format(urlroot, case_name)
            else:
                viewing = (
                    "{}\n"
                    "    EVV viewing instructions can be found at: "
                    "        https://github.com/E3SM-Project/E3SM/blob/master/cime/scripts/"
                    "climate_reproducibility/README.md#test-passfail-and-extended-output"
                    "".format(evv_out_dir)
                )

            comments = (
                "{} {} for test '{}'.\n"
                "    {}\n"
                "    EVV results can be viewed at:\n"
                "        {}".format(
                    CIME.test_status.BASELINE_PHASE,
                    status,
                    test_name,
                    comments,
                    viewing,
                )
            )

            CIME.utils.append_testlog(comments, self._orig_caseroot)
Ejemplo n.º 36
0
class TestScheduler(object):
    ###############################################################################

    ###########################################################################
    def __init__(self,
                 test_names,
                 test_data=None,
                 no_run=False,
                 no_build=False,
                 no_setup=False,
                 no_batch=None,
                 test_root=None,
                 test_id=None,
                 machine_name=None,
                 compiler=None,
                 baseline_root=None,
                 baseline_cmp_name=None,
                 baseline_gen_name=None,
                 clean=False,
                 namelists_only=False,
                 project=None,
                 parallel_jobs=None,
                 walltime=None,
                 proc_pool=None,
                 use_existing=False,
                 save_timing=False,
                 queue=None,
                 allow_baseline_overwrite=False,
                 output_root=None):
        ###########################################################################
        self._cime_root = CIME.utils.get_cime_root()
        self._cime_model = CIME.utils.get_model()
        self._allow_baseline_overwrite = allow_baseline_overwrite
        self._save_timing = save_timing
        self._queue = queue
        self._test_data = {} if test_data is None else test_data  # Format:  {test_name -> {data_name -> data}}

        self._machobj = Machines(machine=machine_name)

        self._no_setup = no_setup
        self._no_build = no_build or no_setup or namelists_only
        self._no_run = no_run or self._no_build
        self._output_root = output_root
        # Figure out what project to use
        if project is None:
            self._project = CIME.utils.get_project()
            if self._project is None:
                self._project = self._machobj.get_value("PROJECT")
        else:
            self._project = project

        # We will not use batch system if user asked for no_batch or if current
        # machine is not a batch machine
        self._no_batch = no_batch or not self._machobj.has_batch_system()
        expect(not (self._no_batch and self._queue is not None),
               "Does not make sense to request a queue without batch system")

        # Determine and resolve test_root
        if test_root is not None:
            self._test_root = test_root
        elif self._output_root is not None:
            self._test_root = self._output_root
        else:
            self._test_root = self._machobj.get_value("CIME_OUTPUT_ROOT")

        if self._project is not None:
            self._test_root = self._test_root.replace("$PROJECT",
                                                      self._project)

        self._test_root = os.path.abspath(self._test_root)
        self._test_id = test_id if test_id is not None else CIME.utils.get_timestamp()

        self._compiler = self._machobj.get_default_compiler() if compiler is None else compiler

        self._clean = clean
        self._namelists_only = namelists_only

        self._walltime = walltime

        if parallel_jobs is None:
            self._parallel_jobs = min(
                len(test_names),
                int(self._machobj.get_value("MAX_TASKS_PER_NODE")))
        else:
            self._parallel_jobs = parallel_jobs

        self._baseline_cmp_name = baseline_cmp_name  # Implies comparison should be done if not None
        self._baseline_gen_name = baseline_gen_name  # Implies generation should be done if not None

        if baseline_cmp_name or baseline_gen_name:
            # Compute baseline_root
            self._baseline_root = baseline_root if baseline_root is not None \
                else self._machobj.get_value("BASELINE_ROOT")

            if self._project is not None:
                self._baseline_root = self._baseline_root.replace(
                    "$PROJECT", self._project)

            self._baseline_root = os.path.abspath(self._baseline_root)

            if self._baseline_cmp_name:
                full_baseline_dir = os.path.join(self._baseline_root,
                                                 self._baseline_cmp_name)
                expect(
                    os.path.isdir(full_baseline_dir),
                    "Missing baseline comparison directory %s" %
                    full_baseline_dir)

            # the following is to assure that the existing generate directory is not overwritten
            if self._baseline_gen_name:
                full_baseline_dir = os.path.join(self._baseline_root,
                                                 self._baseline_gen_name)
                existing_baselines = []
                for test_name in test_names:
                    test_baseline = os.path.join(full_baseline_dir, test_name)
                    if os.path.isdir(test_baseline):
                        existing_baselines.append(test_baseline)
                expect(allow_baseline_overwrite or len(existing_baselines) == 0,
                       "Baseline directories already exist: %s\n"
                       "Use --allow_baseline_overwrite to avoid this error" % existing_baselines)
        else:
            self._baseline_root = None

        # This is the only data that multiple threads will simultaneously access.
        # Each test has its own value, and setting/retrieving items from a dict
        # is atomic, so this should be fine to use without a mutex.
        # name -> (phase, status)
        self._tests = {}
        for test_name in test_names:
            self._tests[test_name] = (TEST_START, TEST_PASS_STATUS)

        # Oversubscribe by 1/4
        if proc_pool is None:
            pes = int(self._machobj.get_value("PES_PER_NODE"))
            self._proc_pool = int(pes * 1.25)
        else:
            self._proc_pool = int(proc_pool)

        self._procs_avail = self._proc_pool

        # Setup phases
        self._phases = list(PHASES)
        if self._no_setup:
            self._phases.remove(SETUP_PHASE)
        if self._no_build:
            self._phases.remove(SHAREDLIB_BUILD_PHASE)
            self._phases.remove(MODEL_BUILD_PHASE)
        if self._no_run:
            self._phases.remove(RUN_PHASE)

        if use_existing:
            for test in self._tests:
                ts = TestStatus(self._get_test_dir(test))
                for phase, status in ts:
                    if phase in CORE_PHASES:
                        if status in [TEST_PEND_STATUS, TEST_FAIL_STATUS]:
                            # We need to pick up here
                            break
                        else:
                            self._update_test_status(test, phase,
                                                     TEST_PEND_STATUS)
                            self._update_test_status(test, phase, status)
        else:
            # None of the test directories should already exist.
            for test in self._tests:
                expect(
                    not os.path.exists(self._get_test_dir(test)),
                    "Cannot create new case in directory '%s', it already exists."
                    " Pick a different test-id" % self._get_test_dir(test))

        # By the end of this constructor, this program should never hard abort,
        # instead, errors will be placed in the TestStatus files for the various
        # tests cases

    ###########################################################################
    def _log_output(self, test, output):
        ###########################################################################
        test_dir = self._get_test_dir(test)
        if not os.path.isdir(test_dir):
            # Note: creating this directory here could cause create_newcase to fail
            # if _log_output is called before create_newcase has run.
            os.makedirs(test_dir)
        append_status(output, caseroot=test_dir, sfile="TestStatus.log")

    ###########################################################################
    def _get_case_id(self, test):
        ###########################################################################
        baseline_action_code = ""
        if self._baseline_gen_name:
            baseline_action_code += "G"
        if self._baseline_cmp_name:
            baseline_action_code += "C"
        if len(baseline_action_code) > 0:
            return "%s.%s.%s" % (test, baseline_action_code, self._test_id)
        else:
            return "%s.%s" % (test, self._test_id)

    ###########################################################################
    def _get_test_dir(self, test):
        ###########################################################################
        return os.path.join(self._test_root, self._get_case_id(test))

    ###########################################################################
    def _get_test_data(self, test):
        ###########################################################################
        # Must be atomic
        return self._tests[test]

    ###########################################################################
    def _is_broken(self, test):
        ###########################################################################
        status = self._get_test_status(test)
        return status != TEST_PASS_STATUS and status != TEST_PEND_STATUS

    ###########################################################################
    def _work_remains(self, test):
        ###########################################################################
        test_phase, test_status = self._get_test_data(test)
        return (test_status == TEST_PASS_STATUS or test_status == TEST_PEND_STATUS) and\
            test_phase != self._phases[-1]

    ###########################################################################
    def _get_test_status(self, test, phase=None):
        ###########################################################################
        curr_phase, curr_status = self._get_test_data(test)
        if phase is None or phase == curr_phase:
            return curr_status
        else:
            expect(
                phase is None
                or self._phases.index(phase) < self._phases.index(curr_phase),
                "Tried to see the future")
            # Assume all older phases PASSed
            return TEST_PASS_STATUS

    ###########################################################################
    def _get_test_phase(self, test):
        ###########################################################################
        return self._get_test_data(test)[0]

    ###########################################################################
    def _update_test_status(self, test, phase, status):
        ###########################################################################
        phase_idx = self._phases.index(phase)
        old_phase, old_status = self._get_test_data(test)

        if old_phase == phase:
            expect(
                old_status == TEST_PEND_STATUS,
                "Only valid to transition from PEND to something else, found '%s' for phase '%s'"
                % (old_status, phase))
            expect(status != TEST_PEND_STATUS,
                   "Cannot transition from PEND -> PEND")
        else:
            expect(
                old_status == TEST_PASS_STATUS,
                "Why did we move on to next phase when prior phase did not pass?"
            )
            expect(status == TEST_PEND_STATUS,
                   "New phase should be set to pending status")
            expect(
                self._phases.index(old_phase) == phase_idx - 1,
                "Skipped phase? %s %s" % (old_phase, phase_idx))

        # Must be atomic
        self._tests[test] = (phase, status)

    ###########################################################################
    def _shell_cmd_for_phase(self, test, cmd, phase, from_dir=None):
        ###########################################################################
        while True:
            rc, output, errput = run_cmd(cmd, from_dir=from_dir)
            if rc != 0:
                self._log_output(
                    test,
                    "%s FAILED for test '%s'.\nCommand: %s\nOutput: %s\n\nErrput: %s"
                    % (phase, test, cmd, output, errput))
                # Temporary hack to get around odd file descriptor use by
                # buildnml scripts.
                if "bad interpreter" in errput:
                    time.sleep(1)
                    continue
                else:
                    break
            else:
                # We don't want "RUN PASSED" in the TestStatus.log if the only thing that
                # succeeded was the submission.
                if phase != RUN_PHASE or self._no_batch:
                    self._log_output(
                        test,
                        "%s PASSED for test '%s'.\nCommand: %s\nOutput: %s" %
                        (phase, test, cmd, output))
                break

        return rc == 0

    ###########################################################################
    def _create_newcase_phase(self, test):
        ###########################################################################
        test_dir = self._get_test_dir(test)

        _, case_opts, grid, compset,\
            machine, compiler, test_mods = CIME.utils.parse_test_name(test)

        create_newcase_cmd = "%s --case %s --res %s --mach %s --compiler %s --compset %s"\
                               " --test" % \
                              (os.path.join(self._cime_root, "scripts", "create_newcase"),
                               test_dir, grid, machine, compiler, compset)
        if self._project is not None:
            create_newcase_cmd += " --project %s " % self._project
        if self._output_root is not None:
            create_newcase_cmd += " --output-root %s " % self._output_root

        if test_mods is not None:
            files = Files()
            (component, modspath) = test_mods.split('/', 1)
            testmods_dir = files.get_value("TESTS_MODS_DIR",
                                           {"component": component})

            test_mod_file = os.path.join(testmods_dir, component, modspath)
            if not os.path.exists(test_mod_file):
                self._log_output(test,
                                 "Missing testmod file '%s'" % test_mod_file)
                return False
            create_newcase_cmd += " --user-mods-dir %s" % test_mod_file

        if case_opts is not None:
            for case_opt in case_opts:  # pylint: disable=not-an-iterable
                if case_opt.startswith('M'):
                    mpilib = case_opt[1:]
                    create_newcase_cmd += " --mpilib %s" % mpilib
                    logger.debug(" MPILIB set to %s" % mpilib)
                if case_opt.startswith('N'):
                    ninst = case_opt[1:]
                    create_newcase_cmd += " --ninst %s" % ninst
                    logger.debug(" NINST set to %s" % ninst)
                if case_opt.startswith('P'):
                    pesize = case_opt[1:]
                    create_newcase_cmd += " --pecount %s" % pesize

        if self._queue is not None:
            create_newcase_cmd += " --queue=%s" % self._queue

        if self._walltime is not None:
            create_newcase_cmd += " --walltime %s" % self._walltime
        elif test in self._test_data and "options" in self._test_data[test] and \
                "wallclock" in self._test_data[test]['options']:
            create_newcase_cmd += " --walltime %s" % self._test_data[test][
                'options']['wallclock']

        logger.debug("Calling create_newcase: " + create_newcase_cmd)
        return self._shell_cmd_for_phase(test, create_newcase_cmd,
                                         CREATE_NEWCASE_PHASE)

    ###########################################################################
    def _xml_phase(self, test):
        ###########################################################################
        test_case = CIME.utils.parse_test_name(test)[0]

        # Create, fill and write an envtest object
        test_dir = self._get_test_dir(test)
        envtest = EnvTest(test_dir)

        # Determine list of component classes that this coupler/driver knows how
        # to deal with. This list follows the same order as compset longnames follow.
        files = Files()
        drv_config_file = files.get_value("CONFIG_CPL_FILE")
        drv_comp = Component(drv_config_file)
        envtest.add_elements_by_group(files, {}, "env_test.xml")
        envtest.add_elements_by_group(drv_comp, {}, "env_test.xml")
        envtest.set_value("TESTCASE", test_case)
        envtest.set_value("TEST_TESTID", self._test_id)
        envtest.set_value("CASEBASEID", test)
        if test in self._test_data and "options" in self._test_data[test] and \
                "memleak_tolerance" in self._test_data[test]['options']:
            envtest.set_value(
                "TEST_MEMLEAK_TOLERANCE",
                self._test_data[test]['options']['memleak_tolerance'])

        test_argv = "-testname %s -testroot %s" % (test, self._test_root)
        if self._baseline_gen_name:
            test_argv += " -generate %s" % self._baseline_gen_name
            basegen_case_fullpath = os.path.join(self._baseline_root,
                                                 self._baseline_gen_name, test)
            logger.debug("basegen_case is %s" % basegen_case_fullpath)
            envtest.set_value("BASELINE_NAME_GEN", self._baseline_gen_name)
            envtest.set_value("BASEGEN_CASE",
                              os.path.join(self._baseline_gen_name, test))
        if self._baseline_cmp_name:
            test_argv += " -compare %s" % self._baseline_cmp_name
            envtest.set_value("BASELINE_NAME_CMP", self._baseline_cmp_name)
            envtest.set_value("BASECMP_CASE",
                              os.path.join(self._baseline_cmp_name, test))

        envtest.set_value("TEST_ARGV", test_argv)
        envtest.set_value("CLEANUP", self._clean)

        if self._baseline_gen_name or self._baseline_cmp_name:
            envtest.set_value("BASELINE_ROOT", self._baseline_root)
        envtest.set_value("GENERATE_BASELINE", self._baseline_gen_name
                          is not None)
        envtest.set_value("COMPARE_BASELINE", self._baseline_cmp_name
                          is not None)
        envtest.set_value(
            "CCSM_CPRNC", self._machobj.get_value("CCSM_CPRNC",
                                                  resolved=False))

        # Add the test instructions from config_test to env_test in the case
        config_test = Tests()
        testnode = config_test.get_test_node(test_case)
        envtest.add_test(testnode)

        # Determine the test_case from the test name
        test_case, case_opts = CIME.utils.parse_test_name(test)[:2]

        # Determine case_opts from the test_case
        if case_opts is not None:
            logger.debug("case_opts are %s " % case_opts)
            for opt in case_opts:

                logger.debug("case_opt is %s" % opt)
                if opt == 'D':
                    envtest.set_test_parameter("DEBUG", "TRUE")
                    logger.debug(" DEBUG set to TRUE")

                elif opt == 'E':
                    envtest.set_test_parameter("USE_ESMF_LIB", "TRUE")
                    envtest.set_test_parameter("COMP_INTERFACE", "ESMF")
                    logger.debug(" USE_ESMF_LIB set to TRUE")
                    logger.debug(" COMP_INTERFACE set to ESMF")

                elif opt == 'CG':
                    envtest.set_test_parameter("CALENDAR", "GREGORIAN")
                    logger.debug(" CALENDAR set to %s" % opt)

                elif opt.startswith('L'):
                    match = re.match('L([A-Za-z])([0-9]*)', opt)
                    stop_option = {
                        "y": "nyears",
                        "m": "nmonths",
                        "d": "ndays",
                        "h": "nhours",
                        "s": "nseconds",
                        "n": "nsteps"
                    }
                    opt = match.group(1)
                    envtest.set_test_parameter("STOP_OPTION", stop_option[opt])
                    opti = match.group(2)
                    envtest.set_test_parameter("STOP_N", opti)
                    logger.debug(" STOP_OPTION set to %s" % stop_option[opt])
                    logger.debug(" STOP_N      set to %s" % opti)

                elif opt.startswith('M'):
                    # M option handled by create newcase
                    continue

                elif opt.startswith('P'):
                    # P option handled by create newcase
                    continue

                elif opt.startswith('N'):
                    # handled in create_newcase
                    continue
                elif opt.startswith('IOP'):
                    logger.warn("IOP test option not yet implemented")
                else:
                    expect(False, "Could not parse option '%s' " % opt)

        envtest.write()
        lockedfiles = os.path.join(test_dir, "LockedFiles")
        if not os.path.exists(lockedfiles):
            os.mkdir(lockedfiles)
        shutil.copy(os.path.join(test_dir, "env_run.xml"),
                    os.path.join(lockedfiles, "env_run.orig.xml"))

        with Case(test_dir, read_only=False) as case:
            if self._output_root is None:
                self._output_root = case.get_value("CIME_OUTPUT_ROOT")
            case.set_value(
                "SHAREDLIBROOT",
                os.path.join(self._output_root,
                             "sharedlibroot.%s" % self._test_id))
            envtest.set_initial_values(case)
            case.set_value("TEST", True)
            if self._save_timing:
                case.set_value("SAVE_TIMING", True)

        return True
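A hedged sketch of constructing this scheduler; the test name, machine, compiler, and baseline values are placeholders, and run_tests is the entry point seen in the load-balancing example earlier in this collection:

scheduler = TestScheduler(["SMS.f19_g16.A.anymachine_gnu"],   # hypothetical full test name
                          machine_name="anymachine",
                          compiler="gnu",
                          baseline_gen_name="gnu/my-branch",
                          test_id="mytest01")
success = scheduler.run_tests(wait=True)   # as called in the earlier load-balancing example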
Ejemplo n.º 37
0
def parse_command_line(args, description):
###############################################################################
    help_str = """
    Solve a Mixed Integer Linear Program to find a PE layout that minimizes
    the wall-clock time per model day.
    """
    parser = argparse.ArgumentParser(usage=help_str,
                                     description=description,
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    CIME.utils.setup_standard_logging_options(parser)

    parser.add_argument('--test-id', default=DEFAULT_TESTID,
                        help='test-id to use for all timing runs')

    parser.add_argument("-r", "--test-root",
                        help="Where test cases were created."
                        " Will default to output root as defined in the config_machines file")

    parser.add_argument('--timing-dir', help='alternative to using casename '
                        'to find timing data, instead read all files in'
                        ' this directory')

    parser.add_argument('--blocksize',
                        help='default minimum size of blocks to assign to all '
                        'components. Components can be assigned different '
                        'blocksizes using --blocksize_XXX. Default 1', type=int)

    for c in COMPONENT_LIST:
        parser.add_argument('--blocksize-%s' % c.lower(),
                            help='minimum blocksize for component %s, if '
                            'different from --blocksize', type=int)

    parser.add_argument('--total-tasks', type=int,
                        help='Number of pes available for assignment')

    parser.add_argument("--layout",
                        help="name of layout to solve (default selected internally)")

    parser.add_argument("--graph-models", action="store_true",
                        help="plot cost v. ntasks models. requires matplotlib")

    parser.add_argument("--print-models", action="store_true",
                        help="print all costs and ntasks")

    parser.add_argument("--pe-output", help="write pe layout to file")

    parser.add_argument('--json-output', help="write MILP data to .json file")

    parser.add_argument('--json-input', help="solve using data from .json file")

    args = CIME.utils.parse_args_and_handle_standard_logging_options(args,
                                                                     parser)
    if args.total_tasks is None and args.json_input is None:
        expect(args.total_tasks is not None or args.json_input is not None,
               "--total-tasks or --json-input option must be set")

    blocksizes = {}
    for c in COMPONENT_LIST:
        attrib = 'blocksize_%s' % c.lower()
        if getattr(args, attrib) is not None:
            blocksizes[c] = getattr(args, attrib)
        elif args.blocksize is not None:
            blocksizes[c] = args.blocksize
    test_root = args.test_root
    if test_root is None:
        machobj = Machines()
        test_root = machobj.get_value("CIME_OUTPUT_ROOT")

    return (args.test_id, test_root, args.timing_dir, blocksizes,
            args.total_tasks, args.layout, args.graph_models,
            args.print_models, args.pe_output, args.json_output,
            args.json_input)
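A small sketch of consuming the tuple returned above; the command-line values are hypothetical:

(test_id, test_root, timing_dir, blocksizes, total_tasks, layout,
 graph_models, print_models, pe_output, json_output, json_input) = \
    parse_command_line(["--test-id", "lbt_0001", "--total-tasks", "128"],
                       "load balancing solve")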
Ejemplo n.º 38
0
    def configure(self, compset_name, grid_name, machine_name=None,
                  project=None, pecount=None, compiler=None, mpilib=None,
                  user_compset=False, pesfile=None,
                  user_grid=False, gridfile=None, ninst=1, test=False):

        #--------------------------------------------
        # compset, pesfile, and compset components
        #--------------------------------------------
        self._set_compset_and_pesfile(compset_name, user_compset=user_compset, pesfile=pesfile)

        self._components = self.get_compset_components()
        #FIXME - if --user-compset is True then need to determine that
        #all of the compset settings are valid

        #--------------------------------------------
        # grid
        #--------------------------------------------
        if user_grid is True and gridfile is not None:
            self.set_value("GRIDS_SPEC_FILE", gridfile);
        grids = Grids(gridfile)

        gridinfo = grids.get_grid_info(name=grid_name, compset=self._compsetname)
        self._gridname = gridinfo["GRID"]
        for key,value in gridinfo.items():
            logger.debug("Set grid %s %s"%(key,value))
            self.set_value(key,value)

        #--------------------------------------------
        # component config data
        #--------------------------------------------
        self._get_component_config_data()

        self.get_compset_var_settings()

        # Add the group and elements for the config_files.xml
        for idx, config_file in enumerate(self._component_config_files):
            self.set_value(config_file[0],config_file[1])

        #--------------------------------------------
        # machine
        #--------------------------------------------
        # set machine values in env_xxx files
        machobj = Machines(machine=machine_name)
        machine_name = machobj.get_machine_name()
        self.set_value("MACH",machine_name)
        nodenames = machobj.get_node_names()
        nodenames =  [x for x in nodenames if
                      '_system' not in x and '_variables' not in x and 'mpirun' not in x and\
                      'COMPILER' not in x and 'MPILIB' not in x]

        for nodename in nodenames:
            value = machobj.get_value(nodename)
            type_str = self.get_type_info(nodename)
            if type_str is not None:
                self.set_value(nodename, convert_to_type(value, type_str, nodename))

        if compiler is None:
            compiler = machobj.get_default_compiler()
        else:
            expect(machobj.is_valid_compiler(compiler),
                   "compiler %s is not supported on machine %s" %(compiler, machine_name))

        self.set_value("COMPILER",compiler)

        if mpilib is None:
            mpilib = machobj.get_default_MPIlib({"compiler":compiler})
        else:
            expect(machobj.is_valid_MPIlib(mpilib, {"compiler":compiler}),
                   "MPIlib %s is not supported on machine %s" %(mpilib, machine_name))
        self.set_value("MPILIB",mpilib)

        machdir = machobj.get_machines_dir()
        self.set_value("MACHDIR", machdir)

        # Overwriting an existing exeroot or rundir can cause problems
        exeroot = self.get_value("EXEROOT")
        rundir = self.get_value("RUNDIR")
        for wdir in (exeroot, rundir):
            if os.path.exists(wdir):
                expect(not test, "Directory %s already exists, aborting test"% wdir)
                response = input("\nDirectory %s already exists, (r)eplace, (a)bort, or (u)se existing?" % wdir)
                if response.startswith("r"):
                    shutil.rmtree(wdir)
                else:
                    expect(response.startswith("u"), "Aborting by user request")

        # the following go into the env_mach_specific file
        vars = ("module_system", "environment_variables", "mpirun")
        env_mach_specific_obj = self._get_env("mach_specific")
        for var in vars:
            nodes = machobj.get_first_child_nodes(var)
            for node in nodes:
                env_mach_specific_obj.add_child(node)

        #--------------------------------------------
        # pe layout
        #--------------------------------------------
        pesobj = Pes(self._pesfile)

        #FIXME - add pesize_opts as optional argument below
        pes_ntasks, pes_nthrds, pes_rootpe = pesobj.find_pes_layout(self._gridname, self._compsetname,
                                                                    machine_name, pesize_opts=pecount)
        mach_pes_obj = self._get_env("mach_pes")
        totaltasks = {}
        for key, value in pes_ntasks.items():
            totaltasks[key[-3:]] = int(value)
            mach_pes_obj.set_value(key,int(value))
        for key, value in pes_rootpe.items():
            totaltasks[key[-3:]] += int(value)
            mach_pes_obj.set_value(key,int(value))
        for key, value in pes_nthrds.items():
            totaltasks[key[-3:]] *= int(value)
            mach_pes_obj.set_value(key,int(value))
        maxval = 1
        pes_per_node = mach_pes_obj.get_value("PES_PER_NODE")
        for key, val in totaltasks.items():
            if val < 0:
                val = -1*val*pes_per_node
            if val > maxval:
                maxval = val

        # Make sure that every component has been accounted for:
        # set nthrds and ntasks to 1 otherwise. Also set the ninst values here.
        for compclass in self._component_classes:
            if compclass == "DRV":
                continue
            key = "NINST_%s"%compclass
            mach_pes_obj.set_value(key, ninst)
            key = "NTASKS_%s"%compclass
            if key not in pes_ntasks.keys():
                mach_pes_obj.set_value(key,1)
            key = "NTHRDS_%s"%compclass
            if key not in pes_nthrds.keys():
                mach_pes_obj.set_value(key,1)

        # FIXME - this is a short term fix for dealing with the restriction that
        # CISM1 cannot run on multiple cores
        if "CISM1" in self._compsetname:
            mach_pes_obj.set_value("NTASKS_GLC",1)
            mach_pes_obj.set_value("NTHRDS_GLC",1)

        #--------------------------------------------
        # batch system
        #--------------------------------------------
        batch_system_type = machobj.get_value("BATCH_SYSTEM")
        batch = Batch(batch_system=batch_system_type, machine=machine_name)
        bjobs = batch.get_batch_jobs()
        env_batch = self._get_env("batch")
        env_batch.set_batch_system(batch, batch_system_type=batch_system_type)
        env_batch.create_job_groups(bjobs)
        env_batch.set_job_defaults(bjobs, pesize=maxval)
        self._env_files_that_need_rewrite.add(env_batch)

        self.set_value("COMPSET",self._compsetname)

        self._set_pio_xml()
        logger.info(" Compset is: %s " %self._compsetname)
        logger.info(" Grid is: %s " %self._gridname )
        logger.info(" Components in compset are: %s " %self._components)

        # miscellaneous settings
        if self.get_value("RUN_TYPE") == 'hybrid':
            self.set_value("GET_REFCASE", True)

        # Set project id
        if project is None:
            project = get_project(machobj)
        if project is not None:
            self.set_value("PROJECT", project)
        elif machobj.get_value("PROJECT_REQUIRED"):
            expect(project is not None, "PROJECT_REQUIRED is true but no project found")
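
The task-accounting loop above folds the per-component NTASKS, ROOTPE, and NTHRDS values into a single task total per component class and a maxval later used to size batch jobs; a negative NTASKS value is treated as a whole-node count and converted via PES_PER_NODE. Below is a minimal, self-contained sketch of that arithmetic; the dictionaries and pes_per_node value are hypothetical, not taken from any real pes file.

def compute_max_tasks(pes_ntasks, pes_rootpe, pes_nthrds, pes_per_node):
    """Mirror the totaltasks/maxval logic above: (ntasks + rootpe) * nthrds
    per component class, with negative ntasks read as whole-node counts."""
    totaltasks = {}
    for key, value in pes_ntasks.items():
        totaltasks[key[-3:]] = int(value)
    for key, value in pes_rootpe.items():
        totaltasks[key[-3:]] += int(value)
    for key, value in pes_nthrds.items():
        totaltasks[key[-3:]] *= int(value)

    maxval = 1
    for val in totaltasks.values():
        if val < 0:
            val = -1 * val * pes_per_node  # negative means whole nodes
        if val > maxval:
            maxval = val
    return maxval

# Hypothetical layout: ATM on 2 nodes (-2) with 2 threads, OCN on 64 tasks.
print(compute_max_tasks({"NTASKS_ATM": -2, "NTASKS_OCN": 64},
                        {"ROOTPE_ATM": 0, "ROOTPE_OCN": 0},
                        {"NTHRDS_ATM": 2, "NTHRDS_OCN": 1},
                        pes_per_node=36))  # prints 144
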
Ejemplo n.º 39
0
class TestScheduler(object):
###############################################################################

    ###########################################################################
    def __init__(self, test_names, test_data=None,
                 no_run=False, no_build=False, no_setup=False, no_batch=None,
                 test_root=None, test_id=None,
                 machine_name=None, compiler=None,
                 baseline_root=None, baseline_cmp_name=None, baseline_gen_name=None,
                 clean=False, namelists_only=False,
                 project=None, parallel_jobs=None,
                 walltime=None, proc_pool=None,
                 use_existing=False, save_timing=False, queue=None,
                 allow_baseline_overwrite=False, output_root=None,
                 force_procs=None, force_threads=None, mpilib=None,
                 input_dir=None, pesfile=None, mail_user=None, mail_type=None):
    ###########################################################################
        self._cime_root       = CIME.utils.get_cime_root()
        self._cime_model      = get_model()
        self._save_timing     = save_timing
        self._queue           = queue
        self._test_data       = {} if test_data is None else test_data # Format:  {test_name -> {data_name -> data}}
        self._mpilib          = mpilib  # allow override of default mpilib
        self._completed_tests = 0
        self._input_dir       = input_dir
        self._pesfile         = pesfile
        self._allow_baseline_overwrite = allow_baseline_overwrite

        self._mail_user = mail_user
        self._mail_type = mail_type

        self._machobj = Machines(machine=machine_name)

        self._model_build_cost = 4

        # If user is forcing procs or threads, re-write test names to reflect this.
        if force_procs or force_threads:
            test_names = _translate_test_names_for_new_pecount(test_names, force_procs, force_threads)

        self._no_setup = no_setup
        self._no_build = no_build or no_setup or namelists_only
        self._no_run   = no_run or self._no_build
        self._output_root = output_root
        # Figure out what project to use
        if project is None:
            self._project = CIME.utils.get_project()
            if self._project is None:
                self._project = self._machobj.get_value("PROJECT")
        else:
            self._project = project

        # We will not use batch system if user asked for no_batch or if current
        # machine is not a batch machine
        self._no_batch = no_batch or not self._machobj.has_batch_system()
        expect(not (self._no_batch and self._queue is not None),
               "Does not make sense to request a queue without batch system")

        # Determine and resolve test_root
        if test_root is not None:
            self._test_root = test_root
        elif self._output_root is not None:
            self._test_root = self._output_root
        else:
            self._test_root = self._machobj.get_value("CIME_OUTPUT_ROOT")

        if self._project is not None:
            self._test_root = self._test_root.replace("$PROJECT", self._project)

        self._test_root = os.path.abspath(self._test_root)
        self._test_id   = test_id if test_id is not None else CIME.utils.get_timestamp()

        self._compiler = self._machobj.get_default_compiler() if compiler is None else compiler

        self._clean          = clean
        self._namelists_only = namelists_only

        self._walltime = walltime

        if parallel_jobs is None:
            self._parallel_jobs = min(len(test_names),
                                      self._machobj.get_value("MAX_MPITASKS_PER_NODE"))
        else:
            self._parallel_jobs = parallel_jobs

        self._baseline_cmp_name = baseline_cmp_name # Implies comparison should be done if not None
        self._baseline_gen_name = baseline_gen_name # Implies generation should be done if not None

        # Compute baseline_root
        self._baseline_root = baseline_root if baseline_root is not None \
                              else self._machobj.get_value("BASELINE_ROOT")

        if self._project is not None:
            self._baseline_root = self._baseline_root.replace("$PROJECT", self._project)

        self._baseline_root = os.path.abspath(self._baseline_root)

        if baseline_cmp_name or baseline_gen_name:
            if self._baseline_cmp_name:
                full_baseline_dir = os.path.join(self._baseline_root, self._baseline_cmp_name)
                expect(os.path.isdir(full_baseline_dir),
                       "Missing baseline comparison directory {}".format(full_baseline_dir))

            # the following is to assure that the existing generate directory is not overwritten
            if self._baseline_gen_name:
                full_baseline_dir = os.path.join(self._baseline_root, self._baseline_gen_name)
                existing_baselines = []
                for test_name in test_names:
                    test_baseline = os.path.join(full_baseline_dir, test_name)
                    if os.path.isdir(test_baseline):
                        existing_baselines.append(test_baseline)

                expect(allow_baseline_overwrite or len(existing_baselines) == 0,
                       "Baseline directories already exists {}\n" \
                       "Use -o to avoid this error".format(existing_baselines))

        if self._cime_model == "e3sm":
            _order_tests_by_runtime(test_names, self._baseline_root)

        # This is the only data that multiple threads will simultaneously access
        # Each test has its own value and setting/retrieving items from a dict
        # is atomic, so this should be fine to use without mutex.
        # name -> (phase, status)
        self._tests = OrderedDict()
        for test_name in test_names:
            self._tests[test_name] = (TEST_START, TEST_PASS_STATUS)

        # Oversubscribe by 1/4
        if proc_pool is None:
            pes = int(self._machobj.get_value("MAX_TASKS_PER_NODE"))
            self._proc_pool = int(pes * 1.25)
        else:
            self._proc_pool = int(proc_pool)

        self._procs_avail = self._proc_pool

        # Setup phases
        self._phases = list(PHASES)
        if self._no_setup:
            self._phases.remove(SETUP_PHASE)
        if self._no_build:
            self._phases.remove(SHAREDLIB_BUILD_PHASE)
            self._phases.remove(MODEL_BUILD_PHASE)
        if self._no_run:
            self._phases.remove(RUN_PHASE)

        if use_existing:
            for test in self._tests:
                with TestStatus(self._get_test_dir(test)) as ts:
                    for phase, status in ts:
                        if phase in CORE_PHASES:
                            if status in [TEST_PEND_STATUS, TEST_FAIL_STATUS]:
                                if status == TEST_FAIL_STATUS:
                                    # Important for potential subsequent waits
                                    ts.set_status(phase, TEST_PEND_STATUS)

                                # We need to pick up here
                                break

                            else:
                                if phase != SUBMIT_PHASE:
                                    # Somewhat subtle. Create_test considers submit/run to be the run phase,
                                    # so don't try to update test status for a passed submit phase
                                    self._update_test_status(test, phase, TEST_PEND_STATUS)
                                    self._update_test_status(test, phase, status)

                                    if phase == RUN_PHASE:
                                        logger.info("Test {} passed and will not be re-run".format(test))

                logger.info("Using existing test directory {}".format(self._get_test_dir(test)))
        else:
            # None of the test directories should already exist.
            for test in self._tests:
                expect(not os.path.exists(self._get_test_dir(test)),
                       "Cannot create new case in directory '{}', it already exists."
                       " Pick a different test-id".format(self._get_test_dir(test)))
                logger.info("Creating test directory {}".format(self._get_test_dir(test)))

        # By the end of this constructor, this program should never hard abort;
        # instead, errors will be placed in the TestStatus files for the various
        # test cases

    ###########################################################################
    def get_testnames(self):
    ###########################################################################
        return list(self._tests.keys())

    ###########################################################################
    def _log_output(self, test, output):
    ###########################################################################
        test_dir = self._get_test_dir(test)
        if not os.path.isdir(test_dir):
            # Note: making this directory here could cause create_newcase to
            # fail if _log_output is run before create_newcase.
            os.makedirs(test_dir)
        append_testlog(output, caseroot=test_dir)

    ###########################################################################
    def _get_case_id(self, test):
    ###########################################################################
        baseline_action_code = ""
        if self._baseline_gen_name:
            baseline_action_code += "G"
        if self._baseline_cmp_name:
            baseline_action_code += "C"
        if len(baseline_action_code) > 0:
            return "{}.{}.{}".format(test, baseline_action_code, self._test_id)
        else:
            return "{}.{}".format(test, self._test_id)

    ###########################################################################
    def _get_test_dir(self, test):
    ###########################################################################
        return os.path.join(self._test_root, self._get_case_id(test))

    ###########################################################################
    def _get_test_data(self, test):
    ###########################################################################
        # Must be atomic
        return self._tests[test]

    ###########################################################################
    def _is_broken(self, test):
    ###########################################################################
        status = self._get_test_status(test)
        return status != TEST_PASS_STATUS and status != TEST_PEND_STATUS

    ###########################################################################
    def _work_remains(self, test):
    ###########################################################################
        test_phase, test_status = self._get_test_data(test)
        return (test_status == TEST_PASS_STATUS or test_status == TEST_PEND_STATUS) and\
            test_phase != self._phases[-1]

    ###########################################################################
    def _get_test_status(self, test, phase=None):
    ###########################################################################
        curr_phase, curr_status = self._get_test_data(test)
        if phase is None or phase == curr_phase:
            return curr_status
        else:
            expect(phase is None or self._phases.index(phase) < self._phases.index(curr_phase),
                   "Tried to see the future")
            # Assume all older phases PASSed
            return TEST_PASS_STATUS

    ###########################################################################
    def _get_test_phase(self, test):
    ###########################################################################
        return self._get_test_data(test)[0]

    ###########################################################################
    def _update_test_status(self, test, phase, status):
    ###########################################################################
        phase_idx = self._phases.index(phase)
        old_phase, old_status = self._get_test_data(test)

        if old_phase == phase:
            expect(old_status == TEST_PEND_STATUS,
                   "Only valid to transition from PEND to something else, found '{}' for phase '{}'".format(old_status, phase))
            expect(status != TEST_PEND_STATUS,
                   "Cannot transition from PEND -> PEND")
        else:
            expect(old_status == TEST_PASS_STATUS,
                   "Why did we move on to next phase when prior phase did not pass?")
            expect(status == TEST_PEND_STATUS,
                   "New phase should be set to pending status")
            expect(self._phases.index(old_phase) == phase_idx - 1,
                   "Skipped phase? {} {}".format(old_phase, phase_idx))

        # Must be atomic
        self._tests[test] = (phase, status)

    ###########################################################################
    def _shell_cmd_for_phase(self, test, cmd, phase, from_dir=None):
    ###########################################################################
        while True:
            rc, output, errput = run_cmd(cmd, from_dir=from_dir)
            if rc != 0:
                self._log_output(test,
                                 "{} FAILED for test '{}'.\nCommand: {}\nOutput: {}\n".
                                 format(phase, test, cmd,
                                        output.encode('utf-8') + b"\n" + errput.encode('utf-8')))
                # Temporary hack to get around odd file descriptor use by
                # buildnml scripts.
                if "bad interpreter" in output:
                    time.sleep(1)
                    continue
                else:
                    return False, errput
            else:
                # We don't want "RUN PASSED" in the TestStatus.log if the only thing that
                # succeeded was the submission.
                phase = "SUBMIT" if phase == RUN_PHASE else phase
                self._log_output(test,
                                 "{} PASSED for test '{}'.\nCommand: {}\nOutput: {}\n".
                                 format(phase, test, cmd,
                                        output.encode('utf-8') + b"\n" + errput.encode('utf-8')))
                return True, errput

    ###########################################################################
    def _create_newcase_phase(self, test):
    ###########################################################################
        test_dir = self._get_test_dir(test)

        _, case_opts, grid, compset,\
            machine, compiler, test_mods = CIME.utils.parse_test_name(test)

        create_newcase_cmd = "{} --case {} --res {} --compset {}"\
                             " --test".format(os.path.join(self._cime_root, "scripts", "create_newcase"),
                                              test_dir, grid, compset)
        if machine is not None:
            create_newcase_cmd += " --machine {}".format(machine)
        if compiler is not None:
            create_newcase_cmd += " --compiler {}".format(compiler)
        if self._project is not None:
            create_newcase_cmd += " --project {} ".format(self._project)
        if self._output_root is not None:
            create_newcase_cmd += " --output-root {} ".format(self._output_root)
        if self._input_dir is not None:
            create_newcase_cmd += " --input-dir {} ".format(self._input_dir)

        if self._pesfile is not None:
            create_newcase_cmd += " --pesfile {} ".format(self._pesfile)

        if test_mods is not None:
            files = Files()
            (component, modspath) = test_mods.split('/',1)

            testmods_dir = files.get_value("TESTS_MODS_DIR", {"component": component})
            test_mod_file = os.path.join(testmods_dir, component, modspath)
            if not os.path.exists(test_mod_file):
                error = "Missing testmod file '{}'".format(test_mod_file)
                self._log_output(test, error)
                return False, error

            create_newcase_cmd += " --user-mods-dir {}".format(test_mod_file)

        mpilib = None
        ninst = 1
        ncpl = 1
        if case_opts is not None:
            for case_opt in case_opts: # pylint: disable=not-an-iterable
                if case_opt.startswith('M'):
                    mpilib = case_opt[1:]
                    create_newcase_cmd += " --mpilib {}".format(mpilib)
                    logger.debug (" MPILIB set to {}".format(mpilib))
                elif case_opt.startswith('N'):
                    expect(ncpl == 1,"Cannot combine _C and _N options")
                    ninst = case_opt[1:]
                    create_newcase_cmd += " --ninst {}".format(ninst)
                    logger.debug (" NINST set to {}".format(ninst))
                elif case_opt.startswith('C'):
                    expect(ninst == 1,"Cannot combine _C and _N options")
                    ncpl = case_opt[1:]
                    create_newcase_cmd += " --ninst {} --multi-driver" .format(ncpl)
                    logger.debug (" NCPL set to {}" .format(ncpl))
                elif case_opt.startswith('P'):
                    pesize = case_opt[1:]
                    create_newcase_cmd += " --pecount {}".format(pesize)
                elif case_opt.startswith('V'):
                    driver = case_opt[1:]
                    create_newcase_cmd += " --driver {}".format(driver)

        # create_test mpilib option overrides default but not explicitly set case_opt mpilib
        if mpilib is None and self._mpilib is not None:
            create_newcase_cmd += " --mpilib {}".format(self._mpilib)
            logger.debug (" MPILIB set to {}".format(self._mpilib))

        if self._queue is not None:
            create_newcase_cmd += " --queue={}".format(self._queue)

        if self._walltime is not None:
            create_newcase_cmd += " --walltime {}".format(self._walltime)
        else:
            # model specific ways of setting time
            if self._cime_model == "e3sm":
                recommended_time = _get_time_est(test, self._baseline_root)

                if recommended_time is not None:
                    create_newcase_cmd += " --walltime {}".format(recommended_time)

            else:
                if test in self._test_data and "options" in self._test_data[test] and \
                        "wallclock" in self._test_data[test]['options']:
                    create_newcase_cmd += " --walltime {}".format(self._test_data[test]['options']['wallclock'])

        logger.debug("Calling create_newcase: " + create_newcase_cmd)
        return self._shell_cmd_for_phase(test, create_newcase_cmd, CREATE_NEWCASE_PHASE)

    ###########################################################################
    def _xml_phase(self, test):
    ###########################################################################
        test_case = CIME.utils.parse_test_name(test)[0]

        # Create, fill and write an envtest object
        test_dir = self._get_test_dir(test)
        envtest = EnvTest(test_dir)

        # Determine list of component classes that this coupler/driver knows how
        # to deal with. This list follows the same order as compset longnames follow.
        files = Files()
        drv_config_file = files.get_value("CONFIG_CPL_FILE")
        drv_comp = Component(drv_config_file, "CPL")
        envtest.add_elements_by_group(files, {}, "env_test.xml")
        envtest.add_elements_by_group(drv_comp, {}, "env_test.xml")
        envtest.set_value("TESTCASE", test_case)
        envtest.set_value("TEST_TESTID", self._test_id)
        envtest.set_value("CASEBASEID", test)
        if test in self._test_data and "options" in self._test_data[test] and \
                "memleak_tolerance" in self._test_data[test]['options']:
            envtest.set_value("TEST_MEMLEAK_TOLERANCE", self._test_data[test]['options']['memleak_tolerance'])

        test_argv = "-testname {} -testroot {}".format(test, self._test_root)
        if self._baseline_gen_name:
            test_argv += " -generate {}".format(self._baseline_gen_name)
            basegen_case_fullpath = os.path.join(self._baseline_root,self._baseline_gen_name, test)
            logger.debug("basegen_case is {}".format(basegen_case_fullpath))
            envtest.set_value("BASELINE_NAME_GEN", self._baseline_gen_name)
            envtest.set_value("BASEGEN_CASE", os.path.join(self._baseline_gen_name, test))
        if self._baseline_cmp_name:
            test_argv += " -compare {}".format(self._baseline_cmp_name)
            envtest.set_value("BASELINE_NAME_CMP", self._baseline_cmp_name)
            envtest.set_value("BASECMP_CASE", os.path.join(self._baseline_cmp_name, test))

        envtest.set_value("TEST_ARGV", test_argv)
        envtest.set_value("CLEANUP", self._clean)

        envtest.set_value("BASELINE_ROOT", self._baseline_root)
        envtest.set_value("GENERATE_BASELINE", self._baseline_gen_name is not None)
        envtest.set_value("COMPARE_BASELINE", self._baseline_cmp_name is not None)
        envtest.set_value("CCSM_CPRNC", self._machobj.get_value("CCSM_CPRNC", resolved=False))
        tput_tolerance = self._machobj.get_value("TEST_TPUT_TOLERANCE", resolved=False)
        envtest.set_value("TEST_TPUT_TOLERANCE", 0.25 if tput_tolerance is None else tput_tolerance)

        # Add the test instructions from config_test to env_test in the case
        config_test = Tests()
        testnode = config_test.get_test_node(test_case)
        envtest.add_test(testnode)
        # Determine the test_case from the test name
        test_case, case_opts = CIME.utils.parse_test_name(test)[:2]

        # Determine case_opts from the test_case
        if case_opts is not None:
            logger.debug("case_opts are {} ".format(case_opts))
            for opt in case_opts: # pylint: disable=not-an-iterable

                logger.debug("case_opt is {}".format(opt))
                if opt == 'D':
                    envtest.set_test_parameter("DEBUG", "TRUE")
                    logger.debug (" DEBUG set to TRUE")

                elif opt == 'E':
                    envtest.set_test_parameter("USE_ESMF_LIB", "TRUE")
                    logger.debug (" USE_ESMF_LIB set to TRUE")

                elif opt == 'CG':
                    envtest.set_test_parameter("CALENDAR", "GREGORIAN")
                    logger.debug (" CALENDAR set to {}".format(opt))

                elif opt.startswith('L'):
                    match =  re.match('L([A-Za-z])([0-9]*)', opt)
                    stop_option = {"y":"nyears", "m":"nmonths", "d":"ndays", "h":"nhours",
                                   "s":"nseconds", "n":"nsteps"}
                    opt = match.group(1)
                    envtest.set_test_parameter("STOP_OPTION",stop_option[opt])
                    opti = match.group(2)
                    envtest.set_test_parameter("STOP_N", opti)

                    logger.debug (" STOP_OPTION set to {}".format(stop_option[opt]))
                    logger.debug (" STOP_N      set to {}".format(opti))

                elif opt.startswith('R'):
                    # R option is for testing in PTS_MODE or Single Column Model
                    #  (SCM) mode
                    envtest.set_test_parameter("PTS_MODE", "TRUE")

                    # For PTS_MODE, compile with mpi-serial
                    envtest.set_test_parameter("MPILIB", "mpi-serial")

                elif (opt.startswith('I') or # Marker to distinguish tests with same name - ignored
                      opt.startswith('M') or # handled in create_newcase
                      opt.startswith('P') or # handled in create_newcase
                      opt.startswith('N') or # handled in create_newcase
                      opt.startswith('C') or # handled in create_newcase
                      opt.startswith('V')):  # handled in create_newcase
                    pass

                elif opt.startswith('IOP'):
                    logger.warning("IOP test option not yet implemented")
                else:
                    expect(False, "Could not parse option '{}' ".format(opt))

        envtest.write()
        lock_file("env_run.xml", caseroot=test_dir, newname="env_run.orig.xml")

        with Case(test_dir, read_only=False) as case:
            if self._output_root is None:
                self._output_root = case.get_value("CIME_OUTPUT_ROOT")
            # if we are running a single test we don't need sharedlibroot
            if len(self._tests) > 1 and self._cime_model != "e3sm":
                case.set_value("SHAREDLIBROOT",
                               os.path.join(self._output_root,
                                            "sharedlibroot.{}".format(self._test_id)))
            envtest.set_initial_values(case)
            case.set_value("TEST", True)
            case.set_value("SAVE_TIMING", self._save_timing)

            # Scale back build parallelism on systems with few cores
            if self._model_build_cost > self._proc_pool:
                case.set_value("GMAKE_J", self._proc_pool)
                self._model_build_cost = self._proc_pool
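
In _xml_phase above, an L case option encodes the run length as an interval letter plus a count (for example Ld5 for five days), which is translated into STOP_OPTION and STOP_N. A standalone sketch of that decoding, reusing the same regex and mapping; the option strings fed to it below are hypothetical.

import re

_STOP_OPTION = {"y": "nyears", "m": "nmonths", "d": "ndays", "h": "nhours",
                "s": "nseconds", "n": "nsteps"}

def parse_length_opt(opt):
    """Turn an 'L' test option such as 'Ld5' into (STOP_OPTION, STOP_N)."""
    match = re.match(r'L([A-Za-z])([0-9]*)', opt)
    if match is None:
        raise ValueError("Not an L option: {}".format(opt))
    return _STOP_OPTION[match.group(1)], match.group(2)

print(parse_length_opt("Ld5"))   # ('ndays', '5')
print(parse_length_opt("Lm13"))  # ('nmonths', '13')
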
Ejemplo n.º 40
0
def load_balancing_submit(compset, res, pesfile, mpilib, compiler, project, machine,
                          extra_options_file, test_id, force_purge, test_root):
################################################################################
    # Read in list of pes from given file
    expect(os.access(pesfile, os.R_OK), 'ERROR: File {} not found'.format(pesfile))

    logger.info('Reading XML file %s. Searching for pesize entries:', pesfile)
    try:
        pesobj = Pes(pesfile)
    except ParseError:
        expect(False, 'ERROR: File {} not parseable'.format(pesfile))

    pesize_list = []
    grid_nodes = pesobj.get_children("grid")
    for gnode in grid_nodes:
        mach_nodes = pesobj.get_children("mach", root=gnode)
        for mnode in mach_nodes:
            pes_nodes = pesobj.get_children("pes", root=mnode)
            for pnode in pes_nodes:
                pesize = pesobj.get(pnode, 'pesize')
                if not pesize:
                    logger.critical('No pesize for pes node in file %s', pesfile)
                if pesize in pesize_list:
                    logger.critical('pesize %s duplicated in file %s', pesize, pesfile)
                pesize_list.append(pesize)

    expect(pesize_list, 'ERROR: No grid entries found in pes file {}'.format(pesfile))

    machobj = Machines(machine=machine)
    if test_root is None:
        test_root = machobj.get_value("CIME_OUTPUT_ROOT")
    if machine is None:
        machine = machobj.get_machine_name()
        print "machine is {}".format(machine)
    if compiler is None:
        compiler = machobj.get_default_compiler()
        print "compiler is {}".format(compiler)
    if mpilib is None:
        mpilib = machobj.get_default_MPIlib({"compiler":compiler})

    test_names = []
    for i in range(len(pesize_list)):
        test_names.append(get_full_test_name("PFS_I{}".format(i),grid=res, compset=compset,
                                             machine=machine, compiler=compiler))
        casedir = os.path.join(test_root, test_names[-1] + "." + test_id)
        print "casedir is {}".format(casedir)
        if os.path.isdir(casedir):
            if force_purge:
                logger.info('Removing directory %s', casedir)
                shutil.rmtree(casedir)
            else:
                expect(False,
                       "casedir {} already exists; use the --force-purge, --test-root, or"
                       " --test-id options".format(casedir))

    tests = TestScheduler(test_names, no_setup = True,
                          compiler=compiler, machine_name=machine, mpilib=mpilib,
                          test_root=test_root, test_id=test_id, project=project)
    success = tests.run_tests(wait=True)
    expect(success, "Error in creating cases")
    testnames = []
    for test in tests.get_testnames():
        testname =  os.path.join(test_root, test + "." + test_id)
        testnames.append( testname)
        logger.info("test is {}".format(testname))
        with Case(testname) as case:
            pes_ntasks, pes_nthrds, pes_rootpe, _, _, _ = \
                                                    pesobj.find_pes_layout('any', 'any', 'any', pesize_opts=pesize_list.pop(0))
            for key in pes_ntasks:
                case.set_value(key, pes_ntasks[key])
            for key in pes_nthrds:
                case.set_value(key, pes_nthrds[key])
            for key in pes_rootpe:
                case.set_value(key, pes_rootpe[key])

            if extra_options_file is not None:
                try:
                    extras = open(extra_options_file, 'r')
                    for line in extras.readlines():
                        split = line.split('=')
                        if len(split) == 2:
                            logger.info('setting %s=%s', split[0], split[1])
                            case.set_value(split[0], split[1])
                        else:
                            logger.debug('ignoring line in {}: {}'.format(
                                extra_options_file, line))
                    extras.close()
                except IOError:
                    expect(False, "ERROR: Could not read file {}".format(extra_options_file))


    tests = TestScheduler(test_names, use_existing=True, test_root=test_root, test_id=test_id)
    success = tests.run_tests(wait=False)
    expect(success, "Error in running cases")

    # need to fix
    logger.info('Timing jobs submitted. After the jobs complete, run this to optimize '
                'the pe layout:\n  load_balancing_solve --test-id {} --test-root {}'.
                format(test_id, test_root))
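
load_balancing_submit above can also apply extra XML settings from a plain key=value file, silently skipping lines that do not contain exactly one '='. A small sketch of that parsing in isolation; the helper name and file contents are hypothetical, and in the real loop each pair is applied with case.set_value rather than collected in a dict.

def read_extra_options(path):
    """Parse a key=value options file the way the extra_options loop above
    does, skipping malformed lines instead of failing (values are stripped
    of surrounding whitespace here for readability)."""
    settings = {}
    with open(path, 'r') as extras:
        for line in extras:
            split = line.strip().split('=')
            if len(split) == 2:
                settings[split[0]] = split[1]
    return settings

# Hypothetical file contents:
#   STOP_N=5
#   STOP_OPTION=ndays
# read_extra_options("extra_options.txt") -> {'STOP_N': '5', 'STOP_OPTION': 'ndays'}
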
Ejemplo n.º 41
0
    def __init__(self, test_names, test_data=None,
                 no_run=False, no_build=False, no_setup=False, no_batch=None,
                 test_root=None, test_id=None,
                 machine_name=None, compiler=None,
                 baseline_root=None, baseline_cmp_name=None, baseline_gen_name=None,
                 clean=False, namelists_only=False,
                 project=None, parallel_jobs=None,
                 walltime=None, proc_pool=None,
                 use_existing=False, save_timing=False, queue=None,
                 allow_baseline_overwrite=False, output_root=None,
                 force_procs=None, force_threads=None, mpilib=None,
                 input_dir=None, pesfile=None, mail_user=None, mail_type=None):
    ###########################################################################
        self._cime_root       = CIME.utils.get_cime_root()
        self._cime_model      = get_model()
        self._save_timing     = save_timing
        self._queue           = queue
        self._test_data       = {} if test_data is None else test_data # Format:  {test_name -> {data_name -> data}}
        self._mpilib          = mpilib  # allow override of default mpilib
        self._completed_tests = 0
        self._input_dir       = input_dir
        self._pesfile         = pesfile
        self._allow_baseline_overwrite = allow_baseline_overwrite

        self._mail_user = mail_user
        self._mail_type = mail_type

        self._machobj = Machines(machine=machine_name)

        self._model_build_cost = 4

        # If user is forcing procs or threads, re-write test names to reflect this.
        if force_procs or force_threads:
            test_names = _translate_test_names_for_new_pecount(test_names, force_procs, force_threads)

        self._no_setup = no_setup
        self._no_build = no_build or no_setup or namelists_only
        self._no_run   = no_run or self._no_build
        self._output_root = output_root
        # Figure out what project to use
        if project is None:
            self._project = CIME.utils.get_project()
            if self._project is None:
                self._project = self._machobj.get_value("PROJECT")
        else:
            self._project = project

        # We will not use batch system if user asked for no_batch or if current
        # machine is not a batch machine
        self._no_batch = no_batch or not self._machobj.has_batch_system()
        expect(not (self._no_batch and self._queue is not None),
               "Does not make sense to request a queue without batch system")

        # Determine and resolve test_root
        if test_root is not None:
            self._test_root = test_root
        elif self._output_root is not None:
            self._test_root = self._output_root
        else:
            self._test_root = self._machobj.get_value("CIME_OUTPUT_ROOT")

        if self._project is not None:
            self._test_root = self._test_root.replace("$PROJECT", self._project)

        self._test_root = os.path.abspath(self._test_root)
        self._test_id   = test_id if test_id is not None else CIME.utils.get_timestamp()

        self._compiler = self._machobj.get_default_compiler() if compiler is None else compiler

        self._clean          = clean
        self._namelists_only = namelists_only

        self._walltime = walltime

        if parallel_jobs is None:
            self._parallel_jobs = min(len(test_names),
                                      self._machobj.get_value("MAX_MPITASKS_PER_NODE"))
        else:
            self._parallel_jobs = parallel_jobs

        self._baseline_cmp_name = baseline_cmp_name # Implies comparison should be done if not None
        self._baseline_gen_name = baseline_gen_name # Implies generation should be done if not None

        # Compute baseline_root
        self._baseline_root = baseline_root if baseline_root is not None \
                              else self._machobj.get_value("BASELINE_ROOT")

        if self._project is not None:
            self._baseline_root = self._baseline_root.replace("$PROJECT", self._project)

        self._baseline_root = os.path.abspath(self._baseline_root)

        if baseline_cmp_name or baseline_gen_name:
            if self._baseline_cmp_name:
                full_baseline_dir = os.path.join(self._baseline_root, self._baseline_cmp_name)
                expect(os.path.isdir(full_baseline_dir),
                       "Missing baseline comparison directory {}".format(full_baseline_dir))

            # the following is to assure that the existing generate directory is not overwritten
            if self._baseline_gen_name:
                full_baseline_dir = os.path.join(self._baseline_root, self._baseline_gen_name)
                existing_baselines = []
                for test_name in test_names:
                    test_baseline = os.path.join(full_baseline_dir, test_name)
                    if os.path.isdir(test_baseline):
                        existing_baselines.append(test_baseline)

                expect(allow_baseline_overwrite or len(existing_baselines) == 0,
                       "Baseline directories already exists {}\n" \
                       "Use -o to avoid this error".format(existing_baselines))

        if self._cime_model == "e3sm":
            _order_tests_by_runtime(test_names, self._baseline_root)

        # This is the only data that multiple threads will simultaneously access
        # Each test has its own value and setting/retrieving items from a dict
        # is atomic, so this should be fine to use without mutex.
        # name -> (phase, status)
        self._tests = OrderedDict()
        for test_name in test_names:
            self._tests[test_name] = (TEST_START, TEST_PASS_STATUS)

        # Oversubscribe by 1/4
        if proc_pool is None:
            pes = int(self._machobj.get_value("MAX_TASKS_PER_NODE"))
            self._proc_pool = int(pes * 1.25)
        else:
            self._proc_pool = int(proc_pool)

        self._procs_avail = self._proc_pool

        # Setup phases
        self._phases = list(PHASES)
        if self._no_setup:
            self._phases.remove(SETUP_PHASE)
        if self._no_build:
            self._phases.remove(SHAREDLIB_BUILD_PHASE)
            self._phases.remove(MODEL_BUILD_PHASE)
        if self._no_run:
            self._phases.remove(RUN_PHASE)

        if use_existing:
            for test in self._tests:
                with TestStatus(self._get_test_dir(test)) as ts:
                    for phase, status in ts:
                        if phase in CORE_PHASES:
                            if status in [TEST_PEND_STATUS, TEST_FAIL_STATUS]:
                                if status == TEST_FAIL_STATUS:
                                    # Important for potential subsequent waits
                                    ts.set_status(phase, TEST_PEND_STATUS)

                                # We need to pick up here
                                break

                            else:
                                if phase != SUBMIT_PHASE:
                                    # Somewhat subtle. Create_test considers submit/run to be the run phase,
                                    # so don't try to update test status for a passed submit phase
                                    self._update_test_status(test, phase, TEST_PEND_STATUS)
                                    self._update_test_status(test, phase, status)

                                    if phase == RUN_PHASE:
                                        logger.info("Test {} passed and will not be re-run".format(test))

                logger.info("Using existing test directory {}".format(self._get_test_dir(test)))
        else:
            # None of the test directories should already exist.
            for test in self._tests:
                expect(not os.path.exists(self._get_test_dir(test)),
                       "Cannot create new case in directory '{}', it already exists."
                       " Pick a different test-id".format(self._get_test_dir(test)))
                logger.info("Creating test directory {}".format(self._get_test_dir(test)))
Ejemplo n.º 42
0
    def configure(self,
                  compset_name,
                  grid_name,
                  machine_name=None,
                  project=None,
                  pecount=None,
                  compiler=None,
                  mpilib=None,
                  user_compset=False,
                  pesfile=None,
                  user_grid=False,
                  gridfile=None,
                  ninst=1,
                  test=False,
                  walltime=None,
                  queue=None,
                  output_root=None):

        #--------------------------------------------
        # compset, pesfile, and compset components
        #--------------------------------------------
        self._set_compset_and_pesfile(compset_name,
                                      user_compset=user_compset,
                                      pesfile=pesfile)

        self._components = self.get_compset_components()
        #FIXME - if --user-compset is True then need to determine that
        #all of the compset settings are valid

        #--------------------------------------------
        # grid
        #--------------------------------------------
        if user_grid is True and gridfile is not None:
            self.set_value("GRIDS_SPEC_FILE", gridfile)
        grids = Grids(gridfile)

        gridinfo = grids.get_grid_info(name=grid_name,
                                       compset=self._compsetname)

        self._gridname = gridinfo["GRID"]
        for key, value in gridinfo.items():
            logger.debug("Set grid %s %s" % (key, value))
            self.set_lookup_value(key, value)

        #--------------------------------------------
        # component config data
        #--------------------------------------------
        self._get_component_config_data()

        self.get_compset_var_settings()

        #--------------------------------------------
        # machine
        #--------------------------------------------
        # set machine values in env_xxx files
        machobj = Machines(machine=machine_name)
        machine_name = machobj.get_machine_name()
        self.set_value("MACH", machine_name)
        nodenames = machobj.get_node_names()
        nodenames =  [x for x in nodenames if
                      '_system' not in x and '_variables' not in x and 'mpirun' not in x and\
                      'COMPILER' not in x and 'MPILIB' not in x]

        for nodename in nodenames:
            value = machobj.get_value(nodename, resolved=False)
            type_str = self.get_type_info(nodename)
            if type_str is not None:
                logger.debug("machine nodname %s value %s" % (nodename, value))
                self.set_value(nodename,
                               convert_to_type(value, type_str, nodename))

        if compiler is None:
            compiler = machobj.get_default_compiler()
        else:
            expect(
                machobj.is_valid_compiler(compiler),
                "compiler %s is not supported on machine %s" %
                (compiler, machine_name))

        self.set_value("COMPILER", compiler)

        if mpilib is None:
            mpilib = machobj.get_default_MPIlib({"compiler": compiler})
        else:
            expect(
                machobj.is_valid_MPIlib(mpilib, {"compiler": compiler}),
                "MPIlib %s is not supported on machine %s" %
                (mpilib, machine_name))
        self.set_value("MPILIB", mpilib)

        machdir = machobj.get_machines_dir()
        self.set_value("MACHDIR", machdir)

        # Create env_mach_specific settings from machine info.
        env_mach_specific_obj = self.get_env("mach_specific")
        env_mach_specific_obj.populate(machobj)
        self.schedule_rewrite(env_mach_specific_obj)

        #--------------------------------------------
        # pe layout
        #--------------------------------------------
        match1 = re.match('([0-9]+)x([0-9]+)',
                          "" if pecount is None else pecount)
        match2 = re.match('([0-9]+)', "" if pecount is None else pecount)
        pes_ntasks = {}
        pes_nthrds = {}
        pes_rootpe = {}
        if match1:
            opti_tasks = match1.group(1)
            opti_thrds = match1.group(2)
        elif match2:
            opti_tasks = match2.group(1)
            opti_thrds = 1

        other = {}
        if match1 or match2:
            for component_class in self._component_classes:
                if component_class == "DRV":
                    component_class = "CPL"
                string = "NTASKS_" + component_class
                pes_ntasks[string] = opti_tasks
                string = "NTHRDS_" + component_class
                pes_nthrds[string] = opti_thrds
                string = "ROOTPE_" + component_class
                pes_rootpe[string] = 0
        else:
            pesobj = Pes(self._pesfile)

            pes_ntasks, pes_nthrds, pes_rootpe, other = pesobj.find_pes_layout(
                self._gridname,
                self._compsetname,
                machine_name,
                pesize_opts=pecount)

        mach_pes_obj = self.get_env("mach_pes")
        totaltasks = {}
        # Since other items may include PES_PER_NODE we need to do this first;
        # we can get rid of this code when all of the perl is removed
        for key, value in other.items():
            self.set_value(key, value)
        pes_per_node = self.get_value("PES_PER_NODE")
        for key, value in pes_ntasks.items():
            totaltasks[key[-3:]] = int(value)
            mach_pes_obj.set_value(key, int(value), pes_per_node=pes_per_node)
        for key, value in pes_rootpe.items():
            totaltasks[key[-3:]] += int(value)
            mach_pes_obj.set_value(key, int(value), pes_per_node=pes_per_node)
        for key, value in pes_nthrds.items():
            totaltasks[key[-3:]] *= int(value)
            mach_pes_obj.set_value(key, int(value), pes_per_node=pes_per_node)

        maxval = 1
        if mpilib != "mpi-serial":
            for key, val in totaltasks.items():
                if val < 0:
                    val = -1 * val * pes_per_node
                if val > maxval:
                    maxval = val

        # Make sure that every component has been accounted for;
        # set nthrds and ntasks to 1 otherwise. Also set the ninst values here.
        for compclass in self._component_classes:
            if compclass == "DRV":
                continue
            key = "NINST_%s" % compclass
            mach_pes_obj.set_value(key, ninst)
            key = "NTASKS_%s" % compclass
            if key not in pes_ntasks.keys():
                mach_pes_obj.set_value(key, 1)
            key = "NTHRDS_%s" % compclass
            if compclass not in pes_nthrds.keys():
                mach_pes_obj.set_value(compclass, 1)

        # FIXME - this is a short term fix for dealing with the restriction that
        # CISM1 cannot run on multiple cores
        if "CISM1" in self._compsetname:
            mach_pes_obj.set_value("NTASKS_GLC", 1)
            mach_pes_obj.set_value("NTHRDS_GLC", 1)

        #--------------------------------------------
        # batch system
        #--------------------------------------------
        batch_system_type = machobj.get_value("BATCH_SYSTEM")
        batch = Batch(batch_system=batch_system_type, machine=machine_name)
        bjobs = batch.get_batch_jobs()
        env_batch = self.get_env("batch")
        env_batch.set_batch_system(batch, batch_system_type=batch_system_type)
        env_batch.create_job_groups(bjobs)
        env_batch.set_job_defaults(bjobs,
                                   pesize=maxval,
                                   walltime=walltime,
                                   force_queue=queue)
        self.schedule_rewrite(env_batch)

        self.set_value("COMPSET", self._compsetname)

        self._set_pio_xml()
        logger.info(" Compset is: %s " % self._compsetname)
        logger.info(" Grid is: %s " % self._gridname)
        logger.info(" Components in compset are: %s " % self._components)

        # Set project id
        if project is None:
            project = get_project(machobj)
        if project is not None:
            self.set_value("PROJECT", project)
        elif machobj.get_value("PROJECT_REQUIRED"):
            expect(project is not None,
                   "PROJECT_REQUIRED is true but no project found")

        # Resolve the CIME_OUTPUT_ROOT variable; other than this
        # we don't want to resolve variables until we need them
        if output_root is None:
            output_root = self.get_value("CIME_OUTPUT_ROOT")
        self.set_value("CIME_OUTPUT_ROOT", output_root)

        # Overwriting an existing exeroot or rundir can cause problems
        exeroot = self.get_value("EXEROOT")
        rundir = self.get_value("RUNDIR")
        for wdir in (exeroot, rundir):
            logging.debug("wdir is %s" % wdir)
            if os.path.exists(wdir):
                expect(not test,
                       "Directory %s already exists, aborting test" % wdir)
                response = raw_input(
                    "\nDirectory %s already exists, (r)eplace, (a)bort, or (u)se existing?"
                    % wdir)
                if response.startswith("r"):
                    shutil.rmtree(wdir)
                else:
                    expect(response.startswith("u"),
                           "Aborting by user request")

        # miscellaneous settings
        if self.get_value("RUN_TYPE") == 'hybrid':
            self.set_value("GET_REFCASE", True)

        # Turn on short term archiving as cesm default setting
        model = get_model()
        self.set_model_version(model)
        if model == "cesm" and not test:
            self.set_value("DOUT_S", True)
            self.set_value("TIMER_LEVEL", 4)
        if test:
            self.set_value("TEST", True)
        self.initialize_derived_attributes()
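
configure() above accepts a pecount of the form 'TASKSxTHREADS' (for example '128x2') or a bare task count, and otherwise falls back to the layout in the Pes XML file. A standalone sketch of that interpretation, reusing the same regexes; the function name and the (tasks, threads) return convention are assumptions for illustration.

import re

def parse_pecount(pecount):
    """Interpret a pecount string: 'TASKSxTHREADS' or 'TASKS'; None (or any
    other string) means the caller should fall back to the pes file."""
    text = "" if pecount is None else pecount
    match1 = re.match(r'([0-9]+)x([0-9]+)', text)
    match2 = re.match(r'([0-9]+)', text)
    if match1:
        return int(match1.group(1)), int(match1.group(2))
    if match2:
        return int(match2.group(1)), 1
    return None  # resolve via Pes().find_pes_layout(..., pesize_opts=pecount)

print(parse_pecount("128x2"))  # (128, 2)
print(parse_pecount("64"))     # (64, 1)
print(parse_pecount("S"))      # None
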
Ejemplo n.º 43
0
Archivo: tsc.py Proyecto: dabail10/cime
    def _compare_baseline(self):
        with self._test_status as ts:
            ts.set_status(CIME.test_status.BASELINE_PHASE,
                          CIME.test_status.TEST_FAIL_STATUS)

            run_dir = self._case.get_value("RUNDIR")
            case_name = self._case.get_value("CASE")
            base_dir = os.path.join(
                self._case.get_value("BASELINE_ROOT"),
                self._case.get_value("BASECMP_CASE"),
            )

            test_name = "{}".format(case_name.split(".")[-1])
            evv_config = {
                test_name: {
                    "module": os.path.join(evv_lib_dir, "extensions",
                                           "tsc.py"),
                    "test-case": case_name,
                    "test-dir": run_dir,
                    "ref-case": "Baseline",
                    "ref-dir": base_dir,
                    "time-slice": [OUT_FREQ, SIM_LENGTH],
                    "inspect-times": INSPECT_AT,
                    "variables": VAR_LIST,
                    "p-threshold": P_THRESHOLD,
                    "component": self.atmmod,
                }
            }

            json_file = os.path.join(run_dir, ".".join([case_name, "json"]))
            with open(json_file, "w") as config_file:
                json.dump(evv_config, config_file, indent=4)

            evv_out_dir = os.path.join(run_dir, ".".join([case_name, "evv"]))
            evv(["-e", json_file, "-o", evv_out_dir])

            with open(os.path.join(evv_out_dir, "index.json"), "r") as evv_f:
                evv_status = json.load(evv_f)

            comments = ""
            for evv_ele in evv_status["Page"]["elements"]:
                if "Table" in evv_ele:
                    comments = "; ".join(
                        "{}: {}".format(key, val[0])
                        for key, val in evv_ele["Table"]["data"].items())
                    if evv_ele["Table"]["data"]["Test status"][0].lower(
                    ) == "pass":
                        self._test_status.set_status(
                            CIME.test_status.BASELINE_PHASE,
                            CIME.test_status.TEST_PASS_STATUS,
                        )
                    break

            status = self._test_status.get_status(
                CIME.test_status.BASELINE_PHASE)
            mach_name = self._case.get_value("MACH")
            mach_obj = Machines(machine=mach_name)
            htmlroot = CIME.utils.get_htmlroot(mach_obj)
            urlroot = CIME.utils.get_urlroot(mach_obj)
            if htmlroot is not None:
                with CIME.utils.SharedArea():
                    dir_util.copy_tree(
                        evv_out_dir,
                        os.path.join(htmlroot, "evv", case_name),
                        preserve_mode=False,
                    )
                if urlroot is None:
                    urlroot = "[{}_URL]".format(mach_name.capitalize())
                viewing = "{}/evv/{}/index.html".format(urlroot, case_name)
            else:
                viewing = (
                    "{}\n"
                    "    EVV viewing instructions can be found at: "
                    "        https://github.com/E3SM-Project/E3SM/blob/master/cime/scripts/"
                    "climate_reproducibility/README.md#test-passfail-and-extended-output"
                    "".format(evv_out_dir))

            comments = ("{} {} for test '{}'.\n"
                        "    {}\n"
                        "    EVV results can be viewed at:\n"
                        "        {}".format(
                            CIME.test_status.BASELINE_PHASE,
                            status,
                            test_name,
                            comments,
                            viewing,
                        ))

            CIME.utils.append_testlog(comments, self._orig_caseroot)
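
_compare_baseline above decides pass or fail by scanning the EVV index.json for the first 'Table' element and reading its 'Test status' entry. A minimal reader for that check; the JSON layout is inferred from the code above and the helper name is hypothetical.

import json

def evv_test_passed(index_json_path):
    """Return True if the first 'Table' element in an EVV index.json reports
    a passing 'Test status', mirroring the loop in _compare_baseline."""
    with open(index_json_path, "r") as evv_f:
        evv_status = json.load(evv_f)
    for evv_ele in evv_status["Page"]["elements"]:
        if "Table" in evv_ele:
            return evv_ele["Table"]["data"]["Test status"][0].lower() == "pass"
    return False
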
Ejemplo n.º 44
0
class SystemTest(object):
###############################################################################

    ###########################################################################
    def __init__(self, test_names,
                 no_run=False, no_build=False, no_batch=None,
                 test_root=None, test_id=None,
                 machine_name=None,compiler=None,
                 baseline_root=None, baseline_name=None,
                 clean=False,compare=False, generate=False, namelists_only=False,
                 project=None, parallel_jobs=None,
                 xml_machine=None, xml_compiler=None, xml_category=None,xml_testlist=None):
    ###########################################################################
        self._cime_root = CIME.utils.get_cime_root()
        self._cime_model = CIME.utils.get_model()
        # needed for perl interface
        os.environ["CIMEROOT"] = self._cime_root
        self._machobj   = Machines(machine=machine_name)
        machine_name    = self._machobj.get_machine_name()

        self._no_build = no_build      if not namelists_only else True
        self._no_run   = no_run        if not self._no_build else True

        # Figure out what project to use
        if (project is None):
            self._project = CIME.utils.get_project()
            if (self._project is None):
                self._project = self._machobj.get_value("PROJECT")
        else:
            self._project = project

        # We will not use batch system if user asked for no_batch or if current
        # machine is not a batch machine
        self._no_batch = no_batch or not self._machobj.has_batch_system()

        self._test_root = test_root if test_root is not None else self._machobj.get_value("CESMSCRATCHROOT")
        if (self._project is not None):
            self._test_root = self._test_root.replace("$PROJECT", self._project)
        self._test_root = os.path.abspath(self._test_root)

        self._test_id = test_id if test_id is not None else CIME.utils.get_utc_timestamp()

        self._compiler = compiler if compiler is not None else self._machobj.get_default_compiler()
        expect(self._machobj.is_valid_compiler(self._compiler),
               "Compiler %s not valid for machine %s" % (self._compiler,machine_name))

        self._clean          = clean
        self._namelists_only = namelists_only

        # Extra data associated with tests, do not modify after construction
        # test_name -> test_data
        #   test_data: name -> value
        self._test_data      = {}

        # If xml options are provided, get tests from the xml file; otherwise use the acme dictionary
        if(not test_names and (xml_machine is not None or xml_category is not None or xml_compiler is not None or xml_testlist is not None)):
            test_data = CIME.test_utils.get_tests_from_xml(xml_machine, xml_category, xml_compiler, xml_testlist, machine_name, compiler)
            test_names = [item["name"] for item in test_data]
            for test_datum in test_data:
                self._test_data[test_datum["name"]] = test_datum
        else:
            expect(len(test_names) > 0, "No tests to run")
            test_names = update_acme_tests.get_full_test_names(test_names, machine_name, self._compiler)

        if (parallel_jobs is None):
            self._parallel_jobs  = min(len(test_names), int(self._machobj.get_value("MAX_TASKS_PER_NODE")))
        else:
            self._parallel_jobs = parallel_jobs

        self._baseline_cmp_name = None
        self._baseline_gen_name = None
        self._compare = False
        self._generate = False
        if (compare or generate):
            # Figure out what baseline name to use
            if (baseline_name is None):
                if(compare is not None and isinstance(compare,str)):
                    self._baseline_cmp_name = compare
                    self._compare = True
                if(generate is not None and isinstance(generate,str)):
                    self._baseline_gen_name = generate
                    self._generate = True
                branch_name = CIME.utils.get_current_branch(repo=self._cime_root)
                expect(branch_name is not None, "Could not determine baseline name from branch, please use -b option")
                if(self._compare and self._baseline_cmp_name is None):
                    self._baseline_cmp_name = os.path.join(self._compiler, branch_name)
                if(self._generate and self._baseline_gen_name is None):
                    self._baseline_gen_name = os.path.join(self._compiler, branch_name)
            else:
                if(compare):
                    self._compare = True
                    self._baseline_cmp_name  = baseline_name
                    if (not self._baseline_cmp_name.startswith("%s/" % self._compiler)):
                        self._baseline_cmp_name = os.path.join(self._compiler, self._baseline_cmp_name)
                if(generate):
                    self._generate = True
                    self._baseline_gen_name  = baseline_name
                    if (not self._baseline_gen_name.startswith("%s/" % self._compiler)):
                        self._baseline_gen_name = os.path.join(self._compiler, self._baseline_gen_name)

            # Compute baseline_root
            self._baseline_root = baseline_root if baseline_root is not None else self._machobj.get_value("CCSM_BASELINE")
            if (self._project is not None):
                self._baseline_root = self._baseline_root.replace("$PROJECT", self._project)
            self._baseline_root = os.path.abspath(self._baseline_root)

            if (self._compare):
                full_baseline_dir = os.path.join(self._baseline_root, self._baseline_cmp_name)
                expect(os.path.isdir(full_baseline_dir),
                       "Missing baseline comparison directory %s" % full_baseline_dir)
        else:
            self._baseline_root = None
        # This is the only data that multiple threads will simultaneously access
        # Each test has its own value, and setting/retrieving items from a dict
        # is atomic, so this should be fine to use without a mutex.
        # Since the namelist phase can fail without aborting later phases, we
        # need some extra state to remember tests that had namelist problems.
        # name -> (phase, status, has_namelist_problem)
        self._tests = {}
        for test_name in test_names:
            self._tests[test_name] = (INITIAL_PHASE, TEST_PASS_STATUS, False)

        # Oversubscribe by 1/4
        pes = int(self._machobj.get_value("PES_PER_NODE"))
        self._proc_pool = int(pes * 1.25)

        # Setup phases
        self._phases = list(PHASES)
        if (no_build):
            self._phases.remove(BUILD_PHASE)
        if (no_run):
            self._phases.remove(RUN_PHASE)
        if (not self._compare and not self._generate):
            self._phases.remove(NAMELIST_PHASE)

        # None of the test directories should already exist.
        for test in self._tests:
            expect(not os.path.exists(self._get_test_dir(test)),
                   "Cannot create new case in directory '%s', it already exists. Pick a different test-id" % self._get_test_dir(test))
        # By the end of this constructor, this program should never hard abort;
        # instead, errors will be placed in the TestStatus files for the various
        # test cases

    ###########################################################################
    def _log_output(self, test, output):
    ###########################################################################
        test_dir = self._get_test_dir(test)
        if (not os.path.isdir(test_dir)):
            # Note: making this directory could cause create_newcase to fail
            # if this is run before create_newcase.
            os.makedirs(test_dir)

        with open(os.path.join(test_dir, "TestStatus.log"), "a") as fd:
            fd.write(output)

    ###########################################################################
    def _get_case_id(self, test):
    ###########################################################################
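        # Case id format: <testname><action>.<test_id>, where <action> is ".C" for
        # baseline-compare runs, ".G" for baseline-generate runs, or empty otherwise
        # (e.g. "ERS.f19_g16.X.C.<test_id>"; the test name here is illustrative).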
        baseline_action_code = ".C" if self._compare else (".G" if self._generate else "")
        return "%s%s.%s" % (test, baseline_action_code, self._test_id)

    ###########################################################################
    def _get_test_dir(self, test):
    ###########################################################################
        return os.path.join(self._test_root, self._get_case_id(test))

    ###########################################################################
    def _get_test_data(self, test):
    ###########################################################################
        # Must be atomic
        return self._tests[test]

    ###########################################################################
    def _is_broken(self, test):
    ###########################################################################
        status = self._get_test_status(test)
        return status not in CONTINUE and status != TEST_PENDING_STATUS

    ###########################################################################
    def _work_remains(self, test):
    ###########################################################################
        test_phase, test_status, _ = self._get_test_data(test)
        return (test_status in CONTINUE or test_status == TEST_PENDING_STATUS) and test_phase != self._phases[-1]

    ###########################################################################
    def _get_test_status(self, test, phase=None):
    ###########################################################################
        curr_phase, curr_status, nl_fail = self._get_test_data(test)
        if (phase == NAMELIST_PHASE and nl_fail):
            return NAMELIST_FAIL_STATUS
        elif (phase is None or phase == curr_phase):
            return curr_status
        else:
            expect(phase is None or self._phases.index(phase) < self._phases.index(curr_phase),
                   "Tried to see the future")
            # Assume all older phases PASSed
            return TEST_PASS_STATUS

    ###########################################################################
    def _get_test_phase(self, test):
    ###########################################################################
        return self._get_test_data(test)[0]

    ###########################################################################
    def _update_test_status(self, test, phase, status):
    ###########################################################################
        phase_idx = self._phases.index(phase)
        old_phase, old_status, old_nl_fail = self._get_test_data(test)
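        # Valid transitions only: within the same phase, PEND -> non-PEND; advancing
        # to a new phase requires the prior phase to have a passing status, the new
        # phase must start as PEND, and phases may not be skipped.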

        if (old_phase == phase):
            expect(old_status == TEST_PENDING_STATUS,
                   "Only valid to transition from PENDING to something else, found '%s'" % old_status)
            expect(status != TEST_PENDING_STATUS,
                   "Cannot transition from PEND -> PEND")
        else:
            expect(old_status in CONTINUE,
                   "Why did we move on to next phase when prior phase did not pass?")
            expect(status == TEST_PENDING_STATUS,
                   "New phase should be set to pending status")
            expect(self._phases.index(old_phase) == phase_idx - 1,
                   "Skipped phase?")
        # Must be atomic
        self._tests[test] = (phase, status, old_nl_fail)


    ###########################################################################
    def _test_has_nl_problem(self, test):
    ###########################################################################
        curr_phase, curr_status, _ = self._get_test_data(test)
        expect(curr_phase == NAMELIST_PHASE, "Setting namelist status outside of namelist phase?")
        # Must be atomic
        self._tests[test] = (curr_phase, curr_status, True)

    ###########################################################################
    def _shell_cmd_for_phase(self, test, cmd, phase, from_dir=None):
    ###########################################################################
        while (True):
            rc, output, errput = run_cmd(cmd, ok_to_fail=True, from_dir=from_dir)
            if (rc != 0):
                self._log_output(test,
                                 "%s FAILED for test '%s'.\nCommand: %s\nOutput: %s\n\nErrput: %s" %
                                 (phase, test, cmd, output, errput))
                # Temporary hack to get around odd file descriptor use by
                # buildnml scripts.
                if ("bad interpreter" in errput):
                    time.sleep(1)
                    continue
                else:
                    break
            else:
                self._log_output(test,
                                 "%s PASSED for test '%s'.\nCommand: %s\nOutput: %s\n\nErrput: %s" %
                                 (phase, test, cmd, output, errput))
                break

        return rc == 0

    ###########################################################################
    def _create_newcase_phase(self, test):
    ###########################################################################
        test_dir = self._get_test_dir(test)

        test_case, case_opts, grid, compset, machine, compiler, test_mods = CIME.utils.parse_test_name(test)
        if (compiler != self._compiler):
            raise StandardError("Test '%s' has compiler that does not match instance compliler '%s'" % (test, self._compiler))
        if (self._parallel_jobs == 1):
            scratch_dir = self._machobj.get_value("CESMSCRATCHROOT")
            if (self._project is not None):
                scratch_dir = scratch_dir.replace("$PROJECT", self._project)
            sharedlibroot = os.path.join(scratch_dir, "sharedlibroot.%s" % self._test_id)
        else:
            # Parallelizing builds introduces potential sync problems with sharedlibroot
            # Just let every case build its own
            sharedlibroot = os.path.join(test_dir, "sharedlibroot.%s" % self._test_id)
        create_newcase_cmd = "%s -model %s -case %s -res %s -mach %s -compiler %s -compset %s -testname %s -project %s -sharedlibroot %s" % \
                              (os.path.join(self._cime_root,"scripts", "create_newcase"),
                               self._cime_model,test_dir, grid, machine, compiler, compset, test_case, self._project,
                               sharedlibroot)
        if (test_case != 'PFS'):
            create_newcase_cmd += " -nosavetiming "
        if (case_opts is not None):
            create_newcase_cmd += " -confopts _%s" % ("_".join(case_opts))
        if (test_mods is not None):
            files = Files()
            (component, mods) = test_mods.split('/')
            testmods_dir = files.get_value("TESTS_MODS_DIR",{"component": component})
            test_mod_file = os.path.join(testmods_dir, component, mods)
            if (not os.path.exists(test_mod_file)):
                self._log_output(test, "Missing testmod file '%s'" % test_mod_file)
                return False
            create_newcase_cmd += " -user_mods_dir %s" % test_mod_file
        logging.info("Calling create_newcase: "+create_newcase_cmd)
        return self._shell_cmd_for_phase(test, create_newcase_cmd, CREATE_NEWCASE_PHASE)

    ###########################################################################
    def _xml_phase(self, test):
    ###########################################################################

        test_case = CIME.utils.parse_test_name(test)[0]
        envtest = EnvTest(self._get_test_dir(test))

        files = Files()
        drv_config_file = files.get_value("CONFIG_DRV_FILE")
        logging.info("Found drv_config_file %s" % drv_config_file)

        drv_comp = Component(drv_config_file)
        envtest.add_elements_by_group(drv_comp, {}, "env_test.xml")
        envtest.set_value("TESTCASE", test_case)
        envtest.set_value("TEST_TESTID", self._test_id)
        envtest.set_value("CASEBASEID", test)

        test_argv = "-testname %s -testroot %s" % (test, self._test_root)
        if (self._generate):
            test_argv += " -generate %s" % self._baseline_gen_name
            envtest.set_value("BASELINE_NAME_GEN",self._baseline_gen_name)
            envtest.set_value("BASEGEN_CASE",os.path.join(self._baseline_gen_name,test))
        if (self._compare):
            test_argv += " -compare %s" % self._baseline_cmp_name
            envtest.set_value("BASELINE_NAME_CMP",self._baseline_cmp_name)
            envtest.set_value("BASECMP_CASE",os.path.join(self._baseline_cmp_name,test))

        envtest.set_value("TEST_ARGV",test_argv)
        envtest.set_value("CLEANUP", ("TRUE" if self._clean else "FALSE"))

        if (self._generate or self._compare):
            envtest.set_value("BASELINE_ROOT", self._baseline_root)

        envtest.set_value("GENERATE_BASELINE", "TRUE" if self._generate else "FALSE")
        envtest.set_value("COMPARE_BASELINE", "TRUE" if self._compare else "FALSE")
        envtest.set_value("CCSM_CPRNC", self._machobj.get_value("CCSM_CPRNC",resolved=False))
        envtest.write()
        return True

    ###########################################################################
    def _setup_phase(self, test):
    ###########################################################################
        test_case = CIME.utils.parse_test_name(test)[0]
        test_dir  = self._get_test_dir(test)
        test_case_definition_dir = os.path.join(self._cime_root, "scripts", "Testing", "Testcases")
        test_build = os.path.join(test_dir, "case.test_build" )

        if (os.path.exists(os.path.join(test_case_definition_dir, "%s_build.csh" % test_case))):
            shutil.copy(os.path.join(test_case_definition_dir, "%s_build.csh" % test_case), test_build)
        else:
            shutil.copy(os.path.join(test_case_definition_dir, "tests_build.csh"), test_build)

        return self._shell_cmd_for_phase(test, "./case.setup", SETUP_PHASE, from_dir=test_dir)

    ###########################################################################
    def _nlcomp_phase(self, test):
    ###########################################################################
        test_dir          = self._get_test_dir(test)
        casedoc_dir       = os.path.join(test_dir, "CaseDocs")
        compare_nl        = os.path.join(CIME.utils.get_acme_scripts_root(), "compare_namelists")
        simple_compare    = os.path.join(CIME.utils.get_acme_scripts_root(), "simple_compare")

        if (self._compare):
            has_fails = False
            baseline_dir      = os.path.join(self._baseline_root, self._baseline_cmp_name, test)
            baseline_casedocs = os.path.join(baseline_dir, "CaseDocs")

            # Start off by comparing everything in CaseDocs except a few arbitrary files (ugh!)
            # TODO: Namelist files should have consistent suffix
            all_items_to_compare = \
                [ item for item in glob.glob("%s/*" % casedoc_dir) if "README" not in os.path.basename(item) and not item.endswith("doc") and not item.endswith("prescribed") and not os.path.basename(item).startswith(".")] + \
                glob.glob("%s/*user_nl*" % test_dir)
            for item in all_items_to_compare:
                baseline_counterpart = os.path.join(baseline_casedocs if os.path.dirname(item).endswith("CaseDocs") else baseline_dir,
                                                    os.path.basename(item))
                if (not os.path.exists(baseline_counterpart)):
                    self._log_output(test, "Missing baseline namelist '%s'" % baseline_counterpart)
                    has_fails = True
                else:
                    if (compare_namelists.is_namelist_file(item)):
                        rc, output, _  = run_cmd("%s %s %s -c %s 2>&1" % (compare_nl, baseline_counterpart, item, test), ok_to_fail=True)
                    else:
                        rc, output, _  = run_cmd("%s %s %s -c %s 2>&1" % (simple_compare, baseline_counterpart, item, test), ok_to_fail=True)

                    if (rc != 0):
                        has_fails = True
                        self._log_output(test, output)

            if (has_fails):
                self._test_has_nl_problem(test)

        if (self._generate):
            baseline_dir      = os.path.join(self._baseline_root, self._baseline_gen_name, test)
            baseline_casedocs = os.path.join(baseline_dir, "CaseDocs")
            if (not os.path.isdir(baseline_dir)):
                os.makedirs(baseline_dir, stat.S_IRWXU | stat.S_IRWXG | stat.S_IXOTH | stat.S_IROTH)

            if (os.path.isdir(baseline_casedocs)):
                shutil.rmtree(baseline_casedocs)
            shutil.copytree(casedoc_dir, baseline_casedocs)
            for item in glob.glob(os.path.join(test_dir, "user_nl*")):
                shutil.copy2(item, baseline_dir)

        # Always mark as passed unless we hit an exception
        return True

    ###########################################################################
    def _build_phase(self, test):
    ###########################################################################
        test_dir = self._get_test_dir(test)
        return self._shell_cmd_for_phase(test, "./case.test_build", BUILD_PHASE, from_dir=test_dir)

    ###########################################################################
    def _run_phase(self, test):
    ###########################################################################
        test_dir = self._get_test_dir(test)
        # wallclock is an optional field in the version 2.0 testlist.xml file
        # setting wallclock time close to the expected test time will help queue throughput
        if (test in self._test_data and "wallclock" in self._test_data[test]):
            run_cmd("./xmlchange JOB_WALLCLOCK_TIME=%s" % self._test_data[test]["wallclock"], from_dir=test_dir)

        return self._shell_cmd_for_phase(test, "./case.submit", RUN_PHASE, from_dir=test_dir)

    ###########################################################################
    def _update_test_status_file(self, test):
    ###########################################################################
        # TODO: The run scripts heavily use the TestStatus file. So we write out
        # the phases we have taken care of and then let the run scripts go from there.
        # Eventually, it would be nice to have TestStatus management encapsulated
        # into a single place.
        str_to_write = ""
        made_it_to_phase = self._get_test_phase(test)
        made_it_to_phase_idx = self._phases.index(made_it_to_phase)
        for phase in self._phases[0:made_it_to_phase_idx+1]:
            str_to_write += "%s %s %s\n" % (self._get_test_status(test, phase), test, phase)

        if (not self._no_run and not self._is_broken(test) and made_it_to_phase == BUILD_PHASE):
            # Ensure PEND state always gets added to TestStatus file if we are
            # about to run test
            str_to_write += "%s %s %s\n" % (TEST_PENDING_STATUS, test, RUN_PHASE)

        test_status_file = os.path.join(self._get_test_dir(test), TEST_STATUS_FILENAME)
        with open(test_status_file, "w") as fd:
            fd.write(str_to_write)

    ###########################################################################
    def _run_catch_exceptions(self, test, phase, run):
    ###########################################################################
        try:
            return run(test)
        except Exception as e:
            exc_tb = sys.exc_info()[2]
            errput = "Test '%s' failed in phase '%s' with exception '%s'" % (test, phase, str(e))
            self._log_output(test, errput)
            logging.warning("Caught exception: %s" % str(e))
            traceback.print_tb(exc_tb)
            return False

    ###########################################################################
    def _get_procs_needed(self, test, phase):
    ###########################################################################
        if (phase == RUN_PHASE and self._no_batch):
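            # Without a batch system the model run occupies real local cores, so
            # reserve the case's full PE count (TOTALPES) from the proc pool; every
            # other phase only costs a single slot.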
            test_dir = self._get_test_dir(test)
            out = run_cmd("./xmlquery TOTALPES -value", from_dir=test_dir)
            return int(out)
        else:
            return 1

    ###########################################################################
    def _handle_test_status_file(self, test, test_phase, success):
    ###########################################################################
        #
        # This complexity is due to sharing of TestStatus responsibilities
        #
        try:
            if (test_phase != RUN_PHASE and
                (not success or test_phase == BUILD_PHASE or test_phase == self._phases[-1])):
                self._update_test_status_file(test)

            # If we failed VERY early on in the run phase, it's possible that
            # the CIME scripts never got a chance to set the state.
            elif (test_phase == RUN_PHASE and not success):
                test_status_file = os.path.join(self._get_test_dir(test), TEST_STATUS_FILENAME)
                statuses = wait_for_tests.parse_test_status_file(test_status_file)[0]
                if ( RUN_PHASE not in statuses or
                     statuses[RUN_PHASE] in [TEST_PASS_STATUS, TEST_PENDING_STATUS] ):
                    self._update_test_status_file(test)

        except Exception as e:
            # TODO: What to do here? This failure is very severe because the
            # only way for test results to be communicated is by the TestStatus
            # file.
            logging.critical("VERY BAD! Could not handle TestStatus file '%s': '%s'" %
                    (os.path.join(self._get_test_dir(test), TEST_STATUS_FILENAME), str(e)))
            thread.interrupt_main()

    ###########################################################################
    def _wait_for_something_to_finish(self, threads_in_flight):
    ###########################################################################
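        # Poll the threads in flight until at least one finishes, then return the
        # finished tests' processors to the pool and drop them from the in-flight map.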
        expect(len(threads_in_flight) <= self._parallel_jobs, "Oversubscribed?")
        finished_tests = []
        while (not finished_tests):
            for test, thread_info in threads_in_flight.iteritems():
                if (not thread_info[0].is_alive()):
                    finished_tests.append( (test, thread_info[1]) )

            if (not finished_tests):
                time.sleep(0.2)

        for finished_test, procs_needed in finished_tests:
            self._proc_pool += procs_needed
            del threads_in_flight[finished_test]

    ###########################################################################
    def _consumer(self, test, test_phase, phase_method):
    ###########################################################################
        before_time = time.time()
        success = self._run_catch_exceptions(test, test_phase, phase_method)
        elapsed_time = time.time() - before_time
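        # A successful run phase on a batch machine only means the job was submitted,
        # so leave the status as PEND and let the batch job record the final result.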
        status  = (TEST_PENDING_STATUS if test_phase == RUN_PHASE and not self._no_batch else TEST_PASS_STATUS) if success else TEST_FAIL_STATUS

        if (status != TEST_PENDING_STATUS):
            self._update_test_status(test, test_phase, status)
        self._handle_test_status_file(test, test_phase, success)

        status_str = "Finished %s for test %s in %f seconds (%s)\n" % (test_phase, test, elapsed_time, status)
        if (not success):
            status_str += "    Case dir: %s\n" % self._get_test_dir(test)
        sys.stdout.write(status_str)

    ###########################################################################
    def _producer(self):
    ###########################################################################
        threads_in_flight = {} # test-name -> (thread, procs)
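        # Greedy scheduler: repeatedly sweep all tests, launching the next phase for
        # any test whose prior phase passed, as long as processors remain in the pool.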
        while (True):
            work_to_do = False
            num_threads_launched_this_iteration = 0
            for test in self._tests:
                logging.info("test_name: " + test)
                # If we have no workers available, immediately wait
                if (len(threads_in_flight) == self._parallel_jobs):
                    self._wait_for_something_to_finish(threads_in_flight)

                if (self._work_remains(test)):
                    work_to_do = True
                    if (test not in threads_in_flight):
                        test_phase, test_status, _ = self._get_test_data(test)
                        expect(test_status != TEST_PENDING_STATUS, test)
                        next_phase = self._phases[self._phases.index(test_phase) + 1]
                        procs_needed = self._get_procs_needed(test, next_phase)

                        if (procs_needed <= self._proc_pool):
                            self._proc_pool -= procs_needed

                            # Necessary to print this way when multiple threads printing
                            sys.stdout.write("Starting %s for test %s with %d procs\n" % (next_phase, test, procs_needed))

                            self._update_test_status(test, next_phase, TEST_PENDING_STATUS)
                            t = threading.Thread(target=self._consumer,
                                                 args=(test, next_phase, getattr(self, "_%s_phase" % next_phase.lower()) ))
                            threads_in_flight[test] = (t, procs_needed)
                            t.start()
                            num_threads_launched_this_iteration += 1

            if (not work_to_do):
                break

            if (num_threads_launched_this_iteration == 0):
                # No free resources, wait for something in flight to finish
                self._wait_for_something_to_finish(threads_in_flight)

        for thread_info in threads_in_flight.values():
            thread_info[0].join()

    ###########################################################################
    def _setup_cs_files(self):
    ###########################################################################
        try:
            python_libs_root = CIME.utils.get_python_libs_root()
            acme_scripts_root = CIME.utils.get_acme_scripts_root()
            template_file = os.path.join(python_libs_root, "cs.status.template")
            template = open(template_file, "r").read()
            template = template.replace("<PATH>", acme_scripts_root).replace("<TESTID>", self._test_id)

            cs_status_file = os.path.join(self._test_root, "cs.status.%s" % self._test_id)
            with open(cs_status_file, "w") as fd:
                fd.write(template)
            os.chmod(cs_status_file, os.stat(cs_status_file).st_mode | stat.S_IXUSR | stat.S_IXGRP)

            template_file = os.path.join(python_libs_root, "cs.submit.template")
            template = open(template_file, "r").read()
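            # cs.submit fills in the steps that were skipped here: the build command is
            # ./*.test_build only when the build was skipped (":" is a no-op otherwise),
            # and the run command either executes ./*.test directly (no batch system)
            # or submits via ./*.submit.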
            build_cmd = "./*.test_build" if self._no_build else ":"
            run_cmd = "./*.test" if self._no_batch else "./*.submit"
            template = template.replace("<BUILD_CMD>", build_cmd).replace("<RUN_CMD>", run_cmd).replace("<TESTID>", self._test_id)

            if (self._no_build or self._no_run):
                cs_submit_file = os.path.join(self._test_root, "cs.submit.%s" % self._test_id)
                with open(cs_submit_file, "w") as fd:
                    fd.write(template)
                os.chmod(cs_submit_file, os.stat(cs_submit_file).st_mode | stat.S_IXUSR | stat.S_IXGRP)

        except Exception as e:
            logging.warning("FAILED to set up cs files: %s" % str(e))

    ###########################################################################
    def system_test(self):
    ###########################################################################
        """
        Main API for this class.

        Return True if all tests passed.
        """
        start_time = time.time()

        # Tell user what will be run
        print "RUNNING TESTS:"
        for test in self._tests:
            print " ", test

        # TODO - documentation

        self._producer()

        expect(threading.active_count() == 1, "Leftover threads?")

        # Setup cs files
        self._setup_cs_files()

        # Return True if all tests passed
        print "At system_test close, state is:"
        rv = True
        for test in self._tests:
            phase, status, nl_fail = self._get_test_data(test)
            logging.debug("phase %s status %s" % (phase, status))
            if (status == TEST_PASS_STATUS and phase == RUN_PHASE):
                # Be cautious about telling the user that the test passed. This
                # status should match what they would see on the dashboard. Our
                # self._tests data does not include comparison-fail information,
                # so we need to parse the TestStatus file.
                test_status_file = os.path.join(self._get_test_dir(test), TEST_STATUS_FILENAME)
                status = wait_for_tests.interpret_status_file(test_status_file)[1]

            if (status not in [TEST_PASS_STATUS, TEST_PENDING_STATUS]):
                print "%s %s (phase %s)" % (status, test, phase)
                rv = False

            elif (nl_fail):
                print "%s %s (but otherwise OK)" % (NAMELIST_FAIL_STATUS, test)
                rv = False

            else:
                print status, test, phase

            print "    Case dir: %s" % self._get_test_dir(test)

        print "system_test took", time.time() - start_time, "seconds"

        return rv
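A minimal, hypothetical usage sketch for a SystemTest-style runner like the class above (the test names, machine, and compiler below are placeholders, not values taken from the source):

import sys

# Hypothetical driver: construct the runner and execute all phases for each test.
tests = ["ERS.f19_g16.X", "SMS.f45_g37.A"]   # placeholder test names
suite = SystemTest(tests, machine_name="mymachine", compiler="gnu")
ok = suite.system_test()                     # True only if every test passed
sys.exit(0 if ok else 1)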
Example No. 45
0
            case.load_env()

        models = case.get_values("COMP_CLASSES")
        mach = case.get_value("MACH")
        compiler = case.get_value("COMPILER")
        debug = case.get_value("DEBUG")
        mpilib = case.get_value("MPILIB")
        sysos = case.get_value("OS")
        comp_interface = case.get_value("COMP_INTERFACE")
        expect(mach is not None, "xml variable MACH is not set")

        # creates the Macros.make, Depends.compiler, Depends.machine, Depends.machine.compiler
        # and env_mach_specific.xml if they don't already exist.
        if not os.path.isfile("Macros.make") or not os.path.isfile(
                "env_mach_specific.xml"):
            configure(Machines(machine=mach), caseroot, ["Makefile"], compiler,
                      mpilib, debug, comp_interface, sysos)

        # Set tasks to 1 if mpi-serial library
        if mpilib == "mpi-serial":
            for vid, value in case:
                if vid.startswith("NTASKS") and value != 1:
                    case.set_value(vid, 1)

        # Check ninst.
        # In CIME there can be multiple instances of each component model (an ensemble); NINST is the number of instances of that component.
        comp_interface = case.get_value("COMP_INTERFACE")
        if comp_interface == "nuopc":
            ninst = case.get_value("NINST")

        multi_driver = case.get_value("MULTI_DRIVER")
Example No. 46
0
File: pgn.py Project: fmyuan/cime
    def _compare_baseline(self):
        """
        Compare baselines in the pergro test sense. That is,
        compare PGE from the test simulation with the baseline
        cloud
        """
        with self._test_status:
            self._test_status.set_status(CIME.test_status.BASELINE_PHASE,
                                         CIME.test_status.TEST_FAIL_STATUS)

            logger.debug("PGN_INFO:BASELINE COMPARISON STARTS")

            run_dir = self._case.get_value("RUNDIR")
            case_name = self._case.get_value("CASE")
            base_dir = os.path.join(self._case.get_value("BASELINE_ROOT"),
                                    self._case.get_value("BASECMP_CASE"))

            var_list = self.get_var_list()

            test_name = "{}".format(case_name.split('.')[-1])
            evv_config = {
                test_name: {
                    "module": os.path.join(evv_lib_dir, "extensions", "pg.py"),
                    "test-case": case_name,
                    "test-name": "Test",
                    "test-dir": run_dir,
                    "ref-name": "Baseline",
                    "ref-dir": base_dir,
                    "variables": var_list,
                    "perturbations": PERTURBATIONS,
                    "pge-cld": FCLD_NC,
                    "ninit": NUMBER_INITIAL_CONDITIONS,
                    "init-file-template": INIT_COND_FILE_TEMPLATE,
                    "instance-file-template": INSTANCE_FILE_TEMPLATE,
                    "init-model": "cam",
                    "component": self.atmmod,
                }
            }

            json_file = os.path.join(run_dir, '.'.join([case_name, 'json']))
            with open(json_file, 'w') as config_file:
                json.dump(evv_config, config_file, indent=4)

            evv_out_dir = os.path.join(run_dir, '.'.join([case_name, 'evv']))
            evv(['-e', json_file, '-o', evv_out_dir])

            with open(os.path.join(evv_out_dir, 'index.json'), 'r') as evv_f:
                evv_status = json.load(evv_f)

            comments = ""
            for evv_elem in evv_status['Data']['Elements']:
                if evv_elem['Type'] == 'ValSummary' \
                        and evv_elem['TableTitle'] == 'Perturbation growth test':
                    comments = "; ".join("{}: {}".format(key, val)
                                         for key, val in evv_elem['Data']
                                         [test_name][''].items())
                    if evv_elem['Data'][test_name]['']['Test status'].lower(
                    ) == 'pass':
                        self._test_status.set_status(
                            CIME.test_status.BASELINE_PHASE,
                            CIME.test_status.TEST_PASS_STATUS)
                    break

            status = self._test_status.get_status(
                CIME.test_status.BASELINE_PHASE)
            mach_name = self._case.get_value("MACH")
            mach_obj = Machines(machine=mach_name)
            htmlroot = CIME.utils.get_htmlroot(mach_obj)
            urlroot = CIME.utils.get_urlroot(mach_obj)
            if htmlroot is not None:
                with CIME.utils.SharedArea():
                    dir_util.copy_tree(evv_out_dir,
                                       os.path.join(htmlroot, 'evv',
                                                    case_name),
                                       preserve_mode=False)
                if urlroot is None:
                    urlroot = "[{}_URL]".format(mach_name.capitalize())
                viewing = "{}/evv/{}/index.html".format(urlroot, case_name)
            else:
                viewing = "{}\n" \
                          "    EVV viewing instructions can be found at: " \
                          "        https://github.com/E3SM-Project/E3SM/blob/master/cime/scripts/" \
                          "climate_reproducibility/README.md#test-passfail-and-extended-output" \
                          "".format(evv_out_dir)

            comments = "{} {} for test '{}'.\n" \
                       "    {}\n" \
                       "    EVV results can be viewed at:\n" \
                       "        {}".format(CIME.test_status.BASELINE_PHASE, status, test_name, comments, viewing)

            CIME.utils.append_testlog(comments, self._orig_caseroot)
Example No. 47
0
    def __init__(self,
                 test_names,
                 test_data=None,
                 no_run=False,
                 no_build=False,
                 no_setup=False,
                 no_batch=None,
                 test_root=None,
                 test_id=None,
                 machine_name=None,
                 compiler=None,
                 baseline_root=None,
                 baseline_cmp_name=None,
                 baseline_gen_name=None,
                 clean=False,
                 namelists_only=False,
                 project=None,
                 parallel_jobs=None,
                 walltime=None,
                 proc_pool=None,
                 use_existing=False,
                 save_timing=False,
                 queue=None,
                 allow_baseline_overwrite=False,
                 output_root=None):
        ###########################################################################
        self._cime_root = CIME.utils.get_cime_root()
        self._cime_model = CIME.utils.get_model()
        self._allow_baseline_overwrite = allow_baseline_overwrite
        self._save_timing = save_timing
        self._queue = queue
        self._test_data = {} if test_data is None else test_data  # Format:  {test_name -> {data_name -> data}}

        self._machobj = Machines(machine=machine_name)

        self._no_setup = no_setup
        self._no_build = no_build or no_setup or namelists_only
        self._no_run = no_run or self._no_build
        self._output_root = output_root
        # Figure out what project to use
        if project is None:
            self._project = CIME.utils.get_project()
            if self._project is None:
                self._project = self._machobj.get_value("PROJECT")
        else:
            self._project = project

        # We will not use batch system if user asked for no_batch or if current
        # machine is not a batch machine
        self._no_batch = no_batch or not self._machobj.has_batch_system()
        expect(not (self._no_batch and self._queue is not None),
               "Does not make sense to request a queue without batch system")

        # Determine and resolve test_root
        if test_root is not None:
            self._test_root = test_root
        elif self._output_root is not None:
            self._test_root = self._output_root
        else:
            self._test_root = self._machobj.get_value("CIME_OUTPUT_ROOT")

        if self._project is not None:
            self._test_root = self._test_root.replace("$PROJECT",
                                                      self._project)

        self._test_root = os.path.abspath(self._test_root)
        self._test_id = test_id if test_id is not None else CIME.utils.get_timestamp(
        )

        self._compiler = self._machobj.get_default_compiler(
        ) if compiler is None else compiler

        self._clean = clean
        self._namelists_only = namelists_only

        self._walltime = walltime

        if parallel_jobs is None:
            self._parallel_jobs = min(
                len(test_names),
                int(self._machobj.get_value("MAX_TASKS_PER_NODE")))
        else:
            self._parallel_jobs = parallel_jobs

        self._baseline_cmp_name = baseline_cmp_name  # Implies comparison should be done if not None
        self._baseline_gen_name = baseline_gen_name  # Implies generation should be done if not None

        if baseline_cmp_name or baseline_gen_name:
            # Compute baseline_root
            self._baseline_root = baseline_root if baseline_root is not None \
                else self._machobj.get_value("BASELINE_ROOT")

            if self._project is not None:
                self._baseline_root = self._baseline_root.replace(
                    "$PROJECT", self._project)

            self._baseline_root = os.path.abspath(self._baseline_root)

            if self._baseline_cmp_name:
                full_baseline_dir = os.path.join(self._baseline_root,
                                                 self._baseline_cmp_name)
                expect(
                    os.path.isdir(full_baseline_dir),
                    "Missing baseline comparison directory %s" %
                    full_baseline_dir)

            # the following is to ensure that an existing generate directory is not overwritten
            if self._baseline_gen_name:
                full_baseline_dir = os.path.join(self._baseline_root,
                                                 self._baseline_gen_name)
                existing_baselines = []
                for test_name in test_names:
                    test_baseline = os.path.join(full_baseline_dir, test_name)
                    if os.path.isdir(test_baseline):
                        existing_baselines.append(test_baseline)
                expect(allow_baseline_overwrite or len(existing_baselines) == 0,
                           "Baseline directories already exists %s\n"\
                           "Use --allow_baseline_overwrite to avoid this error"%existing_baselines)
        else:
            self._baseline_root = None

        # This is the only data that multiple threads will simultaneously access
        # Each test has its own value, and setting/retrieving items from a dict
        # is atomic, so this should be fine to use without a mutex.
        # name -> (phase, status)
        self._tests = {}
        for test_name in test_names:
            self._tests[test_name] = (TEST_START, TEST_PASS_STATUS)

        # Oversubscribe by 1/4
        if proc_pool is None:
            pes = int(self._machobj.get_value("PES_PER_NODE"))
            self._proc_pool = int(pes * 1.25)
        else:
            self._proc_pool = int(proc_pool)

        self._procs_avail = self._proc_pool

        # Setup phases
        self._phases = list(PHASES)
        if self._no_setup:
            self._phases.remove(SETUP_PHASE)
        if self._no_build:
            self._phases.remove(SHAREDLIB_BUILD_PHASE)
            self._phases.remove(MODEL_BUILD_PHASE)
        if self._no_run:
            self._phases.remove(RUN_PHASE)

        if use_existing:
            for test in self._tests:
                ts = TestStatus(self._get_test_dir(test))
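                # Replay the recorded phase history so this run resumes where the
                # previous one left off; stop at the first core phase that is still
                # pending or failed.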
                for phase, status in ts:
                    if phase in CORE_PHASES:
                        if status in [TEST_PEND_STATUS, TEST_FAIL_STATUS]:
                            # We need to pick up here
                            break
                        else:
                            self._update_test_status(test, phase,
                                                     TEST_PEND_STATUS)
                            self._update_test_status(test, phase, status)
        else:
            # None of the test directories should already exist.
            for test in self._tests:
                expect(
                    not os.path.exists(self._get_test_dir(test)),
                    "Cannot create new case in directory '%s', it already exists."
                    " Pick a different test-id" % self._get_test_dir(test))
Example No. 48
0
class SystemTest(object):
###############################################################################

    ###########################################################################
    def __init__(self, test_names,
                 no_run=False, no_build=False, no_batch=None,
                 test_root=None, test_id=None,
                 machine_name=None, compiler=None,
                 baseline_root=None, baseline_name=None,
                 clean=False, compare=False, generate=False, namelists_only=False,
                 project=None, parallel_jobs=None,
                 xml_machine=None, xml_compiler=None, xml_category=None,
                 xml_testlist=None, walltime=None, proc_pool=None,
                 use_existing=False):
    ###########################################################################
        self._cime_root = CIME.utils.get_cime_root()
        self._cime_model = CIME.utils.get_model()
        # needed for perl interface
        os.environ["CIMEROOT"] = self._cime_root

        # if machine_name is set use it, otherwise if xml_machine is set use it,
        # otherwise probe for machine_name
        if machine_name is None:
            machine_name = xml_machine

        self._machobj = Machines(machine=machine_name)
        machine_name = self._machobj.get_machine_name()

        self._no_build = no_build if not namelists_only else True
        self._no_run = no_run if not self._no_build else True

        # Figure out what project to use
        if project is None:
            self._project = CIME.utils.get_project()
            if self._project is None:
                self._project = self._machobj.get_value("PROJECT")
        else:
            self._project = project

        # We will not use batch system if user asked for no_batch or if current
        # machine is not a batch machine
        self._no_batch = no_batch or not self._machobj.has_batch_system()

        self._test_root = test_root if test_root is not None \
            else self._machobj.get_value("CESMSCRATCHROOT")

        if self._project is not None:
            self._test_root = self._test_root.replace("$PROJECT", self._project)

        self._test_root = os.path.abspath(self._test_root)
        self._test_id   = test_id if test_id is not None else CIME.utils.get_utc_timestamp()

        # if compiler is set use it, otherwise if xml_compiler is set use it,
        # otherwise use the default compiler for the machine
        if compiler is not None:
            self._compiler = compiler
        elif xml_compiler is not None:
            self._compiler = xml_compiler
        else:
            self._compiler = self._machobj.get_default_compiler()

        expect(self._machobj.is_valid_compiler(self._compiler),
               "Compiler %s not valid for machine %s" % (self._compiler, machine_name))

        self._clean          = clean
        self._namelists_only = namelists_only

        # Extra data associated with tests, do not modify after construction
        # test_name -> test_data
        #   test_data: name -> value
        self._test_xml = {}

        # If xml options are provided, get the tests from the xml file; otherwise use the acme dictionary
        if not test_names and (xml_machine is not None or xml_category is not None or
                               xml_compiler is not None or xml_testlist is not None):
            test_data = CIME.test_utils.get_tests_from_xml(xml_machine, xml_category,
                                                           xml_compiler, xml_testlist,
                                                           machine_name, compiler)
            test_names = [item["name"] for item in test_data]
            for test_datum in test_data:
                self._test_xml[test_datum["name"]] = test_datum
        else:
            expect(len(test_names) > 0, "No tests to run")
            test_names = update_acme_tests.get_full_test_names(test_names,
                                                               machine_name, self._compiler)

        if walltime is not None:
            for test in test_names:
                if test in self._test_xml:
                    test_datum = self._test_xml[test]
                else:
                    test_datum = {}
                    self._test_xml[test] = test_datum
                test_datum["wallclock"] = walltime


        if parallel_jobs is None:
            self._parallel_jobs = min(len(test_names),
                                      int(self._machobj.get_value("MAX_TASKS_PER_NODE")))
        else:
            self._parallel_jobs = parallel_jobs

        self._baseline_cmp_name = None
        self._baseline_gen_name = None
        self._compare = False
        self._generate = False
        if compare or generate:
            # Figure out what baseline name to use
            if baseline_name is None:
                if compare is not None and isinstance(compare, str):
                    self._baseline_cmp_name = compare
                    self._compare = True
                if generate is not None and isinstance(generate, str):
                    self._baseline_gen_name = generate
                    self._generate = True

                if self._compare and self._baseline_cmp_name is None:
                    branch_name = CIME.utils.get_current_branch(repo=self._cime_root)
                    expect(branch_name is not None,
                           "Could not determine baseline name from branch, please use -b option")
                    self._baseline_cmp_name = os.path.join(self._compiler, branch_name)
                if self._generate and self._baseline_gen_name is None:
                    branch_name = CIME.utils.get_current_branch(repo=self._cime_root)
                    expect(branch_name is not None,
                           "Could not determine baseline name from branch, please use -b option")
                    self._baseline_gen_name = os.path.join(self._compiler, branch_name)
            else:
                if compare:
                    self._compare = True
                    self._baseline_cmp_name = baseline_name
                    if not self._baseline_cmp_name.startswith("%s/" % self._compiler):
                        self._baseline_cmp_name = os.path.join(self._compiler,
                                                               self._baseline_cmp_name)
                if generate:
                    self._generate = True
                    self._baseline_gen_name  = baseline_name
                    if not self._baseline_gen_name.startswith("%s/" % self._compiler):
                        self._baseline_gen_name = os.path.join(self._compiler,
                                                               self._baseline_gen_name)

            # Compute baseline_root
            self._baseline_root = baseline_root if baseline_root is not None \
                else self._machobj.get_value("CCSM_BASELINE")

            if self._project is not None:
                self._baseline_root = self._baseline_root.replace("$PROJECT", self._project)

            self._baseline_root = os.path.abspath(self._baseline_root)

            if self._compare:
                full_baseline_dir = os.path.join(self._baseline_root, self._baseline_cmp_name)
                expect(os.path.isdir(full_baseline_dir),
                       "Missing baseline comparison directory %s" % full_baseline_dir)
        else:
            self._baseline_root = None

        # This is the only data that multiple threads will simultaneously access
        # Each test has its own value, and setting/retrieving items from a dict
        # is atomic, so this should be fine to use without a mutex.
        # Since the namelist phase can fail without aborting later phases, we
        # need some extra state to remember tests that had namelist problems.
        # name -> (phase, status, has_namelist_problem)
        self._tests = {}
        for test_name in test_names:
            self._tests[test_name] = (INITIAL_PHASE, TEST_PASS_STATUS, False)

        # Oversubscribe by 1/4
        if proc_pool is None:
            pes = int(self._machobj.get_value("PES_PER_NODE"))
            self._proc_pool = int(pes * 1.25)
        else:
            self._proc_pool = int(proc_pool)

        self._procs_avail = self._proc_pool

        # Setup phases
        self._phases = list(PHASES)
        if no_build:
            self._phases.remove(SHAREDLIB_BUILD_PHASE)
            self._phases.remove(MODEL_BUILD_PHASE)
        if no_run:
            self._phases.remove(RUN_PHASE)
        if not self._compare and not self._generate:
            self._phases.remove(NAMELIST_PHASE)

        if use_existing:
            for test in self._tests:
                test_status_file = os.path.join(self._get_test_dir(test), TEST_STATUS_FILENAME)
                statuses = wait_for_tests.parse_test_status_file(test_status_file)[0]
                for phase, status in statuses.iteritems():
                    if phase != INITIAL_PHASE:
                        self._update_test_status(test, phase, TEST_PENDING_STATUS)
                        self._update_test_status(test, phase, status)
        else:
            # None of the test directories should already exist.
            for test in self._tests:
                expect(not os.path.exists(self._get_test_dir(test)),
                       "Cannot create new case in directory '%s', it already exists."
                       " Pick a different test-id" % self._get_test_dir(test))

        # By the end of this constructor, this program should never hard abort;
        # instead, errors will be placed in the TestStatus files for the various
        # test cases

    ###########################################################################
    def _log_output(self, test, output):
    ###########################################################################
        test_dir = self._get_test_dir(test)
        if not os.path.isdir(test_dir):
            # Note: making this directory could cause create_newcase to fail
            # if this is run before create_newcase.
            os.makedirs(test_dir)
        append_status(output,caseroot=test_dir,sfile="TestStatus.log")

    ###########################################################################
    def _get_case_id(self, test):
    ###########################################################################
        baseline_action_code = ".C" if self._compare else (".G" if self._generate else "")
        return "%s%s.%s" % (test, baseline_action_code, self._test_id)

    ###########################################################################
    def _get_test_dir(self, test):
    ###########################################################################
        return os.path.join(self._test_root, self._get_case_id(test))

    ###########################################################################
    def _get_test_data(self, test):
    ###########################################################################
        # Must be atomic
        return self._tests[test]

    ###########################################################################
    def _is_broken(self, test):
    ###########################################################################
        status = self._get_test_status(test)
        return status not in CONTINUE and status != TEST_PENDING_STATUS

    ###########################################################################
    def _work_remains(self, test):
    ###########################################################################
        test_phase, test_status, _ = self._get_test_data(test)
        return (test_status in CONTINUE or test_status == TEST_PENDING_STATUS) and\
            test_phase != self._phases[-1]

    ###########################################################################
    def _get_test_status(self, test, phase=None):
    ###########################################################################
        curr_phase, curr_status, nl_fail = self._get_test_data(test)
        if phase == NAMELIST_PHASE and nl_fail:
            return NAMELIST_FAIL_STATUS
        elif phase is None or phase == curr_phase:
            return curr_status
        else:
            expect(phase is None or self._phases.index(phase) < self._phases.index(curr_phase),
                   "Tried to see the future")
            # Assume all older phases PASSed
            return TEST_PASS_STATUS

    ###########################################################################
    def _get_test_phase(self, test):
    ###########################################################################
        return self._get_test_data(test)[0]

    ###########################################################################
    def _update_test_status(self, test, phase, status):
    ###########################################################################
        phase_idx = self._phases.index(phase)
        old_phase, old_status, old_nl_fail = self._get_test_data(test)

        if old_phase == phase:
            expect(old_status == TEST_PENDING_STATUS,
                   "Only valid to transition from PENDING to something else, found '%s' for phase '%s'" %
                   (old_status, phase))
            expect(status != TEST_PENDING_STATUS,
                   "Cannot transition from PEND -> PEND")
        else:
            expect(old_status in CONTINUE,
                   "Why did we move on to next phase when prior phase did not pass?")
            expect(status == TEST_PENDING_STATUS,
                   "New phase should be set to pending status")
            expect(self._phases.index(old_phase) == phase_idx - 1,
                   "Skipped phase?")
        # Must be atomic
        self._tests[test] = (phase, status, old_nl_fail)

    ###########################################################################
    def _test_has_nl_problem(self, test):
    ###########################################################################
        curr_phase, curr_status, _ = self._get_test_data(test)
        expect(curr_phase == NAMELIST_PHASE, "Setting namelist status outside of namelist phase?")
        # Must be atomic
        self._tests[test] = (curr_phase, curr_status, True)

    ###########################################################################
    def _shell_cmd_for_phase(self, test, cmd, phase, from_dir=None):
    ###########################################################################
        while True:
            rc, output, errput = run_cmd(cmd, ok_to_fail=True, from_dir=from_dir)
            if rc != 0:
                self._log_output(test,
                                 "%s FAILED for test '%s'.\nCommand: %s\nOutput: %s\n\nErrput: %s" %
                                 (phase, test, cmd, output, errput))
                # Temporary hack to get around odd file descriptor use by
                # buildnml scripts.
                if "bad interpreter" in errput:
                    time.sleep(1)
                    continue
                else:
                    break
            else:
                self._log_output(test,
                                 "%s PASSED for test '%s'.\nCommand: %s\nOutput: %s\n\nErrput: %s" %
                                 (phase, test, cmd, output, errput))
                break

        return rc == 0

    ###########################################################################
    def _create_newcase_phase(self, test):
    ###########################################################################
        test_dir = self._get_test_dir(test)

        _, case_opts, grid, compset,\
            machine, compiler, test_mods = CIME.utils.parse_test_name(test)
        if compiler != self._compiler:
            raise StandardError("Test '%s' has compiler that does"
                                " not match instance compiler '%s'" % (test, self._compiler))

        create_newcase_cmd = "%s --case %s --res %s --mach %s --compiler %s --compset %s"\
                               " --project %s --test"%\
                              (os.path.join(self._cime_root, "scripts", "create_newcase"),
                               test_dir, grid, machine, compiler, compset, self._project)

        if test_mods is not None:
            files = Files()
            (component,modspath) = test_mods.split('/',1)
            testmods_dir = files.get_value("TESTS_MODS_DIR", {"component": component})

            test_mod_file = os.path.join(testmods_dir, component, modspath)
            if not os.path.exists(test_mod_file):
                self._log_output(test, "Missing testmod file '%s'" % test_mod_file)
                return False
            create_newcase_cmd += " --user-mods-dir %s" % test_mod_file

        if case_opts is not None:
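            # Case options that create_newcase handles directly: M<mpilib>, N<ninst>
            # and the P[SMLX] pe-count sizes. The other options are handled later
            # in the XML phase.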
            for case_opt in case_opts:
                if case_opt.startswith('M'):
                    mpilib = case_opt[1:]
                    create_newcase_cmd += " --mpilib %s" % mpilib
                    logger.debug(" MPILIB set to %s" % mpilib)
                if case_opt.startswith('N'):
                    ninst = case_opt[1:]
                    create_newcase_cmd += " --ninst %s" % ninst
                    logger.debug(" NINST set to %s" % ninst)
                pesize = re.match('P([SMLX][12]?)', case_opt)
                if pesize:
                    create_newcase_cmd += " --pecount %s" % pesize.group(1)

        logger.debug("Calling create_newcase: " + create_newcase_cmd)
        return self._shell_cmd_for_phase(test, create_newcase_cmd, CREATE_NEWCASE_PHASE)

    ###########################################################################
    def _xml_phase(self, test):
    ###########################################################################
        test_case = CIME.utils.parse_test_name(test)[0]

        # Create, fill and write an envtest object
        test_dir = self._get_test_dir(test)
        envtest = EnvTest(test_dir)

        # Determine list of component classes that this coupler/driver knows how
        # to deal with. This list follows the same order as compset longnames follow.
        files = Files()
        drv_config_file = files.get_value("CONFIG_DRV_FILE")
        drv_comp = Component(drv_config_file)
        component_classes = drv_comp.get_valid_model_components()
        envtest.add_elements_by_group(drv_comp, {}, "env_test.xml")
        envtest.set_value("TESTCASE", test_case)
        envtest.set_value("TEST_TESTID", self._test_id)
        envtest.set_value("CASEBASEID", test)

        test_argv = "-testname %s -testroot %s" % (test, self._test_root)
        if self._generate:
            test_argv += " -generate %s" % self._baseline_gen_name
            envtest.set_value("BASELINE_NAME_GEN", self._baseline_gen_name)
            envtest.set_value("BASEGEN_CASE", os.path.join(self._baseline_gen_name, test))
        if self._compare:
            test_argv += " -compare %s" % self._baseline_cmp_name
            envtest.set_value("BASELINE_NAME_CMP", self._baseline_cmp_name)
            envtest.set_value("BASECMP_CASE", os.path.join(self._baseline_cmp_name, test))

        envtest.set_value("TEST_ARGV", test_argv)
        envtest.set_value("CLEANUP", self._clean)

        if self._generate or self._compare:
            envtest.set_value("BASELINE_ROOT", self._baseline_root)
        envtest.set_value("GENERATE_BASELINE", self._generate)
        envtest.set_value("COMPARE_BASELINE", self._compare)
        envtest.set_value("CCSM_CPRNC", self._machobj.get_value("CCSM_CPRNC", resolved=False))

        """
        Add the test instructions from config_test to env_test in the case
        """
        config_test = Tests()
        testnode = config_test.get_test_node(test_case)
        envtest.add_test(testnode)

        # Re-parse the test name to pick up the case options as well
        test_case, case_opts = CIME.utils.parse_test_name(test)[:2]

        # Apply the case options embedded in the test name
        if case_opts is not None:
            logger.debug("case_opts are %s " %case_opts)
            for opt in case_opts:

                logger.debug("case_opt is %s" %opt)
                if opt == 'D':
                    envtest.set_test_parameter("DEBUG", "TRUE")
                    logger.debug (" DEBUG set to TRUE")

                elif opt == 'E':
                    envtest.set_test_parameter("USE_ESMF_LIB", "TRUE")
                    envtest.set_test_parameter("COMP_INTERFACE", "ESMF")
                    logger.debug (" USE_ESMF_LIB set to TRUE")
                    logger.debug (" COMP_INTERFACE set to ESMF")

                elif opt == 'CG':
                    envtest.set_test_parameter("CALENDAR", "GREGORIAN")
                    logger.debug (" CALENDAR set to %s" %opt)

                elif opt.startswith('L'):
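                    # L<unit><N> sets the run length, e.g. "Ld3" gives
                    # STOP_OPTION=ndays and STOP_N=3.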
                    match = re.match('L([A-Za-z])([0-9]*)', opt)
                    stop_option = {"y": "nyears", "m": "nmonths", "d": "ndays", "h": "nhours",
                                   "s": "nseconds", "n": "nsteps"}
                    opt = match.group(1)
                    envtest.set_test_parameter("STOP_OPTION", stop_option[opt])
                    opti = match.group(2)
                    envtest.set_test_parameter("STOP_N", opti)
                    logger.debug(" STOP_OPTION set to %s" % stop_option[opt])
                    logger.debug(" STOP_N      set to %s" % opti)

                elif opt.startswith('M'):
                    # M option handled by create newcase
                    continue

                elif opt.startswith('P'):
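                    # PE-layout options: P<N> gives N tasks with 1 thread per
                    # component, P<N>x<M> gives N tasks and M threads, and the
                    # P[SMLX] sizes were already handled by create_newcase.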
                    # Patterns are anchored so "P16x2" is not misread as "P16"
                    # with the thread count silently dropped.
                    match1 = re.match('P([0-9]+)$', opt)
                    match2 = re.match('P([0-9]+)x([0-9]+)$', opt)
                    match3 = re.match('P[SMLX][12]?$', opt)

                    opti_tasks = None
                    if match1:
                        opti_tasks = match1.group(1)
                        for component_class in component_classes:
                            if component_class == "DRV":
                                component_class = "CPL"
                            string = "NTASKS_" + component_class
                            envtest.set_test_parameter(string, opti_tasks)
                            string = "NTHRDS_" + component_class
                            envtest.set_test_parameter(string, str(1))
                            string = "ROOTPE_" + component_class
                            envtest.set_test_parameter(string, str(0))
                        opti_thrds = 1
                    elif match2:
                        opti_tasks = match2.group(1)
                        opti_thrds = match2.group(2)
                        for component_class in component_classes:
                            if component_class == "DRV":
                                component_class = "CPL"
                            string = "NTASKS_" + component_class
                            envtest.set_test_parameter(string, opti_tasks)
                            string = "NTHRDS_" + component_class
                            envtest.set_test_parameter(string, opti_thrds)
                            string = "ROOTPE_" + component_class
                            envtest.set_test_parameter(string, str(0))
                    elif match3:

                        # handled by create_newcase
                        continue
                    if not match3:
                        expect(opti_tasks is not None, "No match found for PE option %s" % opt)
                        logger.debug(" NTASKS_xxx set to %s" % opti_tasks)
                        logger.debug(" NTHRDS_xxx set to %s" % opti_thrds)
                        logger.debug(" ROOTPE_xxx set to 0")

                elif opt.startswith('N'):
                    # handled in create_newcase
                    continue
                elif opt.startswith('IOP'):
                    logger.warning("IOP test option not yet implemented")
                else:
                    expect(False, "Could not parse option '%s' " %opt)

        envtest.write()
        lockedfiles = os.path.join(test_dir, "LockedFiles")
        if not os.path.exists(lockedfiles):
            os.mkdir(lockedfiles)
        shutil.copy(os.path.join(test_dir,"env_run.xml"),
                    os.path.join(lockedfiles, "env_run.orig.xml"))

        case = Case(test_dir)
        case.set_value("SHAREDLIBROOT",
                       os.path.join(self._test_root,
                                    "sharedlibroot.%s"%self._test_id))

        envtest.set_initial_values(case)
        return True

    ###########################################################################
    def _setup_phase(self, test):
    ###########################################################################
        test_dir  = self._get_test_dir(test)

        return self._shell_cmd_for_phase(test, "./case.setup", SETUP_PHASE, from_dir=test_dir)

    ###########################################################################
    def _nlcomp_phase(self, test):
    ###########################################################################
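        # In compare mode, diff every generated namelist (and user_nl file) against
        # its baseline copy; in generate mode, refresh the baseline CaseDocs and
        # user_nl files instead.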
        test_dir       = self._get_test_dir(test)
        casedoc_dir    = os.path.join(test_dir, "CaseDocs")
        compare_nl     = os.path.join(CIME.utils.get_scripts_root(), "Tools", "compare_namelists")
        simple_compare = os.path.join(CIME.utils.get_scripts_root(), "Tools", "simple_compare")

        if self._compare:
            has_fails         = False
            baseline_dir      = os.path.join(self._baseline_root, self._baseline_cmp_name, test)
            baseline_casedocs = os.path.join(baseline_dir, "CaseDocs")

            # Start off by comparing everything in CaseDocs except a few arbitrary files (ugh!)
            # TODO: Namelist files should have consistent suffix
            all_items_to_compare = [item for item in glob.glob("%s/*" % casedoc_dir)\
                                    if "README" not in os.path.basename(item)\
                                    and not item.endswith("doc")\
                                    and not item.endswith("prescribed")\
                                    and not os.path.basename(item).startswith(".")] + \
                                    glob.glob("%s/*user_nl*" % test_dir)
            for item in all_items_to_compare:
                baseline_counterpart = os.path.join(baseline_casedocs \
                                                    if os.path.dirname(item).endswith("CaseDocs") \
                                                    else baseline_dir,os.path.basename(item))
                if not os.path.exists(baseline_counterpart):
                    self._log_output(test, "Missing baseline namelist '%s'" % baseline_counterpart)
                    has_fails = True
                else:
                    if compare_namelists.is_namelist_file(item):
                        rc, output, _  = run_cmd("%s %s %s -c %s 2>&1" %
                                                 (compare_nl, baseline_counterpart, item, test),
                                                 ok_to_fail=True)
                    else:
                        rc, output, _  = run_cmd("%s %s %s -c %s 2>&1" %
                                                 (simple_compare, baseline_counterpart, item, test),
                                                 ok_to_fail=True)

                    if rc != 0:
                        has_fails = True
                        self._log_output(test, output)

            if has_fails:
                self._test_has_nl_problem(test)

        if self._generate:
            baseline_dir      = os.path.join(self._baseline_root, self._baseline_gen_name, test)
            baseline_casedocs = os.path.join(baseline_dir, "CaseDocs")
            if not os.path.isdir(baseline_dir):
                os.makedirs(baseline_dir, stat.S_IRWXU | stat.S_IRWXG | stat.S_IXOTH | stat.S_IROTH)

            if os.path.isdir(baseline_casedocs):
                shutil.rmtree(baseline_casedocs)

            shutil.copytree(casedoc_dir, baseline_casedocs)
            os.chmod(baseline_casedocs, stat.S_IRWXU | stat.S_IRWXG | stat.S_IXOTH | stat.S_IROTH)
            for item in glob.glob("%s/*" % baseline_casedocs):
                os.chmod(item, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP)

            for item in glob.glob(os.path.join(test_dir, "user_nl*")):
                preexisting_baseline = os.path.join(baseline_dir, os.path.basename(item))
                if (os.path.exists(preexisting_baseline)):
                    os.remove(preexisting_baseline)
                shutil.copy2(item, baseline_dir)
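                # chmod the file we just copied (same path whether or not a baseline pre-existed)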
                os.chmod(preexisting_baseline, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP)

        # Always mark as passed unless we hit exception
        return True

    ###########################################################################
    def _sharedlib_build_phase(self, test):
    ###########################################################################
        test_dir = self._get_test_dir(test)
        return self._shell_cmd_for_phase(test, "./case.build --sharedlib-only", SHAREDLIB_BUILD_PHASE, from_dir=test_dir)

    ###########################################################################
    def _model_build_phase(self, test):
    ###########################################################################
        test_dir = self._get_test_dir(test)
        return self._shell_cmd_for_phase(test, "./case.build --model-only", MODEL_BUILD_PHASE, from_dir=test_dir)

    ###########################################################################
    def _run_phase(self, test):
    ###########################################################################
        test_dir = self._get_test_dir(test)
        # wallclock is an optional field in the version 2.0 testlist.xml file
        # setting wallclock time close to the expected test time will help queue throughput
        if test in self._test_xml and "wallclock" in self._test_xml[test]:
            run_cmd("./xmlchange JOB_WALLCLOCK_TIME=%s" %
                    self._test_xml[test]["wallclock"], from_dir=test_dir)
        if self._no_batch:
            cmd = "./case.submit --no-batch"
        else:
            cmd = "./case.submit"

        return self._shell_cmd_for_phase(test, cmd, RUN_PHASE, from_dir=test_dir)

    ###########################################################################
    def _update_test_status_file(self, test):
    ###########################################################################
        # TODO: The run scripts heavily use the TestStatus file. So we write out
        # the phases we have taken care of and then let the run scripts go from there.
        # Eventually, it would be nice to have TestStatus management encapsulated
        # into a single place.
        str_to_write = ""
        made_it_to_phase = self._get_test_phase(test)
        made_it_to_phase_idx = self._phases.index(made_it_to_phase)
        for phase in self._phases[0:made_it_to_phase_idx+1]:
            str_to_write += "%s %s %s\n" % (self._get_test_status(test, phase), test, phase)

        if not self._no_run and not self._is_broken(test) and made_it_to_phase == MODEL_BUILD_PHASE:
            # Ensure PEND state always gets added to TestStatus file if we are
            # about to run test
            str_to_write += "%s %s %s\n" % (TEST_PENDING_STATUS, test, RUN_PHASE)

        test_status_file = os.path.join(self._get_test_dir(test), TEST_STATUS_FILENAME)
        with open(test_status_file, "w") as fd:
            fd.write(str_to_write)

    ###########################################################################
    def _run_catch_exceptions(self, test, phase, run):
    ###########################################################################
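        # Run one phase callback, converting any exception (including SystemExit)
        # into a logged failure so a single bad test cannot take down the scheduler.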
        try:
            return run(test)
        except (SystemExit, Exception) as e:
            exc_tb = sys.exc_info()[2]
            errput = "Test '%s' failed in phase '%s' with exception '%s'" % (test, phase, str(e))
            self._log_output(test, errput)
            logger.warning("Caught exception: %s" % str(e))
            traceback.print_tb(exc_tb)
            return False

    ###########################################################################
    def _get_procs_needed(self, test, phase, threads_in_flight=None):
    ###########################################################################
        if phase == RUN_PHASE and self._no_batch:
            test_dir = self._get_test_dir(test)
            out = run_cmd("./xmlquery TOTALPES -value", from_dir=test_dir)
            return int(out)
        elif (phase == SHAREDLIB_BUILD_PHASE):
            # Will force serialization of sharedlib builds
            # TODO - instead of serializing, compute all library configs needed and build
            # them all in parallel
            for _, _, running_phase in threads_in_flight.values():
                if (running_phase == SHAREDLIB_BUILD_PHASE):
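                    # Ask for more procs than the pool can ever supply; the
                    # producer will then wait until the in-flight sharedlib
                    # build finishes, serializing these builds.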
                    return self._proc_pool + 1

            return 1
        elif (phase == MODEL_BUILD_PHASE):
            # Model builds now happen in parallel
            return 4
        else:
            return 1

    ###########################################################################
    def _handle_test_status_file(self, test, test_phase, success):
    ###########################################################################
        #
        # This complexity is due to sharing of TestStatus responsibilities
        #
        try:
            if test_phase != RUN_PHASE and (not success or test_phase == MODEL_BUILD_PHASE
                                            or test_phase == self._phases[-1]):
                self._update_test_status_file(test)

            # If we failed VERY early on in the run phase, it's possible that
            # the CIME scripts never got a chance to set the state.
            elif test_phase == RUN_PHASE and not success:
                test_status_file = os.path.join(self._get_test_dir(test), TEST_STATUS_FILENAME)
                statuses = wait_for_tests.parse_test_status_file(test_status_file)[0]
                if RUN_PHASE not in statuses or\
                   (statuses[RUN_PHASE] in [TEST_PASS_STATUS, TEST_PENDING_STATUS]):
                    self._update_test_status_file(test)

        except Exception as e:
            # TODO: What to do here? This failure is very severe because the
            # only way for test results to be communicated is by the TestStatus
            # file.
            logger.critical("VERY BAD! Could not handle TestStatus file '%s': '%s'" %
                             (os.path.join(self._get_test_dir(test), TEST_STATUS_FILENAME), str(e)))
            thread.interrupt_main()

    ###########################################################################
    def _wait_for_something_to_finish(self, threads_in_flight):
    ###########################################################################
        expect(len(threads_in_flight) <= self._parallel_jobs, "Oversubscribed?")
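        # Poll the in-flight threads until at least one finishes, then reclaim its
        # processors and drop it from the in-flight table.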
        finished_tests = []
        while not finished_tests:
            for test, thread_info in threads_in_flight.iteritems():
                if not thread_info[0].is_alive():
                    finished_tests.append((test, thread_info[1]))

            if not finished_tests:
                time.sleep(0.2)

        for finished_test, procs_needed in finished_tests:
            self._procs_avail += procs_needed
            del threads_in_flight[finished_test]

    ###########################################################################
    def _consumer(self, test, test_phase, phase_method):
    ###########################################################################
        before_time = time.time()
        success = self._run_catch_exceptions(test, test_phase, phase_method)
        elapsed_time = time.time() - before_time
        if success:
            status = TEST_PENDING_STATUS if (test_phase == RUN_PHASE and not self._no_batch) \
                     else TEST_PASS_STATUS
        else:
            status = TEST_FAIL_STATUS
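        # On batch systems a successful submit leaves the run phase PENDING; the
        # queued job is expected to report the final result via the TestStatus file.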

        if status != TEST_PENDING_STATUS:
            self._update_test_status(test, test_phase, status)
        self._handle_test_status_file(test, test_phase, success)

        status_str = "Finished %s for test %s in %f seconds (%s)" %\
                     (test_phase, test, elapsed_time, status)
        if not success:
            status_str += "    Case dir: %s" % self._get_test_dir(test)
        logger.info(status_str)

        # On batch systems, we want to immediately submit to the queue, because
        # it's very cheap to submit and will get us a better spot in line
        if (success and not self._no_run and not self._no_batch and test_phase == MODEL_BUILD_PHASE):
            logger.info("Starting %s for test %s with %d procs" % (RUN_PHASE, test, 1))
            self._update_test_status(test, RUN_PHASE, TEST_PENDING_STATUS)
            self._consumer(test, RUN_PHASE, self._run_phase)

    ###########################################################################
    def _producer(self):
    ###########################################################################
        threads_in_flight = {} # test-name -> (thread, procs, phase)
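        # Greedy scheduler: repeatedly walk the test list, launch the next phase of
        # any test whose processor request fits in the available pool, and block
        # when all worker slots or all processors are taken.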
        while True:
            work_to_do = False
            num_threads_launched_this_iteration = 0
            for test in self._tests:
                logger.debug("test_name: " + test)
                # If we have no workers available, immediately wait
                if len(threads_in_flight) == self._parallel_jobs:
                    self._wait_for_something_to_finish(threads_in_flight)

                if self._work_remains(test):
                    work_to_do = True
                    if test not in threads_in_flight:
                        test_phase, test_status, _ = self._get_test_data(test)
                        expect(test_status != TEST_PENDING_STATUS, test)
                        next_phase = self._phases[self._phases.index(test_phase) + 1]
                        procs_needed = self._get_procs_needed(test, next_phase, threads_in_flight)

                        if procs_needed <= self._procs_avail:
                            self._procs_avail -= procs_needed

                            # Necessary to print this way when multiple threads printing
                            logger.info("Starting %s for test %s with %d procs" %
                                        (next_phase, test, procs_needed))

                            self._update_test_status(test, next_phase, TEST_PENDING_STATUS)
                            new_thread = threading.Thread(target=self._consumer,
                                args=(test, next_phase, getattr(self, "_%s_phase" % next_phase.lower())) )
                            threads_in_flight[test] = (new_thread, procs_needed, next_phase)
                            new_thread.start()
                            num_threads_launched_this_iteration += 1

            if not work_to_do:
                break

            if num_threads_launched_this_iteration == 0:
                # No free resources, wait for something in flight to finish
                self._wait_for_something_to_finish(threads_in_flight)

        for unfinished_thread, _, _ in threads_in_flight.values():
            unfinished_thread.join()

    ###########################################################################
    def _setup_cs_files(self):
    ###########################################################################
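        # Write the convenience cs.status.<test_id> script into the test root and,
        # when the build or run was skipped, a matching cs.submit.<test_id> script.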
        try:
            python_libs_root = CIME.utils.get_python_libs_root()
            template_file = os.path.join(python_libs_root, "cs.status.template")
            with open(template_file, "r") as fd:
                template = fd.read()
            template = template.replace("<PATH>", os.path.join(self._cime_root, "scripts", "Tools"))\
                               .replace("<TESTID>", self._test_id)
            cs_status_file = os.path.join(self._test_root, "cs.status.%s" % self._test_id)
            with open(cs_status_file, "w") as fd:
                fd.write(template)
            os.chmod(cs_status_file, os.stat(cs_status_file).st_mode | stat.S_IXUSR | stat.S_IXGRP)

            template_file = os.path.join(python_libs_root, "cs.submit.template")
            with open(template_file, "r") as fd:
                template = fd.read()
            build_cmd = "./*.build" if self._no_build else ":"
            # Use a distinct name so the module-level run_cmd() helper is not shadowed
            submit_cmd = "./*.test" if self._no_batch else "./*.submit"
            template = template.replace("<BUILD_CMD>", build_cmd)\
                               .replace("<RUN_CMD>", submit_cmd)\
                               .replace("<TESTID>", self._test_id)

            if self._no_build or self._no_run:
                cs_submit_file = os.path.join(self._test_root, "cs.submit.%s" % self._test_id)
                with open(cs_submit_file, "w") as fd:
                    fd.write(template)
                os.chmod(cs_submit_file,
                         os.stat(cs_submit_file).st_mode | stat.S_IXUSR | stat.S_IXGRP)

        except Exception as e:
            logger.warning("FAILED to set up cs files: %s" % str(e))

    ###########################################################################
    def system_test(self):
    ###########################################################################
        """
        Main API for this class.

        Return True if all tests passed.
        """
        start_time = time.time()

        # Tell user what will be run
        logger.info( "RUNNING TESTS:")
        for test in self._tests:
            logger.info( "  %s"% test)

        # TODO - documentation

        self._producer()

        expect(threading.active_count() == 1, "Leftover threads?")

        # Setup cs files
        self._setup_cs_files()

        # Return True if all tests passed
        logger.info( "At system_test close, state is:")
        rv = True
        for test in self._tests:
            phase, status, nl_fail = self._get_test_data(test)
            logger.debug("phase %s status %s" % (phase, status))
            if status == TEST_PASS_STATUS and phase == RUN_PHASE:
                # Be cautious about telling the user that the test passed. This
                # status should match what they would see on the dashboard. Our
                # self._tests data does not include comparison fail information,
                # so we need to parse the test status file.
                test_status_file = os.path.join(self._get_test_dir(test), TEST_STATUS_FILENAME)
                status = wait_for_tests.interpret_status_file(test_status_file)[1]

            if status not in [TEST_PASS_STATUS, TEST_PENDING_STATUS]:
                logger.info("%s %s (phase %s)" % (status, test, phase))
                rv = False

            elif nl_fail:
                logger.info("%s %s (but otherwise OK)" % (NAMELIST_FAIL_STATUS, test))
                rv = False

            else:
                logger.info("status=%s test=%s phase=%s" % (status, test, phase))

            logger.info("    Case dir: %s" % self._get_test_dir(test))

        logger.info("system_test took %s seconds" % (time.time() - start_time))

        return rv
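
# A minimal driver sketch (hypothetical names -- the scheduler class and its
# constructor are defined earlier in this file and are not shown here):
#
#     scheduler = SystemTest(test_names)      # hypothetical constructor call
#     all_passed = scheduler.system_test()    # main API; returns True if all tests passed
#     sys.exit(0 if all_passed else 1)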