def framework_verify(code_root, run_output):
    """Check that every link in the framework's html output points to an
    existing file; print a report and exit with code 1 if any are missing.
    """
    print("Checking linked output files")
    try:
        index_path = os.path.join(run_output, 'index.html')
        if not os.path.exists(index_path):
            raise IOError(
                f"Can't find framework html output in {index_path}")
        verifier = LinkVerifier(index_path, verbose=False)
        missing = verifier.verify_all_links()
    except Exception as exc:
        fatal_exception_handler(exc, "ERROR in link verification.")
    if missing:
        print("ERROR: the following files are missing:")
        print(util.pretty_print_json(missing))
        util.exit_handler(code=1)
    print("SUCCESS: no missing links found.")
    print("Finished: framework test run successful!")
Example #2
0
    def parse_pod_list(self, pod_list, pod_info_tuple):
        """Expand the user's --pods argument into a de-duplicated list of
        POD names.

        Accepts the keywords 'all', 'example'/'examples', realm names, and
        individual POD names; exits the package on unrecognized identifiers
        or an empty selection.
        """
        pod_data = pod_info_tuple.pod_data  # pod names -> contents of settings file
        requested = util.to_iter(pod_list, set)
        unrecognized = []
        selected = []
        for item in requested:
            if item == 'all':
                # everything except the example PODs
                selected.extend(
                    p for p in pod_data if not p.startswith('example'))
            elif item in ('example', 'examples'):
                # only the example PODs
                selected.extend(
                    p for p in pod_data if p.startswith('example'))
            elif item in pod_info_tuple.realm_data:
                # realm_data: realm name -> list of POD names for that realm
                selected.extend(pod_info_tuple.realm_data[item])
            elif item in pod_data:
                # an individual POD, selected by name
                selected.append(item)
            else:
                _log.error("POD identifier '%s' not recognized.", item)
                unrecognized.append(item)

        if unrecognized:
            valid_args = (['all', 'examples']
                          + pod_info_tuple.sorted_realms
                          + pod_info_tuple.sorted_pods)
            _log.critical((
                "The following POD identifiers were not recognized: "
                "[%s].\nRecognized identifiers are: [%s].\n(Received --pods = %s)."
            ), ', '.join(f"'{p}'" for p in unrecognized),
                ', '.join(f"'{p}'" for p in valid_args),
                str(list(requested)))
            util.exit_handler(code=1)

        selected = list(set(selected))  # delete duplicates
        if not selected:
            _log.critical((
                "ERROR: no PODs selected to be run. Do `./mdtf info pods`"
                " for a list of available PODs, and check your -p/--pods argument."
                f"\nReceived --pods = {str(list(requested))}"))
            util.exit_handler(code=1)
        return selected
Example #3
0
    def verify_paths(self, config, p):
        """Validate the framework's key directories, creating those that may
        be created on the fly, and exit the package on mis-specified paths.

        Lives here rather than in PathManager because it's overridden in the
        NOAA_GFDL site-specific subclass.
        """
        keep_temp = config.get('keep_temp', False)
        # Wipe any stale WORKING_DIR unless temp files are being kept or
        # WORKING_DIR doubles as the output directory.
        if os.path.exists(p.WORKING_DIR) \
                and not (keep_temp or p.WORKING_DIR == p.OUTPUT_DIR):
            shutil.rmtree(p.WORKING_DIR)

        dir_specs = (
            ('CODE_ROOT', False),       # must already exist
            ('OBS_DATA_ROOT', False),   # must already exist
            ('MODEL_DATA_ROOT', True),  # created if absent
            ('WORKING_DIR', True),      # created if absent
        )
        try:
            for dir_name, create_ in dir_specs:
                util.check_dir(p, dir_name, create=create_)
        except Exception as exc:
            _log.fatal((f"Input settings for {dir_name} mis-specified (caught "
                        f"{repr(exc)}.)"))
            util.exit_handler(code=1)
 def _set_case_root_dir(self, log=_log):
     """Set CASE_ROOT_DIR, falling back first to the global config value and
     then to MODEL_DATA_ROOT; exit if the resulting directory doesn't exist.
     """
     config = core.ConfigManager()
     paths = core.PathManager()
     # first fallback: the globally-configured CASE_ROOT_DIR
     if not self.CASE_ROOT_DIR and config.CASE_ROOT_DIR:
         log.debug("Using global CASE_ROOT_DIR = '%s'.",
                   config.CASE_ROOT_DIR)
         self.CASE_ROOT_DIR = config.CASE_ROOT_DIR
     # second fallback: MODEL_DATA_ROOT from the path manager
     if not self.CASE_ROOT_DIR:
         fallback_root = getattr(paths, 'MODEL_DATA_ROOT', None)
         log.debug("Setting CASE_ROOT_DIR to MODEL_DATA_ROOT = '%s'.",
                   fallback_root)
         self.CASE_ROOT_DIR = fallback_root
     # whatever was chosen must actually exist on disk
     if not os.path.isdir(self.CASE_ROOT_DIR):
         log.critical("Data directory CASE_ROOT_DIR = '%s' not found.",
                      self.CASE_ROOT_DIR)
         util.exit_handler(code=1)
    def __post_init__(self, log=_log):
        """Validate user input: require a configuration file and force the
        no-translation variable naming convention.
        """
        super(ExplicitFileDataAttributes, self).__post_init__(log=log)

        config = core.ConfigManager()
        # fall back to the globally-configured --config-file, if any
        if not self.config_file:
            self.config_file = config.get('config_file', '')
        if not self.config_file:
            log.critical(
                ("No configuration file found for ExplicitFileDataSource "
                 "(--config-file)."))
            util.exit_handler(code=1)

        # this data source bypasses variable name translation entirely, so
        # silently override any other convention the user supplied
        if self.convention != core._NO_TRANSLATION_CONVENTION:
            log.debug(
                "Received incompatible convention '%s'; setting to '%s'.",
                self.convention, core._NO_TRANSLATION_CONVENTION)
            self.convention = core._NO_TRANSLATION_CONVENTION
Example #6
0
 def __post_init__(self, log=_log):
     """Validate user input: default sample_dataset from CASENAME, and make
     sure the corresponding subdirectory of CASE_ROOT_DIR exists.
     """
     super(SampleDataAttributes, self).__post_init__(log=log)
     # fall back to CASENAME when no sample_dataset was given
     if not self.sample_dataset and self.CASENAME:
         log.debug(
             "'sample_dataset' not supplied, using CASENAME = '%s'.",
             self.CASENAME
         )
         self.sample_dataset = self.CASENAME
     # the chosen dataset must be a subdirectory of CASE_ROOT_DIR
     dataset_dir = os.path.join(self.CASE_ROOT_DIR, self.sample_dataset)
     if not os.path.isdir(dataset_dir):
         log.critical(
             "Sample dataset '%s' not found in CASE_ROOT_DIR = '%s'.",
             self.sample_dataset, self.CASE_ROOT_DIR)
         util.exit_handler(code=1)
Example #7
0
def untar_data(ftp_data, install_config):
    """Extract tar files of obs/model data and move contents to correct location.

    Args:
        ftp_data: Mapping whose values describe downloaded archives; each value
            is expected to have ``file``, ``target_dir`` and ``contents_subdir``
            attributes (schema inferred from usage here -- confirm against caller).
        install_config: Dict mapping the ``target_dir`` keys to destination paths.

    Calls ``fatal_exception_handler`` (presumably fatal) if extraction, moving,
    or cleanup of any archive fails.
    """
    if platform.system() == 'Darwin':  # workaround for macos
        # Extract via Archive Utility.app instead of the stock `tar` command:
        # -W waits for the app to exit, -g/-j keep it out of the foreground.
        tar_cmd = 'open -W -g -j -a "{}" '
        test_path = "/System/Library/CoreServices/Applications/Archive Utility.app"
        if os.path.exists(test_path):
            tar_cmd = tar_cmd.format(test_path)
        else:
            # Location on Yosemite and earlier
            test_path = "/System/Library/CoreServices/Archive Utility.app"
            if os.path.exists(test_path):
                tar_cmd = tar_cmd.format(test_path)
            else:
                print("ERROR: could not find Archive Utility.app.")
                util.exit_handler(code=1)
    else:
        tar_cmd = 'tar -xf '

    for f in iter(ftp_data.values()):
        print("Extracting {}".format(f.file))
        cwd = install_config[f.target_dir]
        # top-level directory created by the archive; removed after the move
        f_subdir_0 = f.contents_subdir.split(os.sep)[0]
        try:
            _ = shell_command_wrapper(tar_cmd + f.file, cwd=cwd)
        except Exception as exc:
            fatal_exception_handler(
                exc, "ERROR: could not extract {}.".format(f.file))
        try:
            # hoist the contents of the archive's contents_subdir up into cwd,
            # then delete the now-redundant extracted directory tree
            for d in os.listdir(os.path.join(cwd, f.contents_subdir)):
                shutil.move(os.path.join(cwd, f.contents_subdir, d),
                            os.path.join(cwd, d))
            shutil.rmtree(os.path.join(cwd, f_subdir_0))
        except Exception as exc:
            fatal_exception_handler(
                exc, "ERROR: could not move contents of {}.".format(f.file))
        try:
            # finally, remove the downloaded archive itself
            os.remove(os.path.join(cwd, f.file))
        except Exception as exc:
            fatal_exception_handler(
                exc, "ERROR: could not delete {}.".format(f.file))
Example #8
0
 def __init__(self, cli_obj):
     """Initialize the framework object from parsed CLI input.

     Sets up bookkeeping attributes, loads POD settings and the logging
     config file, then hands off to ``configure``. Any exception raised
     during setup is treated as fatal: it is logged with a traceback and
     the package exits with code 1.
     """
     super(MDTFFramework, self).__init__(name=self.__class__.__name__,
                                         _parent=None,
                                         status=ObjectStatus.ACTIVE)
     self.code_root = cli_obj.code_root
     self.pod_list = []            # POD names selected for this run
     self.cases = dict()           # case definitions; presumably filled in by configure()
     self.global_env_vars = dict() # env vars shared by all PODs -- TODO confirm
     try:
         # load pod data
         pod_info_tuple = mdtf_info.load_pod_settings(self.code_root)
         # load log config
         log_config = cli.read_config_file(self.code_root,
                                           "logging.jsonc",
                                           site=cli_obj.site)
         self.configure(cli_obj, pod_info_tuple, log_config)
     except Exception as exc:
         # unrecoverable: log the exception, print full traceback, exit
         tb_exc = traceback.TracebackException(*(sys.exc_info()))
         _log.critical("Framework caught exception %r", exc)
         print(''.join(tb_exc.format()))
         util.exit_handler(code=1)
Example #9
0
    def verify_paths(self, config, p):
        """GFDL site-specific path validation: wipes WORKING_DIR via the GFDL
        rmtree wrapper, checks/creates the standard directories, and uses GCP
        to create OUTPUT_DIR, which may live on a read-only volume.
        """
        keep_temp = config.get('keep_temp', False)
        # Wipe any stale WORKING_DIR unless temp files are being kept or
        # WORKING_DIR doubles as the output directory.
        if os.path.exists(p.WORKING_DIR) \
                and not (keep_temp or p.WORKING_DIR == p.OUTPUT_DIR):
            gfdl_util.rmtree_wrapper(p.WORKING_DIR)

        dir_specs = (
            ('CODE_ROOT', False),        # must already exist
            ('OBS_DATA_REMOTE', False),  # must already exist
            ('OBS_DATA_ROOT', True),     # created if absent
            ('MODEL_DATA_ROOT', True),   # created if absent
            ('WORKING_DIR', True),       # created if absent
        )
        try:
            for dir_name, create_ in dir_specs:
                util.check_dir(p, dir_name, create=create_)
        except Exception as exc:
            _log.fatal((f"Input settings for {dir_name} mis-specified (caught "
                        f"{repr(exc)}.)"))
            util.exit_handler(code=1)

        # Use GCP to create OUTPUT_DIR on a volume that may be read-only
        if not os.path.exists(p.OUTPUT_DIR):
            gfdl_util.make_remote_dir(p.OUTPUT_DIR, self.timeout, self.dry_run,
                                      log=_log)
Example #10
0
    def parse_flags(self, cli_obj):
        """Apply the consequences of boolean CLI flags, warn the user about
        options that change output, and sanity-check CASE_ROOT_DIR early so
        a bad path produces an informative error.
        """
        # dry runs imply test mode
        if cli_obj.config.get('dry_run', False):
            cli_obj.config['test_mode'] = True

        # banner warnings for options that alter processing behavior
        if cli_obj.config.get('disable_preprocessor', False):
            _log.warning(("User disabled metadata checks and unit conversion in "
                          "preprocessor."), tags=util.ObjectLogTag.BANNER)
        if cli_obj.config.get('overwrite_file_metadata', False):
            _log.warning(("User chose to overwrite input file metadata with "
                          "framework values (convention = '%s')."),
                         cli_obj.config.get('convention', ''),
                         tags=util.ObjectLogTag.BANNER)
        # check this here, otherwise error raised about missing caselist is not informative
        try:
            if cli_obj.config.get('CASE_ROOT_DIR', ''):
                util.check_dir(cli_obj.config['CASE_ROOT_DIR'], 'CASE_ROOT_DIR',
                               create=False)
        except Exception as exc:
            _log.fatal((f"Mis-specified input for CASE_ROOT_DIR (received "
                        f"'{cli_obj.config.get('CASE_ROOT_DIR', '')}', caught {repr(exc)}.)"))
            util.exit_handler(code=1)
Example #11
0
            if 'CASE_ROOT_DIR' not in cli_d and d.get('root_dir', None):
                # CASE_ROOT was set positionally
                cli_d['CASE_ROOT_DIR'] = d['root_dir']
            case_list_in = [cli_d]
        else:
            case_list_in = util.to_iter(cli_obj.file_case_list)
        self.cases = dict()
        for i, case_d in enumerate(case_list_in):
            case = self.parse_case(i, case_d, cli_obj, pod_info_tuple)
            if case:
                self.cases[case['CASENAME']] = case
        if not self.cases:
            _log.critical(("No valid entries in case_list. Please specify "
                           "model run information.\nReceived:"
                           f"\n{util.pretty_print_json(case_list_in)}"))
            util.exit_handler(code=1)

    def verify_paths(self, config, p):
        # needs to be here, instead of PathManager, because we subclass it in
        # NOAA_GFDL
        keep_temp = config.get('keep_temp', False)
        # clean out WORKING_DIR if we're not keeping temp files:
        if os.path.exists(p.WORKING_DIR) and not \
            (keep_temp or p.WORKING_DIR == p.OUTPUT_DIR):
            shutil.rmtree(p.WORKING_DIR)

        try:
            for dir_name, create_ in (('CODE_ROOT', False), ('OBS_DATA_ROOT',
                                                             False),
                                      ('MODEL_DATA_ROOT',
                                       True), ('WORKING_DIR', True)):
Example #12
0
def load_pod_settings(code_root, pod=None, pod_list=None):
    """Wrapper to load and parse the contents of POD settings files, used by
    :class:`~src.core.MDTFFramework` and :class:`InfoCLIHandler`.

    Args:
        code_root (str): Absolute path to the framework code directory; POD
            settings files are looked up under ``<code_root>/diagnostics/``.
        pod (str, optional): If given, load settings for this POD only. The
            special value ``'list'`` returns the list of POD names instead.
        pod_list (list, optional): List of POD names to load settings files.
            Defaults to all subdirectories of ``diagnostics/`` whose names
            don't start with ``_`` or ``.``.

    Raises:
        :class:`~src.util.PodConfigError`: If an error is raised opening or
            parsing the contents of a settings file. In normal operation, this
            is treated as a fatal error and will cause package exit.

    Returns:
        Instance of :data:`PodDataTuple`; or, depending on *pod*, a list of
        POD names or a single POD's settings dict.

    """
    _pod_dir = 'diagnostics'
    _file_name = 'settings.jsonc'

    def _load_one_json(pod_):
        # Read and minimally validate one POD's settings.jsonc, translating
        # every failure mode into a PodConfigError with a descriptive message.
        pod_dir = os.path.join(code_root, _pod_dir, pod_)
        settings_path = os.path.join(pod_dir, _file_name)
        try:
            d = util.read_json(settings_path)
            # both top-level sections are mandatory
            for section in ['settings', 'varlist']:
                if section not in d:
                    raise AssertionError(
                        f"'{section}' entry not found in '{_file_name}'.")
        except util.MDTFFileNotFoundError as exc:
            # distinguish "POD directory missing" from "settings file missing"
            if not os.path.isdir(pod_dir):
                raise util.PodConfigError(
                    (f"'{pod_}' directory not found in "
                     f"'{os.path.join(code_root, _pod_dir)}'."), pod_)
            elif not os.path.isfile(settings_path):
                raise util.PodConfigError((f"'{_file_name}' file not found in "
                                           f"'{pod_dir}'."), pod_)
            else:
                raise exc
        except (JSONDecodeError, AssertionError) as exc:
            raise util.PodConfigError((f"Syntax error in '{_file_name}': "
                                       f"{str(exc)}."), pod_)
        except Exception as exc:
            raise util.PodConfigError(
                (f"Error encountered in reading '{_file_name}': "
                 f"{repr(exc)}."), pod_)
        return d

    # get list of pods: default to all non-hidden subdirs of diagnostics/
    if not pod_list:
        pod_list = os.listdir(os.path.join(code_root, _pod_dir))
        pod_list = [s for s in pod_list if not s.startswith(('_', '.'))]
        pod_list.sort(key=str.lower)
    if pod == 'list':
        return pod_list

    # load one settings.jsonc file
    if pod is not None:
        if pod not in pod_list:
            print(
                f"Couldn't recognize '{pod}' out of the following diagnostics:"
            )
            print(', '.join(pod_list))
            return dict()
        return _load_one_json(pod)

    # load all of them
    pods = dict()
    realm_list = set()
    bad_pods = []
    realms = collections.defaultdict(list)
    for p in pod_list:
        try:
            d = _load_one_json(p)
        except Exception as exc:
            # record the failure but keep loading the rest, so that all
            # errors can be reported together below
            _log.error(exc)
            bad_pods.append(p)
            continue
        pods[p] = d
        # PODs requiring data from multiple realms get stored in the dict
        # under a tuple of those realms; realms stored individually in realm_list
        _realm = util.to_iter(d['settings'].get('realm', None), tuple)
        if len(_realm) == 0:
            continue
        elif len(_realm) == 1:
            # single realm: unwrap the tuple so the key is the realm name
            _realm = _realm[0]
            realm_list.add(_realm)
        else:
            realm_list.update(_realm)
        realms[_realm].append(p)
    if bad_pods:
        _log.critical(
            ("Errors were encountered when finding the following PODS: "
             "[%s]."), ', '.join(f"'{p}'" for p in bad_pods))
        util.exit_handler(code=1)
    return PodDataTuple(pod_data=pods,
                        realm_data=realms,
                        sorted_pods=pod_list,
                        sorted_realms=sorted(list(realm_list), key=str.lower))
    def __post_init__(self, log=_log, model=None, experiment=None):
        """Validate and fill in CMIP6 data source attributes.

        Resolves CASE_ROOT_DIR (falling back to the global config value),
        applies the *model*/*experiment* synonym arguments, checks supplied
        field values against the CMIP6 controlled vocabulary, infers related
        DRS fields where possible, and narrows CATALOG_DIR down the DRS
        hierarchy to limit the directory crawl.
        """
        super(CMIP6DataSourceAttributes, self).__post_init__(log=log)
        config = core.ConfigManager()
        cv = cmip6.CMIP6_CVs()

        def _init_x_from_y(source, dest):
            # If attribute *dest* is unset, try to infer it from *source* via
            # a CV lookup; on failure, log and leave *dest* empty.
            if not getattr(self, dest, ""):
                try:
                    source_val = getattr(self, source, "")
                    if not source_val:
                        raise KeyError()
                    dest_val = cv.lookup_single(source_val, source, dest)
                    log.debug("Set %s='%s' based on %s='%s'.", dest, dest_val,
                              source, source_val)
                    setattr(self, dest, dest_val)
                except KeyError:
                    log.debug("Couldn't set %s from %s='%s'.", dest, source,
                              source_val)
                    setattr(self, dest, "")

        # fall back to the globally-configured CASE_ROOT_DIR
        if not self.CASE_ROOT_DIR and config.CASE_ROOT_DIR:
            log.debug("Using global CASE_ROOT_DIR = '%s'.",
                      config.CASE_ROOT_DIR)
            self.CASE_ROOT_DIR = config.CASE_ROOT_DIR
        # verify case root dir exists
        if not os.path.isdir(self.CASE_ROOT_DIR):
            log.critical("Data directory CASE_ROOT_DIR = '%s' not found.",
                         self.CASE_ROOT_DIR)
            util.exit_handler(code=1)

        # should really fix this at the level of CLI flag synonyms
        if model and not self.source_id:
            self.source_id = model
        if experiment and not self.experiment_id:
            self.experiment_id = experiment

        # validate non-empty field values against the CMIP6 CV; unrecognized
        # values are logged but not fatal
        for field in dataclasses.fields(self):
            val = getattr(self, field.name, "")
            if not val:
                continue
            try:
                if not cv.is_in_cv(field.name, val):
                    log.error((
                        "Supplied value '%s' for '%s' is not recognized by "
                        "the CMIP6 CV. Continuing, but queries will probably fail."
                    ), val, field.name)
            except KeyError:
                # raised if not a valid CMIP6 CV category
                continue
        # currently no inter-field consistency checks: happens implicitly, since
        # set_experiment will find zero experiments.

        # Attempt to determine first few fields of DRS, to avoid having to crawl
        # entire DRS structure
        _init_x_from_y('experiment_id', 'activity_id')
        _init_x_from_y('source_id', 'institution_id')
        _init_x_from_y('institution_id', 'source_id')
        # TODO: multi-column lookups
        # set CATALOG_DIR to be further down the hierarchy if possible, to
        # avoid having to crawl entire DRS structure; CASE_ROOT_DIR remains the
        # root of the DRS hierarchy
        new_root = self.CASE_ROOT_DIR
        for drs_attr in ("activity_id", "institution_id", "source_id",
                         "experiment_id"):
            drs_val = getattr(self, drs_attr, "")
            if not drs_val:
                # stop at the first unknown DRS component
                break
            new_root = os.path.join(new_root, drs_val)
        if not os.path.isdir(new_root):
            # narrowed path doesn't exist; crawl from the hierarchy root instead
            log.error("Data directory '%s' not found; starting crawl at '%s'.",
                      new_root, self.CASE_ROOT_DIR)
            self.CATALOG_DIR = self.CASE_ROOT_DIR
        else:
            self.CATALOG_DIR = new_root
Example #14
0
def load_pod_settings(code_root, pod=None, pod_list=None):
    """Wrapper to load POD settings files, used by ConfigManager and CLIInfoHandler.

    Args:
        code_root (str): Absolute path to the framework code directory; POD
            settings files are looked up under ``<code_root>/diagnostics/``.
        pod (str, optional): If given, load settings for this POD only. The
            special value ``'list'`` returns the list of POD names instead.
        pod_list (list, optional): List of POD names to load settings files.
            Defaults to all subdirectories of ``diagnostics/`` whose names
            don't start with ``_`` or ``.``.

    Returns:
        Instance of ``PodDataTuple``; or, depending on *pod*, a list of POD
        names or a single POD's settings dict.
    """
    # only place we can put it would be util.py if we want to avoid circular imports
    _pod_dir = 'diagnostics'
    _file_name = 'settings.jsonc'

    def _load_one_json(pod_):
        # Read and minimally validate one POD's settings.jsonc, translating
        # every failure mode into a PodConfigError with a descriptive message.
        pod_dir = os.path.join(code_root, _pod_dir, pod_)
        settings_path = os.path.join(pod_dir, _file_name)
        try:
            d = util.read_json(settings_path)
            # both top-level sections are mandatory
            for section in ['settings', 'varlist']:
                if section not in d:
                    raise AssertionError(
                        f"'{section}' entry not found in '{_file_name}'.")
        except util.MDTFFileNotFoundError as exc:
            # distinguish "POD directory missing" from "settings file missing"
            if not os.path.isdir(pod_dir):
                raise util.PodConfigError(
                    (f"'{pod_}' directory not found in "
                     f"'{os.path.join(code_root, _pod_dir)}'."), pod_)
            elif not os.path.isfile(settings_path):
                raise util.PodConfigError((f"'{_file_name}' file not found in "
                                           f"'{pod_dir}'."), pod_)
            else:
                raise exc
        except (JSONDecodeError, AssertionError) as exc:
            raise util.PodConfigError((f"Syntax error in '{_file_name}': "
                                       f"{str(exc)}."), pod_)
        except Exception as exc:
            raise util.PodConfigError(
                (f"Error encountered in reading '{_file_name}': "
                 f"{repr(exc)}."), pod_)
        return d

    # get list of pods: default to all non-hidden subdirs of diagnostics/
    if not pod_list:
        pod_list = os.listdir(os.path.join(code_root, _pod_dir))
        pod_list = [s for s in pod_list if not s.startswith(('_', '.'))]
        pod_list.sort(key=str.lower)
    if pod == 'list':
        return pod_list

    # load one settings.jsonc file
    if pod is not None:
        if pod not in pod_list:
            print(
                f"Couldn't recognize '{pod}' out of the following diagnostics:"
            )
            print(', '.join(pod_list))
            return dict()
        return _load_one_json(pod)

    # load all of them
    pods = dict()
    realm_list = set()
    bad_pods = []
    realms = collections.defaultdict(list)
    for p in pod_list:
        try:
            d = _load_one_json(p)
        except Exception as exc:
            # record the failure but keep loading the rest, so that all
            # errors can be reported together below
            _log.error(exc)
            bad_pods.append(p)
            continue
        pods[p] = d
        # PODs requiring data from multiple realms get stored in the dict
        # under a tuple of those realms; realms stored individually in realm_list
        _realm = util.to_iter(d['settings'].get('realm', None), tuple)
        if len(_realm) == 0:
            continue
        elif len(_realm) == 1:
            # single realm: unwrap the tuple so the key is the realm name
            _realm = _realm[0]
            realm_list.add(_realm)
        else:
            realm_list.update(_realm)
        realms[_realm].append(p)
    if bad_pods:
        _log.critical(
            ("Errors were encountered when finding the following PODS: "
             "[%s]."), ', '.join(f"'{p}'" for p in bad_pods))
        util.exit_handler(code=1)
    return PodDataTuple(pod_data=pods,
                        realm_data=realms,
                        sorted_pods=pod_list,
                        sorted_realms=sorted(list(realm_list), key=str.lower))