Exemple #1
0
def test_delete():
    """pyessv-tests: io: delete.

    """
    join = os.path.join
    authority_dirs = os.listdir(LIB.DIR_ARCHIVE)
    dir_authority = join(LIB.DIR_ARCHIVE, tu.AUTHORITY_NAME)
    manifest = join(dir_authority, 'MANIFEST')
    dir_scope = join(dir_authority, tu.SCOPE_NAME)
    dir_col_1 = join(dir_scope, tu.COLLECTION_01_NAME)
    dir_col_2 = join(dir_scope, tu.COLLECTION_02_NAME)
    dir_col_3 = join(dir_scope, tu.COLLECTION_03_NAME)
    file_term_1 = join(dir_col_1, tu.TERM_01_NAME)
    file_term_2 = join(dir_col_2, tu.TERM_02_NAME)
    file_term_3 = join(dir_col_3, tu.TERM_03_NAME)

    # Persist the authority so that each deletion target exists on disk.
    io_manager.write(LIB.load(tu.AUTHORITY_NAMESPACE))

    # Delete bottom-up (terms -> collections -> scope -> authority) and
    # verify the corresponding file / directory disappears each time.
    cases = (
        (tu.TERM_01_NAMESPACE, file_term_1, os.path.isfile),
        (tu.TERM_02_NAMESPACE, file_term_2, os.path.isfile),
        (tu.TERM_03_NAMESPACE, file_term_3, os.path.isfile),
        (tu.COLLECTION_01_NAMESPACE, dir_col_1, os.path.isdir),
        (tu.COLLECTION_02_NAMESPACE, dir_col_2, os.path.isdir),
        (tu.COLLECTION_03_NAMESPACE, dir_col_3, os.path.isdir),
        (tu.SCOPE_NAMESPACE, dir_scope, os.path.isdir),
        (tu.AUTHORITY_NAMESPACE, dir_authority, os.path.isdir),
    )
    for ns, target, exists in cases:
        io_manager.delete(LIB.load(ns))
        assert not exists(target)
Exemple #2
0
def validate_vocabularies(projects, experiments):
    """Validate various CV termsets within collections.

    :param projects: Candidate project terms validated against wcrp:cmip6:activity-id.
    :param experiments: Candidate experiment terms validated against wcrp:cmip6:experiment-id.

    """
    _validate('PROJECT', pyessv.load('wcrp:cmip6:activity-id'), projects)
    _validate("EXPERIMENTS", pyessv.load('wcrp:cmip6:experiment-id'),
              experiments)
    # Call form of print works under both Python 2 and 3 (the original
    # `print "..."` statement is a SyntaxError on Python 3).
    print("------------------------------------------------------")
Exemple #3
0
def _main(args):
    """Main entry point.

    :param args: Parsed CLI arguments; ``args.source`` is the ESGF vocab (ini) directory.

    """
    if not os.path.isdir(args.source):
        raise ValueError('ESGF vocab directory does not exist: {}'.format(
            args.source))

    # Process project modules:
    for module in _MODULES:
        # Set project: drop a 4-char module-name prefix, underscores -> hyphens.
        project = module.__name__[4:].replace('_', '-')

        # Set ini file handler.
        ini_section = _IniSection(project, args.source)

        # Load authority & create scope.  ECMWF modules get a dedicated
        # authority; everything else hangs off 'wcrp'.
        if module in _MODULES_ECMWF:
            authority = _create_authority_ecmwf()
            scope = pyessv.load('ecmwf:{}'.format(project))
        else:
            authority = pyessv.load('wcrp')
            scope = pyessv.load('wcrp:{}'.format(project))

        if not scope:
            scope = _create_scope(authority, project)

        # Set scope data: copy each declared field's raw ini value.
        scope.data = scope.data or dict()
        for field in module.SCOPE_DATA:
            scope.data[field] = ini_section.get_option(field, raw=True)

        # Create regex collections (COLLECTIONS entries whose 2nd item is a
        # regex string rather than a factory function).
        collections = [
            i for i in module.COLLECTIONS if not inspect.isfunction(i[1])
        ]
        for collection_id, term_regex in collections:
            _create_collection(module,
                               scope,
                               collection_id,
                               term_regex=term_regex)

        # Create standard collections (entries whose 2nd item is a function).
        collections = [
            i for i in module.COLLECTIONS if inspect.isfunction(i[1])
        ]
        for collection_id, term_factory in collections:
            ctx = _MappingExecutionContext(project, collection_id, ini_section)
            collection = _create_collection(module, scope, collection_id)
            # Some factories are zero-arg wrappers that must be invoked once
            # to obtain the real factory; others take ctx directly and raise
            # TypeError here, in which case they are used as-is.
            try:
                term_factory = term_factory()
            except TypeError:
                pass
            for term_data in term_factory(ctx):
                _get_term(collection, term_data)

    # Add to archive & persist to file system.
    # NOTE(review): `authority` is whatever the final loop iteration set and
    # is undefined if _MODULES is empty -- confirm this is intended.
    pyessv.archive(authority)
Exemple #4
0
def validate_vocabularies(projects, experiments):
    """Validate various CV termsets within collections.

    :param projects: Candidate project terms validated against wcrp:cmip6:activity-id.
    :param experiments: Candidate experiment terms validated against wcrp:cmip6:experiment-id.

    """
    _validate(
        'PROJECT',
        pyessv.load('wcrp:cmip6:activity-id'),
        projects
        )
    _validate(
        "EXPERIMENTS",
        pyessv.load('wcrp:cmip6:experiment-id'),
        experiments
        )
    # Call form of print works under both Python 2 and 3 (the original
    # `print "..."` statement is a SyntaxError on Python 3).
    print("------------------------------------------------------")
def test_search_setup():
    """ERRATA :: WS :: SEARCH :: setup.

    """
    # Invoke WS endpoint.
    response = requests.get(_URL_SEARCH_SETUP)

    # Assert WS response envelope.
    payload = tu.assert_ws_response(
        _URL_SEARCH_SETUP, response, fields={'vocabs', 'values'})

    # Assert every returned vocabulary & term resolves via pyessv.
    for vocab in payload['vocabs']:
        assert isinstance(pyessv.load(vocab['namespace']), pyessv.Collection)
        for item in vocab['terms']:
            assert isinstance(pyessv.load(item['namespace']), pyessv.Term)
Exemple #6
0
    def _test(func, typeof):
        """Inner test.

        """
        created = func()
        tu.assert_object(created, typeof)
        reloaded = load(created.namespace)
        assert created == reloaded
Exemple #7
0
def _get_institutes():
    """Returns canonical cmip6 institutes (derived from vocabularies).

    """
    result = []
    for term in pyessv.load('wcrp:cmip6:institution-id'):
        # Keep only the trailing segment of the canonical name.
        result.append(term.canonical_name.split(':')[-1])
    return result
Exemple #8
0
def test_create(node_factory, node_type):
    """Test instantiation of domain entities.

    """
    created = node_factory()
    tu.assert_object(created, node_type)
    # Round-trip through the loader and compare identity-bearing fields.
    reloaded = load(created.namespace)
    assert created.namespace == reloaded.namespace
    assert repr(created) == repr(reloaded)
Exemple #9
0
def write_authority():
    """Writes ES-DOC authority.

    """
    existing = pyessv.load('esdoc', verbose=False)
    if existing:
        return existing
    # Not previously written -> create it.
    return pyessv.create_authority(
        'esdoc',
        'Earth System Documentation',
        label='ES-DOC',
        url='https://es-doc.org',
        create_date=CREATE_DATE)
Exemple #10
0
def _create_authority_ecmwf():
    """Writes ECMWF authority.

    """
    existing = pyessv.load('ecmwf', verbose=False)
    if existing:
        return existing
    # Not previously written -> create it.
    return pyessv.create_authority(
        'ECMWF',
        'European Center for Medium-Range Weather Forecasts',
        label='ECMWF',
        url='https://www.ecmwf.int',
        create_date=_CREATE_DATE)
Exemple #11
0
        def _set_output():
            """Sets response to be returned to client.

            """
            # Set include meta section flag.
            meta_arg = self.get_argument(_PARAM_INCLUDE_META, 'false')
            include_meta = meta_arg == 'true'

            # Build vocabulary identifier from URL path segments
            # (normalized: trimmed, lower-cased, underscores -> hyphens).
            segments = self.request.path.split('/')[3:]
            normalized = []
            for segment in segments:
                normalized.append(segment.strip().lower().replace('_', '-'))
            identifier = ':'.join(normalized)

            # Set output to be returned to client.
            if len(identifier) == 0:
                payload = [_encode(i, include_meta) for i in pyessv.load()]
            else:
                payload = _encode(pyessv.load(identifier), include_meta)
            self.output = {'data': payload}
Exemple #12
0
def _get_pid_tasks(issue, obj):
    """Returns PID service tasks extracted from issue data.

    :param issue: Errata issue being processed (provides ``uid`` & ``project``).
    :param dict obj: Issue payload; ``obj[JF_DATASETS]`` lists dataset identifiers.

    :returns: One PID insert task per dataset identifier, or an empty list
        when the project is not a PID client.
    :rtype: list

    """
    pid_tasks = []
    project = pyessv.load('esdoc:errata:project:{}'.format(issue.project))
    # Truthiness test instead of the `== True` anti-pattern.
    if project.data['is_pid_client']:
        for identifier in obj[JF_DATASETS]:
            task = PIDServiceTask()
            task.action = PID_ACTION_INSERT
            task.issue_uid = issue.uid
            task.dataset_id = identifier
            pid_tasks.append(task)

    return pid_tasks
Exemple #13
0
def _write_scope(authority):
    """Writes ES-DOC cmip6 scope.

    """
    scope = pyessv.load('esdoc:cmip6', verbose=False)
    if not scope:
        # Scope not previously written -> create it.
        scope = pyessv.create_scope(
            authority,
            'cmip6',
            'ES-DOC controlled Vocabularies (CVs) for use in cmip6',
            create_date=utils.CREATE_DATE,
            label='CMIP6',
            url='https://github.com/ES-DOC')

    _write_model_topic(scope)

    return scope
Exemple #14
0
def _main(args):
    """Main entry point.

    :param args: Parsed CLI arguments exposing ``template_fpath`` & ``output_fpath``.

    """
    # Open template.
    content = _get_template_content(args.template_fpath)

    # Create CORDEXP collections: substitute each placeholder with a
    # tab-indented, quoted, newline-separated list of term names.
    for collection in pyessv.load("copernicus:cordexp"):
        data = ''
        for term in collection:
            data += '\t\'{}\'\n'.format(term.canonical_name)
        # NOTE(review): both the plain and the *_RAW placeholders are filled
        # with canonical names here, whereas sibling scripts build the RAW
        # variant from term.raw_name -- confirm this is intentional.
        content = content.replace('[__CORDEXP_{}__]'.format(collection.raw_name.upper()), data)
        content = content.replace('[__CORDEXP_{}_RAW__]'.format(collection.raw_name.upper()), data)

    # Create CORDEX collections.
    data = ''
    for term in pyessv.load("ecmwf:c3s-cordex:institute"):
        data += '\t\'{}\'\n'.format(term.canonical_name)
    content = content.replace('[__CORDEX_INSTITUTION_ID__]', data)
    content = content.replace('[__CORDEX_INSTITUTION_ID_RAW__]', data)

    # Write output to file system.
    _set_output(args.output_fpath, content)
Exemple #15
0
def _main(args):
    """Main entry point.

    """
    if args.authority is None or not args.authority.strip():
        raise ValueError('Authority is a required parameter')

    # Walk authority -> scope -> collection -> term, honouring the optional
    # CLI filters at each level.
    for scope in pyessv.load(args.authority):
        if args.scope and scope.canonical_name != args.scope:
            continue
        for collection in scope:
            if args.collection and collection.canonical_name != args.collection:
                continue
            for term in collection:
                if args.term and term.canonical_name != args.term:
                    continue
                print(term.namespace.replace(':', ' -> '))
Exemple #16
0
def _map_collection(identifier):
    """Converts a pyessv collection to a dictionary.

    """
    collection = pyessv.load(identifier)

    mapped_terms = [_map_term(i) for i in collection]
    result = dict(
        canonical_name=collection.canonical_name,
        key=collection.namespace,
        label=collection.label,
        namespace=collection.namespace,
        terms=mapped_terms,
    )
    # Fold any collection-level data into the mapping.
    if collection.data is not None:
        result.update(collection.data)

    return result
def extract_facets(project, data):
    """Extracts terms from a dataset identifier.

    :param str project: Project code.
    :param str|list data: Dataset identifier(s).

    :returns: Set of pyessv terms extracted from dataset identifier.
    :rtype: list

    """
    # Python 2/3 compatible string type test (original used `basestring`,
    # which does not exist on Python 3).
    try:
        string_types = basestring  # noqa: F821 -- Python 2 only.
    except NameError:
        string_types = str

    # Original body mixed tab & space indentation (a TabError on Python 3);
    # re-indented with spaces and fixed the `seperator` typo.
    separator, targets = _CONFIG[project]
    identifiers = [data] if isinstance(data, string_types) else data

    facets = []
    for identifier in identifiers:
        parts = identifier.split(separator)
        facets += ['{}:{}'.format(i, parts[j]) for i, j in targets]

    return [pyessv.load(i) for i in set(facets)]
Exemple #18
0
def _write_scope(authority):
    """Writes ES-DOC errata scope.

    """
    scope = pyessv.load('esdoc:errata', verbose=False)
    if not scope:
        # Scope not previously written -> create it.
        scope = pyessv.create_scope(
            authority,
            'errata',
            'Controlled Vocabularies (CVs) for use in dataset errata',
            create_date=utils.CREATE_DATE,
            label='Dataset Errata',
            url='https://github.com/ES-DOC/esdoc-errata-ws')

    # Populate child collections.
    _write_projects(scope)
    _write_issue_severity(scope)
    _write_issue_status(scope)
    _write_pid_task_action(scope)
    _write_pid_task_status(scope)

    return scope
Exemple #19
0
        def _set_output():
            """Sets response to be returned to client.

            """
            # Vocabs to load: the core errata collections plus every facet
            # collection declared by each project.
            vocabs = {
                'esdoc:errata:project',
                'esdoc:errata:severity',
                'esdoc:errata:status',
            }
            for project in pyessv.load('esdoc:errata:project'):
                vocabs.update(project.data['facets'])

            # Get facet values.
            with db.session.create():
                facet_values = set(db.dao.get_project_facets())

            # Set output.
            self.output = {
                'vocabs': [_map_collection(i) for i in sorted(vocabs)],
                'values': facet_values
            }
Exemple #20
0
def test_write():
    """pyessv-tests: io: write.

    """
    join = os.path.join
    authority_dirs = os.listdir(LIB.DIR_ARCHIVE)
    dir_authority = join(LIB.DIR_ARCHIVE, tu.AUTHORITY_NAME)
    manifest = join(dir_authority, 'MANIFEST')
    dir_scope = join(dir_authority, tu.SCOPE_NAME)
    dir_col_1 = join(dir_scope, tu.COLLECTION_01_NAME)
    dir_col_2 = join(dir_scope, tu.COLLECTION_02_NAME)
    dir_col_3 = join(dir_scope, tu.COLLECTION_03_NAME)
    file_term_1 = join(dir_col_1, tu.TERM_01_NAME)
    file_term_2 = join(dir_col_2, tu.TERM_02_NAME)
    file_term_3 = join(dir_col_3, tu.TERM_03_NAME)

    directories = (dir_authority, dir_scope, dir_col_1, dir_col_2, dir_col_3)
    files = (manifest, file_term_1, file_term_2, file_term_3)

    # Nothing exists prior to the write.
    for directory in directories:
        assert not os.path.isdir(directory)
    for fpath in files:
        assert not os.path.isfile(fpath)

    io_manager.write(LIB.load(tu.AUTHORITY_NAME))

    # Exactly one new authority directory appeared, and every expected
    # directory / file now exists.
    assert len(os.listdir(LIB.DIR_ARCHIVE)) == len(authority_dirs) + 1
    for directory in directories:
        assert os.path.isdir(directory)
    for fpath in files:
        assert os.path.isfile(fpath)

    # Manifest & every written file contain a JSON object.
    with io.open(manifest, 'r') as fstream:
        assert isinstance(json.loads(fstream.read()), dict)
    for fpath in files:
        with io.open(fpath, 'r') as fstream:
            assert isinstance(json.loads(fstream.read()), dict)
def _main():
    """Main entry point.

    """
    # Set canonical institutes & actual GH teams.
    institutes = [
        term.canonical_name.split(':')[-1]
        for term in pyessv.load('wcrp:cmip6:institution-id')
    ]
    teams = utils.get_teams(lambda i: i['name'].startswith('cmip6-'))

    # Set teams to be created.
    to_create = []
    for institute in institutes:
        team_id = 'cmip6-{}'.format(institute)
        if team_id not in teams:
            to_create.append(team_id)

    # Set teams to be deleted.
    to_delete = []
    for team in teams.values():
        if team.name.startswith('cmip6') and team.institution_id not in institutes:
            to_delete.append(team)

    # Escape when nothing to do.
    if not to_create and not to_delete:
        pyessv.log("Teams are in sync - nothing todo")
        return

    # Update GH.
    for team_id in to_create:
        utils.create_team(team_id)
    for team in to_delete:
        utils.delete_team(team)
Exemple #22
0
def _main():
    """Main entry point.

    """
    # Open template.
    with open(_TEMPLATE, 'r') as fstream:
        content = fstream.read()

    # Create CMIP6 collections: fill each placeholder with a tab-indented,
    # quoted, newline-separated list of term names.
    for scope in _VOCABS:
        for name in _VOCABS[scope]:
            collection = pyessv.load('wcrp:{}:{}'.format(scope, name))
            placeholder = collection.raw_name.upper()

            canonical = ''
            for term in collection:
                canonical += '\t\'{}\'\n'.format(term.canonical_name)
            content = content.replace('[{}]'.format(placeholder), canonical)

            raw = ''
            for term in collection:
                raw += '\t\'{}\'\n'.format(term.raw_name)
            content = content.replace('[{}_RAW]'.format(placeholder), raw)

    # Write output to file system.
    with open(_OUTPUT, 'w') as fstream:
        fstream.write(content)
Exemple #23
0
def _main(args):
    """Main entry point.

    """
    # Open template.
    with open(_TEMPLATE, 'r') as fstream:
        content = fstream.read()

    # Create CMIP6 collections: fill each placeholder with a tab-indented,
    # quoted, newline-separated list of term names.
    for scope in _VOCABS:
        for name in _VOCABS[scope]:
            collection = pyessv.load('wcrp:{}:{}'.format(scope, name))
            placeholder = collection.raw_name.upper()

            canonical = ''
            for term in collection:
                canonical += '\t\'{}\'\n'.format(term.canonical_name)
            content = content.replace('[{}]'.format(placeholder), canonical)

            raw = ''
            for term in collection:
                raw += '\t\'{}\'\n'.format(term.raw_name)
            content = content.replace('[{}_RAW]'.format(placeholder), raw)

    # Write output to file system.
    with open(args.output_fpath, 'w') as fstream:
        fstream.write(content)
def _get_institutes():
    """Returns canonical cmip6 institutes (derived from vocabularies).

    """
    institutes = []
    for term in pyessv.load('wcrp:cmip6:institution-id'):
        # Keep only the trailing segment of the canonical name.
        institutes.append(term.canonical_name.split(':')[-1])
    return institutes
Exemple #25
0
 def _cache_controlled_vocabularies(self):
     """
     Loads controlled vocabularies once and caches them.

     Stores the "<authority>:<scope>" CV set on ``self._cvs`` and the
     authority-level vocabulary on ``self._authority_info``.
     """
     # assumes self.authority / self.scope are namespace fragments understood
     # by pyessv.load (e.g. 'wcrp' / 'cmip6') -- TODO confirm against callers.
     self._cvs = pyessv.load("{}:{}".format(self.authority, self.scope))
     self._authority_info = pyessv.load(self.authority)
Exemple #26
0
class CMIP6Check(BaseNCCheck):
    """Compliance checker that validates CMIP6 netCDF files against the
    WCRP CMIP6 controlled vocabularies and the MIP tables.
    """

    # Registration hooks read by the compliance-checker framework.
    register_checker = True
    name = "cmip6"

    # validation of a term against a CV is only performed once
    # and the result cached

    # Class-level cache shared by all instances.  "cv" holds the pyessv
    # scope plus pre-extracted institution addresses & experiment names
    # (loaded when the class body executes, i.e. at import time);
    # "validated" memoizes per-term CV lookups keyed by
    # [term_type][collection][term].
    __cache = {
        "cv": {
            "scope":
            pyessv.load("wcrp:cmip6"),
            "institutions": [
                trm.data[u"postal_address"]
                for trm in pyessv.load('wcrp:cmip6:institution-id')
            ],
            "models": [],
            "experiments": [
                trm.data[u"experiment"]
                for trm in pyessv.load("wcrp:cmip6:experiment-id")
            ]
        },
        "mip_tables": MipTables(MIP_TABLES),
        "validated": {
            "canonical_name": {},
            "label": {},
            "raw_name": {}
        }
    }

    def __init__(self):
        """Initialise the checker with empty message & error state."""
        super(CMIP6Check, self).__init__()
        self.__messages = []
        # Bug fix: was `self.__erorrs` (typo), which left `__errors`
        # undefined until check_global_attributes() reset it.
        self.__errors = 0

    @classmethod
    def make_result(cls, level, score, out_of, name, messages):
        """A helper factory method for generating cc results"""
        weighting = (score, out_of)
        return Result(level, weighting, name, messages)

    @classmethod
    def _validate_term(cls, term, collection, term_type="canonical_name"):
        """Check a term against a CV, using cache if possible.

        Returns True/False for a resolvable collection, or None when the
        collection cannot be resolved in the cached CV scope.
        """
        # Cache layout: __cache["validated"][term_type][collection][term]
        #   -> True / False (term validity) or None (unknown collection).
        if collection in cls.__cache["validated"][term_type]:
            if term in cls.__cache["validated"][term_type][collection]:
                return cls.__cache["validated"][term_type][collection][term]
            else:
                # perform check
                try:
                    cls.__cache["validated"][term_type][collection][term] = (
                        term in [
                            getattr(trm, term_type) for trm in
                            cls.__cache["cv"]["scope"][collection].terms
                        ])
                except ValueError:
                    # presumably raised by the scope lookup for an unknown
                    # collection -- TODO confirm against pyessv.
                    cls.__cache["validated"][term_type][collection][
                        term] = None
        else:
            # First lookup for this collection: create its cache bucket.
            cls.__cache["validated"][term_type][collection] = {}
            if collection in cls.__cache["cv"]["scope"]:
                cls.__cache["validated"][term_type][collection][term] = (
                    term in [
                        getattr(trm, term_type)
                        for trm in cls.__cache["cv"]["scope"][collection].terms
                    ])
            else:
                cls.__cache["validated"][term_type][collection][term] = None

        return cls.__cache["validated"][term_type][collection][term]

    def check_filename(self, ds):
        """
        Tests filename's facets against a CV
        <variable_id>   tas
        <table_id>      Amon
        <source_id>     hadgem3-es
        <experiment_id> piCtrl
        <member_id>     r1i1p1f1
        <grid_label>    gn
        [<time_range>]  201601-210012
        .nc

        Parameters
        ----------
        ds : netCDF4.Dataset
            an open ncdf file

        Returns
        -------
        compliance_checker.base.Result
            container with check's results
        """
        filename = os.path.basename(ds.filepath())
        filename_parts = filename.split('.')[0].split('_')
        # Position of each CV-validated facet within the '_'-split filename.
        template_dict = {
            "table-id": 1,
            "source-id": 2,
            "experiment-id": 3,
            "grid-label": 5
        }

        messages = []
        valid_filename = True
        level = BaseCheck.MEDIUM
        out_of = 1
        score = 0

        # Each facet must be a valid CV term AND match the corresponding
        # global attribute in the file.
        for cv in template_dict:
            if not self._validate_term(
                    filename_parts[template_dict[cv]].lower(), cv):
                messages.append("Invalid term {} in the filename {}".format(
                    cv, filename))
                valid_filename = False
            else:
                attr = ds.getncattr(cv.replace('-', '_'))
                if attr != filename_parts[template_dict[cv]]:
                    valid_filename = False
                    messages.append(
                        "Value {} of the attribute {} doesn't match filename {}"
                        .format(attr, cv, filename))
        # member_id is "<variant_label>" or "<variant_label>-<sub_experiment_id>".
        member_id = filename_parts[4].split('-')

        if len(member_id) > 1:
            if not self._validate_term(member_id[1], "experiment-id"):
                messages.append("Invalid term {} in the filename {}".format(
                    "sub_experiment_id", filename))
                valid_filename = False
        if re.match(r"^r\d+i\d+p\d+f\d+$", member_id[0]) is None:
            valid_filename = False
            messages.append("Invalid variant_label {}".format(member_id[0]))
        else:
            variant_label = ds.getncattr("variant_label")
            if variant_label != member_id[0]:
                valid_filename = False
                messages.append(
                    "Variant label {} is not consistent with file contents ({})"
                    .format(member_id[0], variant_label))

        # variable_id must belong to the named MIP table.
        if filename_parts[1] in self.__cache["mip_tables"].names:
            if filename_parts[0] not in self.__cache[
                    "mip_tables"].get_variables_from_table(filename_parts[1]):
                valid_filename = False

        # Optional trailing time_range: parse only to validate (d1, d2 unused).
        if len(filename_parts) == 7:
            try:
                frequency = ds.getncattr("frequency")
                d1, d2 = parse_date_range(filename_parts[6], frequency)
            except Exception as e:
                valid_filename = False
                # NOTE(review): e.message is Python 2 only; on Python 3 this
                # raises AttributeError -- confirm target interpreter.
                messages.append("Invalid daterange {} ({})".format(
                    filename_parts[6], e.message))

        if valid_filename:
            score += 1
        return self.make_result(level, score, out_of, "DRS template check",
                                messages)

    def check_global_attributes(self, ds):
        """
        Checks for existence and validity of global attributes.

        Accumulates problems in ``self.__messages`` / ``self.__errors``;
        the check scores 1 only when no error was recorded.

        Parameters
        ----------
        ds : netCDF4.Dataset
            an open ncdf file

        Returns
        -------
        compliance_checker.base.Result
            container with check's results
        """

        out_of = 1
        score = 0
        # Reset per-call accumulators (also compensates for the `__erorrs`
        # typo in __init__).
        self.__errors = 0
        self.__messages = []

        dreq_version = self.__cache["mip_tables"].version

        # create validators
        positive_integer_validator = ValidatorFactory.integer_validator()
        nonempty_string_validator = ValidatorFactory.string_validator()

        # test for presence and contents of attributes contained in CV
        for cv_attribute in CV_ATTRIBUTES:
            self._validate_cv_attribute(ds, cv_attribute)

        # test if rfip indexes are positive integers
        for index_attribute in RUN_INDEX_ATTRIBUTES:
            self._exists_and_valid(ds, index_attribute,
                                   positive_integer_validator)

        # test if grid attribute is a non-empty string
        for mandatory_string in MANDATORY_TEXT_ATTRIBUTES:
            self._exists_and_valid(ds, mandatory_string,
                                   nonempty_string_validator)

        # tests if optional attrbutes are non-empty or don't appear at all
        for optional_string in OPTIONAL_TEXT_ATTRIBUTES:
            self._does_not_exist_or_valid(ds, optional_string,
                                          nonempty_string_validator)

        # validate experiment and institution descriptions
        self._exists_and_valid(
            ds, "experiment",
            ValidatorFactory.value_in_validator(
                self.__cache["cv"]["experiments"]))
        self._exists_and_valid(
            ds, "institution",
            ValidatorFactory.value_in_validator(
                self.__cache["cv"]["institutions"]))

        # validate CF convention
        self._exists_and_valid(
            ds, "Conventions",
            ValidatorFactory.value_in_validator(CF_CONVENTIONS))
        # validate creation date
        self._exists_and_valid(
            ds, "creation_date",
            ValidatorFactory.date_validator("%Y-%m-%dT%H:%M:%SZ"))
        # validate if data specification version is consistent with CMOR
        self._exists_and_valid(
            ds, "data_specs_version",
            ValidatorFactory.value_in_validator([dreq_version]))
        # validate external variables and other strings
        self._does_not_exist_or_valid(
            ds, "external_variables",
            ValidatorFactory.value_in_validator(["areacella", "areacello"]))
        self._exists_and_valid(
            ds, "license", ValidatorFactory.value_in_validator([LICENSE_TEXT]))
        self._exists_and_valid(ds, "mip_era",
                               ValidatorFactory.value_in_validator(["CMIP6"]))
        self._exists_and_valid(
            ds, "product",
            ValidatorFactory.value_in_validator(["model-output"]))
        self._exists_and_valid(ds, "source",
                               ValidatorFactory.string_validator(SOURCE_REGEX))
        self._exists_and_valid(
            ds, "tracking_id",
            ValidatorFactory.string_validator(
                r"^hdl:21.14100\/[a-zA-Z\d\-]+$"))

        # Global attributes needed later for cross-attribute consistency
        # checks; missing ones are recorded as errors and stay None.
        attr_dict = {
            "forcing_index": None,
            "realization_index": None,
            "initialization_index": None,
            "physics_index": None,
            "experiment_id": None,
            "sub_experiment_id": None,
            "variant_label": None,
            "mip_era": None,
            "source_id": None,
            "institution_id": None,
            "table_id": None,
            "variable_id": None,
        }
        # populate attribute dictionary with values
        for attr_key in attr_dict:
            try:
                attr_dict[attr_key] = ds.getncattr(attr_key)
            except Exception as e:
                self.__errors += 1
                self.__messages.append(
                    "Cannot retrieve global attribute {}".format(attr_key))

        # Per-variable attributes, fetched from the variable named by the
        # variable_id global attribute.
        var_attr = {
            "standard_name": None,
            "long_name": None,
            "comment": None,
            "units": None,
            "original_name": None,
            "cell_methods": None,
            "cell_measures": None,
            "missing_value": None,
            "_FillValue": None,
        }
        # check variable attributes
        for attr_key in var_attr:
            try:
                var_attr[attr_key] = ds.variables[
                    attr_dict["variable_id"]].getncattr(attr_key)
            except Exception as e:
                self.__errors += 1
                self.__messages.append(
                    "Cannot retrieve variable attribute {}".format(attr_key))

        # Compare variable attributes against MIP-table metadata; fill
        # values are expected to equal 1.e+20.
        var_meta = self.__cache["mip_tables"].get_variable_meta(
            attr_dict["table_id"], attr_dict["variable_id"])
        for key in var_meta:
            try:
                if key not in ["missing_value", "_FillValue"]:
                    self.__errors += not var_attr[key] == var_meta[key]
                else:
                    self.__errors += not var_attr[key] == 1.e+20
            except KeyError:
                self.__errors += 1
                self.__messages.append(
                    "Variable attribute '{}' absent in '{}'".format(
                        key, attr_dict["variable_id"]))

        # further_info_url / variable_id / variant_label must be consistent
        # with the global attributes retrieved above.
        try:
            further_info_url = "{}/{}.{}.{}.{}.{}.{}".format(
                ESDOC_BASE_URL, attr_dict["mip_era"],
                attr_dict["institution_id"], attr_dict["source_id"],
                attr_dict["experiment_id"], attr_dict["sub_experiment_id"],
                attr_dict["variant_label"])
            self._exists_and_valid(
                ds, "further_info_url",
                ValidatorFactory.value_in_validator([further_info_url]))
            self._exists_and_valid(
                ds, "variable_id",
                ValidatorFactory.value_in_validator(
                    self.__cache["mip_tables"].get_variables_from_table(
                        attr_dict["table_id"])))
            self._exists_and_valid(
                ds, "variant_label",
                ValidatorFactory.value_in_validator([
                    "r{}i{}p{}f{}".format(attr_dict["realization_index"],
                                          attr_dict["initialization_index"],
                                          attr_dict["physics_index"],
                                          attr_dict["forcing_index"])
                ]))
        except Exception as e:
            self.__errors += 1
            # NOTE(review): e.message is Python 2 only; on Python 3 this
            # raises AttributeError -- confirm target interpreter.
            self.__messages.append(
                "Cannot retrieve attribute. Exception: {}".format(e.message))

        # Parent-experiment attributes: either all consistent with a real
        # parent, or all absent / set to "no parent".
        if (not hasattr(ds, "parent_experiment_id")
                or ds.getncattr("parent_experiment_id") == "no parent"):
            has_parent = False
        else:
            has_parent = True
            self._validate_cv_attribute(ds, "experiment-id",
                                        "parent_experiment_id")

        if has_parent:
            self._exists_and_valid(ds, "branch_method",
                                   ValidatorFactory.nonempty_validator())
            self._exists_and_valid(ds, "branch_time_in_child",
                                   ValidatorFactory.float_validator())
            self._exists_and_valid(ds, "branch_time_in_parent",
                                   ValidatorFactory.float_validator())
            self._validate_cv_attribute(ds, "activity-id",
                                        "parent_activity_id")
            self._validate_cv_attribute(ds, "experiment-id",
                                        "parent_experiment_id")
            self._exists_and_valid(
                ds, "parent_mip_era",
                ValidatorFactory.value_in_validator(["CMIP6"]))
            self._validate_cv_attribute(ds, "source-id", "parent_source_id")
            try:
                self._exists_and_valid(
                    ds, "parent_source_id",
                    ValidatorFactory.value_in_validator(
                        [attr_dict["source_id"]]))
            except NameError:
                # unable to validate source consistency
                self.__messages.append(
                    "Unable to check consistency of parent_source_id "
                    "with source_id")
                self.__errors += 1
            self._exists_and_valid(
                ds, "parent_time_units",
                ValidatorFactory.string_validator(r"^days since"))
            self._exists_and_valid(
                ds, "parent_variant_label",
                ValidatorFactory.string_validator(r"^r\d+i\d+p\d+f\d+$"))
        else:
            try:
                start_of_run = ds.variables["time"][0]
                self._does_not_exist_or_valid(
                    ds, "branch_time_in_child",
                    ValidatorFactory.value_in_validator([start_of_run]))
            except Exception:
                self.__messages.append("Unable to retrieve time variable")
            self._does_not_exist_or_valid(
                ds, "branch_time_in_parent",
                ValidatorFactory.value_in_validator([0.0]))

            no_parent_validator = ValidatorFactory.value_in_validator(
                ['no parent'])
            for attr in PARENT_ATTRIBUTES:
                self._does_not_exist_or_valid(ds, attr, no_parent_validator)

        level = BaseCheck.HIGH
        # All-or-nothing score: any recorded error fails the check.
        score = 1 if self.__errors == 0 else 0
        return self.make_result(level, score, out_of,
                                "Global attributes check", self.__messages)

    def _does_not_exist_or_valid(self, ds, attr, validator):
        """
        Test for validity of an optional attribute.

        An absent attribute is fine; a present one must satisfy the
        validator, otherwise a message & error are recorded.

        Parameters
        ----------
        ds : netCDF4.Dataset
            an open ncdf file
        attr : str
            name of the attribute to be validated
        validator : callable
            validator to be used
        """
        if not hasattr(ds, attr):
            return
        if validator(getattr(ds, attr)):
            return
        self.__messages.append("Attribute {} needs to have a valid value "
                               "or be omitted".format(attr))
        self.__errors += 1

    def _exists_and_valid(self, ds, attr, validator):
        """
        Test for validity of a mandatory attribute.

        The attribute must be present AND satisfy the validator, otherwise
        a message & error are recorded.

        Parameters
        ----------
        ds : netCDF4.Dataset
            an open ncdf file
        attr : str
            name of the attribute to be validated
        validator : callable
            validator to be used
        """

        is_ok = hasattr(ds, attr) and validator(getattr(ds, attr))
        if not is_ok:
            self.__messages.append(
                "Attribute {} must exist and have a proper value".format(attr))
            self.__errors += 1

    def _validate_cv_attribute(self, ds, collection, nc_name=None):
        """
        Test for presence of attributes derived from CMIP6 CV.

        Parameters
        ----------
        ds : netCDF4.Dataset
            an open ncdf file
        collection : str
            name of a pyessv collection
        nc_name : str, optional
            name of the attribute if different from the collection name
        """

        try:
            # Default attribute name: collection name with '-' -> '_'.
            if nc_name is None:
                nc_name = collection.replace('-', '_')
            value = ds.getncattr(nc_name)
            verdict = self._validate_term(value, collection, "label")
            # None means the collection itself could not be resolved.
            if verdict is None:
                self.__messages.append(
                    "Unknown CV collection type {}".format(collection))
                self.__errors += 1
            if not verdict:
                self.__messages.append(
                    "Attribute {} has illegal value {}".format(nc_name, value))
                self.__errors += 1
        except Exception:
            self.__messages.append(
                "Attribute {} is missing from the ncdf file".format(nc_name))
            self.__errors += 1
 def _cache_controlled_vocabularies(self):
     """
     Loads controlled vocabularies once and caches them.
     """
     # NOTE(review): the two-argument form load('wcrp', 'cmip6') matches the
     # older pyessv API; newer releases take a single namespace string
     # ('wcrp:cmip6') -- confirm against the pinned pyessv version.
     self._wcrp_cmip6_cv = pyessv.load('wcrp', 'cmip6')
Exemple #28
0
def _main(args):
    """Main entry point.

    Builds pyessv scopes, collections and terms for each project module in
    _MODULES, sourcing term data from the ESGF ini files under args.source,
    then archives every authority that was touched.

    :param args: Parsed CLI arguments; args.source is the ESGF vocab
        directory to read ini files from.
    :raises ValueError: if args.source is not an existing directory.
    """
    if not os.path.isdir(args.source):
        raise ValueError('ESGF vocab directory does not exist: {}'.format(
            args.source))

    # Load vocabulary.
    pyessv.load_cv()

    # Track every distinct authority that gets modified so all of them are
    # archived at the end.  The original archived only the authority bound
    # in the final loop iteration (dropping e.g. WCRP updates when an ECMWF
    # module came last) and raised NameError when _MODULES was empty.
    authorities = []

    # Process project modules:
    for module in _MODULES:
        # Set project, e.g. module name 'lib_cmip6' -> project 'cmip6'.
        project = module.__name__[4:].replace('_', '-')

        # Set ini file handler.
        ini_section = _IniSection(project, args.source)

        # Load authority & create scope.
        if module in _MODULES_ECMWF:
            authority = _create_authority_ecmwf()
            scope = pyessv.load('ecmwf:{}'.format(project))
        else:
            authority = pyessv.load('wcrp')
            scope = pyessv.load('wcrp:{}'.format(project))
        if authority not in authorities:
            authorities.append(authority)

        if not scope:
            scope = _create_scope(authority, project)

        # Set scope data.
        scope.data = module.SCOPE_DATA or dict()

        # Create regex collections (term spec is a regular expression).
        for collection_id, term_regex in (
            i for i in module.COLLECTIONS if not inspect.isfunction(i[1])
        ):
            _create_collection(module,
                               scope,
                               collection_id,
                               term_regex=term_regex)

        # Create standard collections (term spec is a factory callable).
        for collection_id, term_factory in (
            i for i in module.COLLECTIONS if inspect.isfunction(i[1])
        ):
            ctx = _MappingExecutionContext(project, collection_id, ini_section)
            collection = _create_collection(module, scope, collection_id)
            try:
                # Some factories are factories-of-factories: calling them
                # with no arguments yields the real factory.  TypeError
                # means the factory already takes the context directly.
                term_factory = term_factory()
            except TypeError:
                pass
            for term_data in term_factory(ctx):
                try:
                    # A 2-tuple denotes a bidirectional association between
                    # an existing source term and a (new) destination term.
                    term_src, term_dst = term_data
                    t = _get_term(collection, term_dst)
                    s = pyessv.load(term_src)
                    if t not in s.associations:
                        s.associations.append(t)
                    if s not in t.associations:
                        t.associations.append(s)
                except (ValueError, AttributeError):
                    # Plain term data: simply create the term.
                    _get_term(collection, term_data)

    # Add to archive & persist to file system.
    for authority in authorities:
        pyessv.archive(authority)