def test_delete():
    """pyessv-tests: io: delete.

    Writes the test authority to the archive, then deletes each node and
    asserts its backing file/directory disappears.

    """
    # NOTE(review): authority_dirs and authority_manifest are assigned but
    # never used in this test - candidates for removal.
    authority_dirs = os.listdir(LIB.DIR_ARCHIVE)

    # Expected on-disk layout of the archived test authority.
    authority_dir = os.path.join(LIB.DIR_ARCHIVE, tu.AUTHORITY_NAME)
    authority_manifest = os.path.join(authority_dir, 'MANIFEST')
    scope_dir = os.path.join(authority_dir, tu.SCOPE_NAME)
    collection_01_dir = os.path.join(scope_dir, tu.COLLECTION_01_NAME)
    collection_02_dir = os.path.join(scope_dir, tu.COLLECTION_02_NAME)
    collection_03_dir = os.path.join(scope_dir, tu.COLLECTION_03_NAME)
    term_01_file = os.path.join(collection_01_dir, tu.TERM_01_NAME)
    term_02_file = os.path.join(collection_02_dir, tu.TERM_02_NAME)
    term_03_file = os.path.join(collection_03_dir, tu.TERM_03_NAME)

    # Persist the test authority so there is something to delete.
    io_manager.write(LIB.load(tu.AUTHORITY_NAMESPACE))

    # Delete bottom-up (terms -> collections -> scope -> authority); after
    # each delete the corresponding file/directory must be gone.
    for namespace, npath, predicate in (
        (tu.TERM_01_NAMESPACE, term_01_file, os.path.isfile),
        (tu.TERM_02_NAMESPACE, term_02_file, os.path.isfile),
        (tu.TERM_03_NAMESPACE, term_03_file, os.path.isfile),
        (tu.COLLECTION_01_NAMESPACE, collection_01_dir, os.path.isdir),
        (tu.COLLECTION_02_NAMESPACE, collection_02_dir, os.path.isdir),
        (tu.COLLECTION_03_NAMESPACE, collection_03_dir, os.path.isdir),
        (tu.SCOPE_NAMESPACE, scope_dir, os.path.isdir),
        (tu.AUTHORITY_NAMESPACE, authority_dir, os.path.isdir),
    ):
        node = LIB.load(namespace)
        io_manager.delete(node)
        assert not predicate(npath)
def validate_vocabularies(projects, experiments):
    """Validate various CV termsets within collections.

    :param projects: Project termset to validate against wcrp:cmip6:activity-id.
    :param experiments: Experiment termset to validate against
        wcrp:cmip6:experiment-id.

    """
    _validate('PROJECT', pyessv.load('wcrp:cmip6:activity-id'), projects)
    _validate("EXPERIMENTS", pyessv.load('wcrp:cmip6:experiment-id'),
              experiments)
    # Fixed: was a Python-2-only `print` statement; the function-call form
    # prints identically under Python 2 and 3 for a single argument.
    print("------------------------------------------------------")
def _main(args):
    """Main entry point.

    Maps ESGF ini-file vocabularies onto pyessv scopes/collections and
    archives the result.

    :param args: Parsed CLI arguments; args.source is the ESGF vocab
        directory.

    :raises ValueError: If the source directory does not exist.

    """
    if not os.path.isdir(args.source):
        raise ValueError('ESGF vocab directory does not exist: {}'.format(
            args.source))

    # Process project modules:
    for module in _MODULES:
        # Set project.
        # NOTE(review): [4:] strips a fixed 4-char module-name prefix -
        # presumably a package prefix such as 'lib_'; confirm.
        project = module.__name__[4:].replace('_', '-')

        # Set ini file handler.
        ini_section = _IniSection(project, args.source)

        # Load authority & create scope (ECMWF modules use their own
        # authority, everything else falls under WCRP).
        if module in _MODULES_ECMWF:
            authority = _create_authority_ecmwf()
            scope = pyessv.load('ecmwf:{}'.format(project))
        else:
            authority = pyessv.load('wcrp')
            scope = pyessv.load('wcrp:{}'.format(project))
        if not scope:
            scope = _create_scope(authority, project)

        # Set scope data from the ini file.
        scope.data = scope.data or dict()
        for field in module.SCOPE_DATA:
            scope.data[field] = ini_section.get_option(field, raw=True)

        # Create regex collections (second tuple member is a pattern).
        collections = [
            i for i in module.COLLECTIONS if not inspect.isfunction(i[1])
        ]
        for collection_id, term_regex in collections:
            _create_collection(module, scope, collection_id,
                               term_regex=term_regex)

        # Create standard collections (second tuple member is a factory).
        collections = [
            i for i in module.COLLECTIONS if inspect.isfunction(i[1])
        ]
        for collection_id, term_factory in collections:
            ctx = _MappingExecutionContext(project, collection_id,
                                           ini_section)
            collection = _create_collection(module, scope, collection_id)
            # The factory may itself be a zero-arg factory-of-factories;
            # if calling it without args fails we use it directly.
            try:
                term_factory = term_factory()
            except TypeError:
                pass
            for term_data in term_factory(ctx):
                _get_term(collection, term_data)

        # Add to archive & persist to file system.
        pyessv.archive(authority)
def validate_vocabularies(projects, experiments):
    """Validate various CV termsets within collections.

    :param projects: Project termset to validate against wcrp:cmip6:activity-id.
    :param experiments: Experiment termset to validate against
        wcrp:cmip6:experiment-id.

    """
    _validate(
        'PROJECT',
        pyessv.load('wcrp:cmip6:activity-id'),
        projects
    )
    _validate(
        "EXPERIMENTS",
        pyessv.load('wcrp:cmip6:experiment-id'),
        experiments
    )
    # Fixed: was a Python-2-only `print` statement; the function-call form
    # prints identically under Python 2 and 3 for a single argument.
    print("------------------------------------------------------")
def test_search_setup(): """ERRATA :: WS :: SEARCH :: setup. """ # Invoke WS endpoint. r = requests.get(_URL_SEARCH_SETUP) # Assert WS response. obj = tu.assert_ws_response(_URL_SEARCH_SETUP, r, fields={'vocabs', 'values'}) # Assert vocabularies. for collection in obj['vocabs']: assert isinstance(pyessv.load(collection['namespace']), pyessv.Collection) for term in collection['terms']: assert isinstance(pyessv.load(term['namespace']), pyessv.Term)
def _test(func, typeof):
    """Inner test: create a node via *func*, verify its type, and confirm a
    namespace round-trip yields an equal node.

    """
    created = func()
    tu.assert_object(created, typeof)
    reloaded = load(created.namespace)
    assert created == reloaded
def _get_institutes():
    """Returns canonical cmip6 institutes (derived from vocabularies).

    """
    collection = pyessv.load('wcrp:cmip6:institution-id')
    return [term.canonical_name.split(':')[-1] for term in collection]
def test_create(node_factory, node_type):
    """Test instantiation of domain entities.

    :param node_factory: Callable producing a node instance.
    :param node_type: Expected type of the produced node.

    """
    # Create a node via the factory and verify its basic shape.
    node = node_factory()
    tu.assert_object(node, node_type)

    # Reloading by namespace must yield an equivalent node.
    loaded = load(node.namespace)
    assert node.namespace == loaded.namespace
    assert repr(node) == repr(loaded)
def write_authority():
    """Writes ES-DOC authority.

    Returns the archived 'esdoc' authority when it already exists;
    otherwise creates it.

    """
    existing = pyessv.load('esdoc', verbose=False)
    if existing:
        return existing
    return pyessv.create_authority(
        'esdoc',
        'Earth System Documentation',
        label='ES-DOC',
        url='https://es-doc.org',
        create_date=CREATE_DATE)
def _create_authority_ecmwf():
    """Writes ECMWF authority.

    Returns the archived authority when it already exists; otherwise
    creates it.

    """
    # NOTE(review): the name is passed upper-case ('ECMWF') while the load
    # key is lower-case ('ecmwf') - presumably pyessv canonicalises names on
    # creation; confirm against the pyessv version in use.
    return pyessv.load('ecmwf', verbose=False) or pyessv.create_authority(
        'ECMWF',
        'European Center for Medium-Range Weather Forecasts',
        label='ECMWF',
        url='https://www.ecmwf.int',
        create_date=_CREATE_DATE)
def _set_output(): """Sets response to be returned to client. """ # Set include meta section flag. include_meta = self.get_argument(_PARAM_INCLUDE_META, 'false') == 'true' # Sets vocabulary identifier. identifier = ':'.join([ i.strip().lower().replace('_', '-') for i in self.request.path.split('/')[3:] ]) # Set output to be returned to client. if len(identifier) == 0: self.output = { 'data': [_encode(i, include_meta) for i in pyessv.load()] } else: self.output = { 'data': _encode(pyessv.load(identifier), include_meta) }
def _get_pid_tasks(issue, obj):
    """Returns PID service tasks extracted from issue data.

    :param issue: Issue being processed (supplies project & uid).
    :param dict obj: Raw issue data; obj[JF_DATASETS] lists dataset ids.

    :returns: One INSERT PID service task per dataset identifier, or an
        empty list when the project is not a PID client.
    :rtype: list

    """
    pid_tasks = []
    project = pyessv.load('esdoc:errata:project:{}'.format(issue.project))
    # Only PID-enabled projects generate tasks.  (Fixed: was `== True`,
    # the non-idiomatic boolean comparison flagged by PEP8/E712.)
    if project.data['is_pid_client']:
        for identifier in obj[JF_DATASETS]:
            task = PIDServiceTask()
            task.action = PID_ACTION_INSERT
            task.issue_uid = issue.uid
            task.dataset_id = identifier
            pid_tasks.append(task)

    return pid_tasks
def _write_scope(authority):
    """Writes ES-DOC cmip6 scope.

    :param authority: ES-DOC authority under which the scope is created.

    :returns: The cmip6 scope (existing or newly created).

    """
    # Reuse the archived scope when it exists, otherwise create it.
    scope = pyessv.load('esdoc:cmip6', verbose=False) or pyessv.create_scope(
        authority,
        'cmip6',
        'ES-DOC controlled Vocabularies (CVs) for use in cmip6',
        create_date=utils.CREATE_DATE,
        label='CMIP6',
        url='https://github.com/ES-DOC')

    # Populate the model-topic collection.
    _write_model_topic(scope)

    return scope
def _main(args):
    """Main entry point.

    Substitutes CORDEXP/CORDEX vocabulary placeholders into a template and
    writes the result.

    :param args: Parsed CLI arguments (template_fpath, output_fpath).

    """
    # Open template.
    content = _get_template_content(args.template_fpath)

    # Create CORDEXP collections.
    for collection in pyessv.load("copernicus:cordexp"):
        # One quoted entry per term (str.join replaces the original
        # quadratic `+=` string accumulation; output is identical).
        data = ''.join(
            '\t\'{}\'\n'.format(term.canonical_name) for term in collection)
        placeholder = collection.raw_name.upper()
        # NOTE(review): both the plain and _RAW placeholders are filled with
        # canonical names - confirm the _RAW variant should not use
        # term.raw_name (compare with the CMIP6 template generator).
        content = content.replace(
            '[__CORDEXP_{}__]'.format(placeholder), data)
        content = content.replace(
            '[__CORDEXP_{}_RAW__]'.format(placeholder), data)

    # Create CORDEX collections.
    data = ''.join(
        '\t\'{}\'\n'.format(term.canonical_name)
        for term in pyessv.load("ecmwf:c3s-cordex:institute"))
    content = content.replace('[__CORDEX_INSTITUTION_ID__]', data)
    content = content.replace('[__CORDEX_INSTITUTION_ID_RAW__]', data)

    # Write output to file system.
    _set_output(args.output_fpath, content)
def _main(args):
    """Main entry point.

    Walks the authority -> scope -> collection -> term hierarchy, applying
    the optional CLI filters, and prints each matching term's namespace.

    """
    if args.authority is None or len(args.authority.strip()) == 0:
        raise ValueError('Authority is a required parameter')

    def _selected(node, wanted):
        """Returns True when no filter is set or the node matches it."""
        return not wanted or wanted == node.canonical_name

    for scope in pyessv.load(args.authority):
        if not _selected(scope, args.scope):
            continue
        for collection in scope:
            if not _selected(collection, args.collection):
                continue
            for term in collection:
                if _selected(term, args.term):
                    print(term.namespace.replace(':', ' -> '))
def _map_collection(identifier):
    """Converts a pyessv collection to a dictionary.

    :param str identifier: Collection namespace identifier.

    :returns: Collection encoded as a dictionary.
    :rtype: dict

    """
    collection = pyessv.load(identifier)
    result = {
        'canonical_name': collection.canonical_name,
        # NOTE(review): 'key' and 'namespace' carry the same value -
        # presumably for client-side convenience; confirm both are consumed.
        'key': collection.namespace,
        'label': collection.label,
        'namespace': collection.namespace,
        'terms': [_map_term(i) for i in collection]
    }
    # Merge collection-level data; note this may overwrite keys set above
    # if collection.data shares key names.
    if collection.data is not None:
        result.update(collection.data)

    return result
def extract_facets(project, data):
    """Extracts terms from a dataset identifier.

    :param str project: Project code.
    :param str|list data: Dataset identifier(s).

    :returns: Set of pyessv terms extracted from dataset identifier.
    :rtype: list

    """
    separator, targets = _CONFIG[project]

    # Normalise to a list of identifiers (basestring: Python 2 codebase).
    if isinstance(data, basestring):
        identifiers = [data]
    else:
        identifiers = data

    # Collect unique facet keys of the form "<name>:<value>".
    facets = set()
    for identifier in identifiers:
        parts = identifier.split(separator)
        for name, index in targets:
            facets.add('{}:{}'.format(name, parts[index]))

    return [pyessv.load(facet) for facet in facets]
def _write_scope(authority):
    """Writes ES-DOC errata scope.

    :param authority: ES-DOC authority under which the scope is created.

    :returns: The errata scope (existing or newly created).

    """
    # Reuse the archived scope when it exists, otherwise create it.
    scope = pyessv.load('esdoc:errata', verbose=False) or pyessv.create_scope(
        authority,
        'errata',
        'Controlled Vocabularies (CVs) for use in dataset errata',
        create_date=utils.CREATE_DATE,
        label='Dataset Errata',
        url='https://github.com/ES-DOC/esdoc-errata-ws')

    # Populate the scope's collections.
    _write_projects(scope)
    _write_issue_severity(scope)
    _write_issue_status(scope)
    _write_pid_task_action(scope)
    _write_pid_task_status(scope)

    return scope
def _set_output(): """Sets response to be returned to client. """ # Set vocabs to be loaded. vocabs = { 'esdoc:errata:project', 'esdoc:errata:severity', 'esdoc:errata:status', } for project in pyessv.load('esdoc:errata:project'): for vocab in project.data['facets']: vocabs.add(vocab) # Get facet values. with db.session.create(): facet_values = set(db.dao.get_project_facets()) # Set output. self.output = { 'vocabs': [_map_collection(i) for i in sorted(vocabs)], 'values': facet_values }
def test_write():
    """pyessv-tests: io: write.

    Verifies that writing the test authority creates the expected archive
    directory tree and JSON files.

    """
    # Snapshot the archive contents prior to writing.
    authority_dirs = os.listdir(LIB.DIR_ARCHIVE)

    # Expected on-disk layout of the archived test authority.
    authority_dir = os.path.join(LIB.DIR_ARCHIVE, tu.AUTHORITY_NAME)
    authority_manifest = os.path.join(authority_dir, 'MANIFEST')
    scope_dir = os.path.join(authority_dir, tu.SCOPE_NAME)
    collection_01_dir = os.path.join(scope_dir, tu.COLLECTION_01_NAME)
    collection_02_dir = os.path.join(scope_dir, tu.COLLECTION_02_NAME)
    collection_03_dir = os.path.join(scope_dir, tu.COLLECTION_03_NAME)
    term_01_file = os.path.join(collection_01_dir, tu.TERM_01_NAME)
    term_02_file = os.path.join(collection_02_dir, tu.TERM_02_NAME)
    term_03_file = os.path.join(collection_03_dir, tu.TERM_03_NAME)
    dpaths = (authority_dir, scope_dir, collection_01_dir, collection_02_dir,
              collection_03_dir)
    fpaths = (authority_manifest, term_01_file, term_02_file, term_03_file)

    # Nothing should exist before the write.
    for dpath in dpaths:
        assert not os.path.isdir(dpath)
    for fpath in fpaths:
        assert not os.path.isfile(fpath)

    # Write the authority to the archive.
    # NOTE(review): loads by AUTHORITY_NAME here while test_delete loads by
    # AUTHORITY_NAMESPACE - confirm the inconsistency is intentional.
    io_manager.write(LIB.load(tu.AUTHORITY_NAME))

    # Exactly one new authority directory should have been added, and the
    # full tree should now exist.
    assert len(os.listdir(LIB.DIR_ARCHIVE)) == len(authority_dirs) + 1
    for dpath in dpaths:
        assert os.path.isdir(dpath)
    for fpath in fpaths:
        assert os.path.isfile(fpath)

    # Manifest and every written file must contain a JSON object.
    with io.open(authority_manifest, 'r') as fstream:
        assert isinstance(json.loads(fstream.read()), dict)
    for fpath in fpaths:
        with io.open(fpath, 'r') as fstream:
            assert isinstance(json.loads(fstream.read()), dict)
def _main(): """Main entry point. """ # Set canonical institutes & actual GH teams. institutes = [i.canonical_name.split(':')[-1] for i in pyessv.load('wcrp:cmip6:institution-id')] teams = utils.get_teams(lambda i: i['name'].startswith('cmip6-')) # Set teams to be created. to_create = ['cmip6-{}'.format(i) for i in institutes if 'cmip6-{}'.format(i) not in teams] # Set teams to be deleted. to_delete = [i for i in teams.values() if i.name.startswith('cmip6') and i.institution_id not in institutes] # Escape when nothing to do. if len(to_create) == 0 and len(to_delete) == 0: pyessv.log("Teams are in sync - nothing todo") return # Update GH. for team_id in to_create: utils.create_team(team_id) for team in to_delete: utils.delete_team(team)
def _main():
    """Main entry point.

    Fills CMIP6 vocabulary placeholders in the template and writes the
    result to the configured output path.

    """
    # Read the template.
    with open(_TEMPLATE, 'r') as fstream:
        content = fstream.read()

    # Substitute one placeholder pair per collection: [KEY] takes the
    # canonical names, [KEY_RAW] the raw names.
    for scope in _VOCABS:
        for name in _VOCABS[scope]:
            collection = pyessv.load('wcrp:{}:{}'.format(scope, name))
            key = collection.raw_name.upper()
            canonical = ''.join('\t\'{}\'\n'.format(term.canonical_name)
                                for term in collection)
            raw = ''.join('\t\'{}\'\n'.format(term.raw_name)
                          for term in collection)
            content = content.replace('[{}]'.format(key), canonical)
            content = content.replace('[{}_RAW]'.format(key), raw)

    # Write output to file system.
    with open(_OUTPUT, 'w') as fstream:
        fstream.write(content)
def _main(args):
    """Main entry point.

    Fills CMIP6 vocabulary placeholders in the template and writes the
    result to args.output_fpath.

    """
    # Read the template.
    with open(_TEMPLATE, 'r') as fstream:
        content = fstream.read()

    # Substitute one placeholder pair per collection: [KEY] takes the
    # canonical names, [KEY_RAW] the raw names.
    for scope in _VOCABS:
        for name in _VOCABS[scope]:
            collection = pyessv.load('wcrp:{}:{}'.format(scope, name))
            key = collection.raw_name.upper()
            canonical = ''.join('\t\'{}\'\n'.format(term.canonical_name)
                                for term in collection)
            raw = ''.join('\t\'{}\'\n'.format(term.raw_name)
                          for term in collection)
            content = content.replace('[{}]'.format(key), canonical)
            content = content.replace('[{}_RAW]'.format(key), raw)

    # Write output to file system.
    with open(args.output_fpath, 'w') as fstream:
        fstream.write(content)
def _get_institutes():
    """Returns canonical cmip6 institutes (derived from vocabularies).

    """
    names = (term.canonical_name
             for term in pyessv.load('wcrp:cmip6:institution-id'))
    return [name.split(':')[-1] for name in names]
def _cache_controlled_vocabularies(self):
    """Loads controlled vocabularies once and caches them on the instance.

    """
    # Scope-level CV collections, keyed "<authority>:<scope>".
    self._cvs = pyessv.load("{}:{}".format(self.authority, self.scope))
    # Authority-level vocabulary node.
    self._authority_info = pyessv.load(self.authority)
class CMIP6Check(BaseNCCheck):
    """The CMIP6 checker class.

    Compliance-checker plugin validating CMIP6 netCDF files against the
    WCRP CMIP6 controlled vocabularies (via pyessv) and the MIP tables.

    """

    register_checker = True
    name = "cmip6"

    # validation of a term against a CV is only performed once
    # and the result cached (shared across all instances)
    __cache = {
        "cv": {
            # Full wcrp:cmip6 scope used for term lookups.
            "scope": pyessv.load("wcrp:cmip6"),
            "institutions": [
                trm.data[u"postal_address"]
                for trm in pyessv.load('wcrp:cmip6:institution-id')
            ],
            "models": [],
            "experiments": [
                trm.data[u"experiment"]
                for trm in pyessv.load("wcrp:cmip6:experiment-id")
            ]
        },
        "mip_tables": MipTables(MIP_TABLES),
        # validated[term_type][collection][term] -> True/False/None
        # (None == unknown collection).
        "validated": {
            "canonical_name": {},
            "label": {},
            "raw_name": {}
        }
    }

    def __init__(self):
        super(CMIP6Check, self).__init__()
        self.__messages = []
        # NOTE(review): '__erorrs' looks like a typo for '__errors' (the
        # attribute actually used by the checks below, which re-initialise
        # it themselves) - confirm and fix.
        self.__erorrs = 0

    @classmethod
    def make_result(cls, level, score, out_of, name, messages):
        """A helper factory method for generating cc results"""
        return Result(level, (score, out_of), name, messages)

    @classmethod
    def _validate_term(cls, term, collection, term_type="canonical_name"):
        """Check a term against a CV, using cache if possible.

        Returns True/False for known collections, None when the collection
        is not part of the cached scope.
        """
        if collection in cls.__cache["validated"][term_type]:
            if term in cls.__cache["validated"][term_type][collection]:
                # Cache hit.
                return cls.__cache["validated"][term_type][collection][term]
            else:
                # perform check
                try:
                    cls.__cache["validated"][term_type][collection][term] = (
                        term in [
                            getattr(trm, term_type)
                            for trm in
                            cls.__cache["cv"]["scope"][collection].terms
                        ])
                except ValueError:
                    # Unknown collection -> None sentinel.
                    cls.__cache["validated"][term_type][collection][
                        term] = None
        else:
            # First term seen for this collection: create its cache bucket.
            cls.__cache["validated"][term_type][collection] = {}
            if collection in cls.__cache["cv"]["scope"]:
                cls.__cache["validated"][term_type][collection][term] = (
                    term in [
                        getattr(trm, term_type)
                        for trm in
                        cls.__cache["cv"]["scope"][collection].terms
                    ])
            else:
                cls.__cache["validated"][term_type][collection][term] = None
        return cls.__cache["validated"][term_type][collection][term]

    def check_filename(self, ds):
        """Tests filename's facets against a CV.

        Expected filename template::

            <variable_id>_<table_id>_<source_id>_<experiment_id>_
            <member_id>_<grid_label>[_<time_range>].nc

        e.g. tas_Amon_hadgem3-es_piCtrl_r1i1p1f1_gn_201601-210012.nc

        Parameters
        ----------
        ds : netCDF4.Dataset
            an open ncdf file

        Returns
        -------
        compliance_checker.base.Result
            container with check's results
        """
        filename = os.path.basename(ds.filepath())
        filename_parts = filename.split('.')[0].split('_')
        # Facet name -> index of the corresponding filename part.
        template_dict = {
            "table-id": 1,
            "source-id": 2,
            "experiment-id": 3,
            "grid-label": 5
        }
        messages = []
        valid_filename = True
        level = BaseCheck.MEDIUM
        out_of = 1
        score = 0
        # Each templated facet must be a valid CV term AND match the
        # corresponding global attribute.
        for cv in template_dict:
            if not self._validate_term(
                    filename_parts[template_dict[cv]].lower(), cv):
                messages.append("Invalid term {} in the filename {}".format(
                    cv, filename))
                valid_filename = False
            else:
                attr = ds.getncattr(cv.replace('-', '_'))
                if attr != filename_parts[template_dict[cv]]:
                    valid_filename = False
                    messages.append(
                        "Value {} of the attribute {} doesn't match filename {}"
                        .format(attr, cv, filename))
        # member_id is "<variant_label>" or "<variant_label>-<sub_experiment>".
        member_id = filename_parts[4].split('-')
        if len(member_id) > 1:
            if not self._validate_term(member_id[1], "experiment-id"):
                messages.append("Invalid term {} in the filename {}".format(
                    "sub_experiment_id", filename))
                valid_filename = False
        if re.match(r"^r\d+i\d+p\d+f\d+$", member_id[0]) is None:
            valid_filename = False
            messages.append("Invalid variant_label {}".format(member_id[0]))
        else:
            variant_label = ds.getncattr("variant_label")
            if variant_label != member_id[0]:
                valid_filename = False
                messages.append(
                    "Variant label {} is not consistent with file contents ({})"
                    .format(member_id[0], variant_label))
        # The variable must belong to the named MIP table.
        # NOTE(review): this failure path appends no message - consider
        # adding one for consistency with the other checks.
        if filename_parts[1] in self.__cache["mip_tables"].names:
            if filename_parts[0] not in self.__cache[
                    "mip_tables"].get_variables_from_table(filename_parts[1]):
                valid_filename = False
        # Optional trailing time range must parse for the file's frequency.
        if len(filename_parts) == 7:
            try:
                frequency = ds.getncattr("frequency")
                d1, d2 = parse_date_range(filename_parts[6], frequency)
            except Exception as e:
                valid_filename = False
                # NOTE(review): e.message is Python-2-only; use str(e) when
                # porting to Python 3.
                messages.append("Invalid daterange {} ({})".format(
                    filename_parts[6], e.message))
        if valid_filename:
            score += 1
        return self.make_result(level, score, out_of, "DRS template check",
                                messages)

    def check_global_attributes(self, ds):
        """Checks for existence and validity of global attributes.

        Parameters
        ----------
        ds : netCDF4.Dataset
            an open ncdf file

        Returns
        -------
        compliance_checker.base.Result
            container with check's results
        """
        out_of = 1
        score = 0
        # Reset per-check accumulators (shared with the helper validators).
        self.__errors = 0
        self.__messages = []
        dreq_version = self.__cache["mip_tables"].version
        # create validators
        positive_integer_validator = ValidatorFactory.integer_validator()
        nonempty_string_validator = ValidatorFactory.string_validator()
        # test for presence and contents of attributes contained in CV
        for cv_attribute in CV_ATTRIBUTES:
            self._validate_cv_attribute(ds, cv_attribute)
        # test if rfip indexes are positive integers
        for index_attribute in RUN_INDEX_ATTRIBUTES:
            self._exists_and_valid(ds, index_attribute,
                                   positive_integer_validator)
        # test if grid attribute is a non-empty string
        for mandatory_string in MANDATORY_TEXT_ATTRIBUTES:
            self._exists_and_valid(ds, mandatory_string,
                                   nonempty_string_validator)
        # tests if optional attrbutes are non-empty or don't appear at all
        for optional_string in OPTIONAL_TEXT_ATTRIBUTES:
            self._does_not_exist_or_valid(ds, optional_string,
                                          nonempty_string_validator)
        # validate experiment and institution descriptions
        self._exists_and_valid(
            ds, "experiment",
            ValidatorFactory.value_in_validator(
                self.__cache["cv"]["experiments"]))
        self._exists_and_valid(
            ds, "institution",
            ValidatorFactory.value_in_validator(
                self.__cache["cv"]["institutions"]))
        # validate CF convention
        self._exists_and_valid(
            ds, "Conventions",
            ValidatorFactory.value_in_validator(CF_CONVENTIONS))
        # validate creation date
        self._exists_and_valid(
            ds, "creation_date",
            ValidatorFactory.date_validator("%Y-%m-%dT%H:%M:%SZ"))
        # validate if data specification version is consistent with CMOR
        self._exists_and_valid(
            ds, "data_specs_version",
            ValidatorFactory.value_in_validator([dreq_version]))
        # validate external variables and other strings
        self._does_not_exist_or_valid(
            ds, "external_variables",
            ValidatorFactory.value_in_validator(["areacella", "areacello"]))
        self._exists_and_valid(
            ds, "license",
            ValidatorFactory.value_in_validator([LICENSE_TEXT]))
        self._exists_and_valid(
            ds, "mip_era",
            ValidatorFactory.value_in_validator(["CMIP6"]))
        self._exists_and_valid(
            ds, "product",
            ValidatorFactory.value_in_validator(["model-output"]))
        self._exists_and_valid(
            ds, "source",
            ValidatorFactory.string_validator(SOURCE_REGEX))
        self._exists_and_valid(
            ds, "tracking_id",
            ValidatorFactory.string_validator(
                r"^hdl:21.14100\/[a-zA-Z\d\-]+$"))
        # Global attributes needed for the cross-attribute checks below.
        attr_dict = {
            "forcing_index": None,
            "realization_index": None,
            "initialization_index": None,
            "physics_index": None,
            "experiment_id": None,
            "sub_experiment_id": None,
            "variant_label": None,
            "mip_era": None,
            "source_id": None,
            "institution_id": None,
            "table_id": None,
            "variable_id": None,
        }
        # populate attribute dictionary with values
        for attr_key in attr_dict:
            try:
                attr_dict[attr_key] = ds.getncattr(attr_key)
            except Exception as e:
                self.__errors += 1
                self.__messages.append(
                    "Cannot retrieve global attribute {}".format(attr_key))
        var_attr = {
            "standard_name": None,
            "long_name": None,
            "comment": None,
            "units": None,
            "original_name": None,
            "cell_methods": None,
            "cell_measures": None,
            "missing_value": None,
            "_FillValue": None,
        }
        # check variable attributes
        for attr_key in var_attr:
            try:
                var_attr[attr_key] = ds.variables[
                    attr_dict["variable_id"]].getncattr(attr_key)
            except Exception as e:
                self.__errors += 1
                self.__messages.append(
                    "Cannot retrieve variable attribute {}".format(attr_key))
        # Compare variable attributes against MIP-table metadata.
        var_meta = self.__cache["mip_tables"].get_variable_meta(
            attr_dict["table_id"], attr_dict["variable_id"])
        for key in var_meta:
            try:
                if key not in ["missing_value", "_FillValue"]:
                    self.__errors += not var_attr[key] == var_meta[key]
                else:
                    # Fill/missing values are pinned to the CMIP6 standard.
                    self.__errors += not var_attr[key] == 1.e+20
            except KeyError:
                self.__errors += 1
                self.__messages.append(
                    "Variable attribute '{}' absent in '{}'".format(
                        key, attr_dict["variable_id"]))
        try:
            # further_info_url is fully determined by other attributes.
            further_info_url = "{}/{}.{}.{}.{}.{}.{}".format(
                ESDOC_BASE_URL, attr_dict["mip_era"],
                attr_dict["institution_id"], attr_dict["source_id"],
                attr_dict["experiment_id"], attr_dict["sub_experiment_id"],
                attr_dict["variant_label"])
            self._exists_and_valid(
                ds, "further_info_url",
                ValidatorFactory.value_in_validator([further_info_url]))
            self._exists_and_valid(
                ds, "variable_id",
                ValidatorFactory.value_in_validator(
                    self.__cache["mip_tables"].get_variables_from_table(
                        attr_dict["table_id"])))
            self._exists_and_valid(
                ds, "variant_label",
                ValidatorFactory.value_in_validator([
                    "r{}i{}p{}f{}".format(attr_dict["realization_index"],
                                          attr_dict["initialization_index"],
                                          attr_dict["physics_index"],
                                          attr_dict["forcing_index"])
                ]))
        except Exception as e:
            self.__errors += 1
            # NOTE(review): e.message is Python-2-only; use str(e) when
            # porting to Python 3.
            self.__messages.append(
                "Cannot retrieve attribute. Exception: {}".format(e.message))
        # Parent-experiment consistency checks.
        if (not hasattr(ds, "parent_experiment_id")
                or ds.getncattr("parent_experiment_id") == "no parent"):
            has_parent = False
        else:
            has_parent = True
            self._validate_cv_attribute(ds, "experiment-id",
                                        "parent_experiment_id")
        if has_parent:
            self._exists_and_valid(ds, "branch_method",
                                   ValidatorFactory.nonempty_validator())
            self._exists_and_valid(ds, "branch_time_in_child",
                                   ValidatorFactory.float_validator())
            self._exists_and_valid(ds, "branch_time_in_parent",
                                   ValidatorFactory.float_validator())
            self._validate_cv_attribute(ds, "activity-id",
                                        "parent_activity_id")
            # NOTE(review): parent_experiment_id is validated a second time
            # here (already done above) - harmless but redundant.
            self._validate_cv_attribute(ds, "experiment-id",
                                        "parent_experiment_id")
            self._exists_and_valid(
                ds, "parent_mip_era",
                ValidatorFactory.value_in_validator(["CMIP6"]))
            self._validate_cv_attribute(ds, "source-id", "parent_source_id")
            try:
                # parent_source_id must also equal this file's source_id.
                self._exists_and_valid(
                    ds, "parent_source_id",
                    ValidatorFactory.value_in_validator(
                        [attr_dict["source_id"]]))
            except NameError:
                # unable to validate source consistency
                self.__messages.append(
                    "Unable to check consistency of parent_source_id "
                    "with source_id")
                self.__errors += 1
            self._exists_and_valid(
                ds, "parent_time_units",
                ValidatorFactory.string_validator(r"^days since"))
            self._exists_and_valid(
                ds, "parent_variant_label",
                ValidatorFactory.string_validator(r"^r\d+i\d+p\d+f\d+$"))
        else:
            # No parent: branch times, if present, must be trivial and all
            # parent_* attributes must read 'no parent'.
            try:
                start_of_run = ds.variables["time"][0]
                self._does_not_exist_or_valid(
                    ds, "branch_time_in_child",
                    ValidatorFactory.value_in_validator([start_of_run]))
            except Exception:
                self.__messages.append("Unable to retrieve time variable")
            self._does_not_exist_or_valid(
                ds, "branch_time_in_parent",
                ValidatorFactory.value_in_validator([0.0]))
            no_parent_validator = ValidatorFactory.value_in_validator(
                ['no parent'])
            for attr in PARENT_ATTRIBUTES:
                self._does_not_exist_or_valid(ds, attr, no_parent_validator)
        level = BaseCheck.HIGH
        # All-or-nothing score: any accumulated error zeroes it.
        score = 1 if self.__errors == 0 else 0
        return self.make_result(level, score, out_of,
                                "Global attributes check", self.__messages)

    def _does_not_exist_or_valid(self, ds, attr, validator):
        """Test for validity of an optional attribute.

        Parameters
        ----------
        ds : netCDF4.Dataset
            an open ncdf file
        attr : str
            name of the attribute to be validated
        validator : callable
            validator to be used
        """
        if hasattr(ds, attr) and not validator(getattr(ds, attr)):
            self.__messages.append("Attribute {} needs to have a valid value "
                                   "or be omitted".format(attr))
            self.__errors += 1

    def _exists_and_valid(self, ds, attr, validator):
        """Test for validity of a mandatory attribute.

        Parameters
        ----------
        ds : netCDF4.Dataset
            an open ncdf file
        attr : str
            name of the attribute to be validated
        validator : callable
            validator to be used
        """
        if not hasattr(ds, attr) or not validator(getattr(ds, attr)):
            self.__messages.append(
                "Attribute {} must exist and have a proper value".format(attr))
            self.__errors += 1

    def _validate_cv_attribute(self, ds, collection, nc_name=None):
        """Test for presence of attributes derived from CMIP6 CV.

        Parameters
        ----------
        ds : netCDF4.Dataset
            an open ncdf file
        collection : str
            name of a pyessv collection
        nc_name : str, optional
            name of the attribute if different from the collection name
        """
        try:
            if nc_name is None:
                nc_name = collection.replace('-', '_')
            item = ds.getncattr(nc_name)
            # _validate_term returns None for unknown collections; note that
            # None is also falsy, so both messages below fire in that case.
            validate = self._validate_term(item, collection, "label")
            if validate is None:
                self.__messages.append(
                    "Unknown CV collection type {}".format(collection))
                self.__errors += 1
            if not validate:
                self.__messages.append(
                    "Attribute {} has illegal value {}".format(nc_name, item))
                self.__errors += 1
        except Exception:
            self.__messages.append(
                "Attribute {} is missing from the ncdf file".format(nc_name))
            self.__errors += 1
def _cache_controlled_vocabularies(self):
    """Loads controlled vocabularies once and caches them on the instance.

    """
    # NOTE(review): pyessv.load is called with two positional arguments
    # here, whereas other call sites in this codebase use a single
    # colon-delimited namespace ('wcrp:cmip6') - confirm the two-argument
    # form is supported by the pyessv version in use.
    self._wcrp_cmip6_cv = pyessv.load('wcrp', 'cmip6')
def _main(args):
    """Main entry point.

    Maps ESGF ini-file vocabularies onto pyessv scopes/collections,
    wiring bidirectional term associations, then archives the result.

    :param args: Parsed CLI arguments; args.source is the ESGF vocab
        directory.

    :raises ValueError: If the source directory does not exist.

    """
    if not os.path.isdir(args.source):
        raise ValueError('ESGF vocab directory does not exist: {}'.format(
            args.source))

    # Load vocabulary.
    pyessv.load_cv()

    # CV authority = ECMWF.
    # NOTE(review): commented-out code below kept as found - consider
    # deleting it (the authority is created via _create_authority_ecmwf).
    #_AUTHORITY = pyessv.create_authority(
    #    'ECMWF',
    #    'European Center for Medium-Range Weather Forecasts',
    #    label='ECMWF',
    #    url='https://www.ecmwf.int/',
    #    create_date=_CREATE_DATE
    #)

    # Process project modules:
    for module in _MODULES:
        # Set project.
        # NOTE(review): [4:] strips a fixed 4-char module-name prefix -
        # presumably a package prefix such as 'lib_'; confirm.
        project = module.__name__[4:].replace('_', '-')

        # Set ini file handler.
        ini_section = _IniSection(project, args.source)

        # Load authority & create scope (ECMWF modules use their own
        # authority, everything else falls under WCRP).
        if module in _MODULES_ECMWF:
            authority = _create_authority_ecmwf()
            scope = pyessv.load('ecmwf:{}'.format(project))
        else:
            authority = pyessv.load('wcrp')
            scope = pyessv.load('wcrp:{}'.format(project))
        if not scope:
            scope = _create_scope(authority, project)

        # Set scope data.
        # NOTE(review): unlike the sibling implementation, this assigns
        # module.SCOPE_DATA wholesale rather than reading options from the
        # ini section - confirm which behaviour is intended.
        scope.data = module.SCOPE_DATA or dict()
        #scope.data = scope.data or dict()
        #for field in module.SCOPE_DATA:
        #    scope.data[field] = ini_section.get_option(field, raw=True)

        # Create regex collections (second tuple member is a pattern).
        collections = [
            i for i in module.COLLECTIONS if not inspect.isfunction(i[1])
        ]
        for collection_id, term_regex in collections:
            _create_collection(module, scope, collection_id,
                               term_regex=term_regex)

        # Create standard collections (second tuple member is a factory).
        collections = [
            i for i in module.COLLECTIONS if inspect.isfunction(i[1])
        ]
        for collection_id, term_factory in collections:
            ctx = _MappingExecutionContext(project, collection_id,
                                           ini_section)
            collection = _create_collection(module, scope, collection_id)
            # The factory may itself be a zero-arg factory-of-factories;
            # if calling it without args fails we use it directly.
            try:
                term_factory = term_factory()
            except TypeError:
                pass
            for term_data in term_factory(ctx):
                # A (source, destination) pair links two terms with
                # mutual associations; anything else is a plain term.
                try:
                    term_src, term_dst = term_data
                    t = _get_term(collection, term_dst)
                    s = pyessv.load(term_src)
                    if t not in s.associations:
                        s.associations.append(t)
                    if s not in t.associations:
                        t.associations.append(s)
                except (ValueError, AttributeError):
                    _get_term(collection, term_data)

        # Add to archive & persist to file system.
        pyessv.archive(authority)