def _get_result(self, primary_arg):
    """
    Score a coordinate variable against the values held in the vocabulary.

    One point is given when the variable named by ``kwargs["var_id"]`` is
    present in the dataset, and a second one when its values equal the
    expected values looked up under ``coordinate:<var_id>``.

    :param primary_arg: dataset object exposing a ``variables`` mapping
    :return: Result holding ``(score, self.out_of)`` and any failure message
    """
    dataset = primary_arg
    var_id = self.kwargs["var_id"]

    # Nothing else can be compared when the variable is absent
    if var_id not in dataset.variables:
        notes = [self.get_messages()[0]]
        return Result(self.level, (0, self.out_of), self.get_short_name(), notes)

    vocabs = ESSVocabs(*self.vocabulary_ref.split(":")[:2])
    wanted = vocabs.get_value("coordinate:{}".format(var_id), "data")["value"]
    found = dataset.variables[var_id][:]

    # A value without a length is wrapped so it can be compared as a list
    if not hasattr(found, "__len__"):
        found = [found]

    if list(wanted) == list(found):
        points, notes = 2, []
    else:
        points, notes = 1, [self.get_messages()[1]]

    return Result(self.level, (points, self.out_of), self.get_short_name(), notes)
def _get_result(self, primary_arg):
    """
    Score an attribute of the dataset's main variable.

    Points are awarded in sequence: the main variable can be identified,
    the attribute ``kwargs["attr_name"]`` exists on it, and the attribute
    matches ``kwargs["attr_value"]``. The first failing step contributes the
    message indexed by the score reached so far.

    :param primary_arg: dataset object handed to ``nc_util``
    :return: Result holding ``(score, self.out_of)`` and any failure message
    """
    ds = primary_arg
    score = 0
    messages = []

    # Check main variable is identifiable first. Only ordinary errors are
    # treated as "not identifiable"; KeyboardInterrupt/SystemExit propagate.
    try:
        variable = nc_util.get_main_variable(ds)
    except Exception:
        messages = [self.get_messages()[score]]
        return Result(self.level, (score, self.out_of),
                      self.get_short_name(), messages)
    score += 1

    # Now check attribute
    attr_name = self.kwargs["attr_name"]
    if attr_name not in variable.ncattrs():
        messages = [self.get_messages()[score]]
    else:
        score += 1

        # Check the value of attribute
        expected_value = self.kwargs["attr_value"]
        if nc_util.check_nc_attribute(variable, attr_name, expected_value):
            score += 1
        else:
            messages.append(self.get_messages()[score])

    return Result(self.level, (score, self.out_of),
                  self.get_short_name(), messages)
def _get_result(self, primary_arg):
    """
    Score whether a variable declares a usable ``bounds`` attribute.

    One point is given for the variable ``kwargs["var_id"]`` existing, and a
    second when it has a ``bounds`` attribute whose value names another
    variable in the dataset.

    :param primary_arg: dataset object exposing a ``variables`` mapping
    :return: Result holding ``(score, self.out_of)`` and any failure message
    """
    dataset = primary_arg
    var_id = self.kwargs["var_id"]
    points = 0
    notes = []

    if var_id in dataset.variables:
        points = 1
        target = dataset.variables[var_id]

        # The attribute must exist AND refer to a separate variable
        bounds_resolved = ("bounds" in target.ncattrs()
                           and getattr(target, "bounds") in dataset.variables)
        if bounds_resolved:
            points = 2
        else:
            notes = [self.get_messages()[points]]
    else:
        notes = [self.get_messages()[points]]

    return Result(self.level, (points, self.out_of), self.get_short_name(), notes)
def _get_result(self, primary_arg):
    """
    Score the length of a coordinate variable against the vocabulary.

    One point is given for the variable ``kwargs["var_id"]`` existing, and a
    second when ``len()`` of its data equals the ``length`` entry looked up
    under ``coordinate:<var_id>``.

    :param primary_arg: dataset object exposing a ``variables`` mapping
    :return: Result holding ``(score, self.out_of)`` and any failure message
    """
    dataset = primary_arg
    var_id = self.kwargs["var_id"]

    def finish(points, notes):
        # Single place where the Result is assembled
        return Result(self.level, (points, self.out_of), self.get_short_name(), notes)

    # Without the variable there is no length to compare
    if var_id not in dataset.variables:
        return finish(0, [self.get_messages()[0]])

    vocabs = ESSVocabs(*self.vocabulary_ref.split(":")[:2])
    wanted_length = vocabs.get_value("coordinate:{}".format(var_id), "data")["length"]
    found_length = len(dataset.variables[var_id][:])

    if wanted_length == found_length:
        return finish(2, [])
    return finish(1, [self.get_messages()[1]])
def check_file_name_field4(self, ds):
    '''
    Check file name field4 matches time_coverage_start attribute

    :param ds: object whose ``dataset`` attribute carries the global attributes
    :return: single-element list holding the HIGH priority Result
    '''
    result_name = ['file_name', 'check_file_name_field4']
    reasoning = [
        "File name field4 doesn't match time_coverage_start attribute"
    ]

    matched = False
    start_attr = getattr(ds.dataset, 'time_coverage_start', None)
    if start_attr is not None:
        # time_coverage_start format is yyyy-mm-ddTHH:MM:SSZ while
        # field4 format is yyyymmddTHHMMSSZ
        compact_start = start_attr.replace("-", "").replace(":", "")
        if self._file_names_length >= 4:
            matched = self._file_names[3] == compact_start

    # A passing result carries no reasoning
    outcome = Result(BaseCheck.HIGH, matched, result_name,
                     None if matched else reasoning)
    return [outcome]
def check_time_extents(self, ds):
    """
    Check that the values of time_coverage_start/time_coverage_end
    approximately match the data.

    :param ds: dataset exposing the global attributes and ``variables``
    :return: None when either attribute is missing, otherwise a MEDIUM
             priority Result scored out of 2 (one point per extent that is
             within one hour of the first/last time value)
    """
    name = 'time_coverage_extents_match'

    if not (hasattr(ds, 'time_coverage_start') and
            hasattr(ds, 'time_coverage_end')):
        return

    # Parse the ISO 8601 formatted dates. Only ordinary errors count as a
    # formatting failure; KeyboardInterrupt/SystemExit are not swallowed.
    try:
        t_min = dateparse(ds.time_coverage_start)
        t_max = dateparse(ds.time_coverage_end)
    except Exception:
        return Result(BaseCheck.MEDIUM, False, name,
                      ['time_coverage attributes are not formatted properly. Use the ISO 8601:2004 date format, preferably the extended format.'])

    timevar = cfutil.get_time_variable(ds)
    if not timevar:
        return Result(BaseCheck.MEDIUM, False, name,
                      ['Could not find time variable to test extent of time_coverage_start/time_coverage_end, see CF-1.6 spec chapter 4.4'])

    # Time should be monotonically increasing, so we make that assumption
    # here so we don't have to download THE ENTIRE ARRAY
    try:
        # num2date returns as naive date, but with time adjusted to UTC
        # we need to attach timezone information here, or the date
        # subtraction from t_min/t_max will assume that a naive timestamp is
        # in the same time zone and cause erroneous results.
        # Pendulum uses UTC by default, but we are being explicit here
        time0 = pendulum.instance(num2date(ds.variables[timevar][0],
                                           ds.variables[timevar].units), 'UTC')
        time1 = pendulum.instance(num2date(ds.variables[timevar][-1],
                                           ds.variables[timevar].units), 'UTC')
    except Exception:
        return Result(BaseCheck.MEDIUM, False, name,
                      ['Failed to retrieve and convert times for variables %s.' % timevar])

    start_dt = abs(time0 - t_min)
    end_dt = abs(time1 - t_max)

    score = 2
    msgs = []
    if start_dt > timedelta(hours=1):
        msgs.append("Date time mismatch between time_coverage_start and actual "
                    "time values %s (time_coverage_start) != %s (time[0])" % (t_min.isoformat(), time0.isoformat()))
        score -= 1
    if end_dt > timedelta(hours=1):
        msgs.append("Date time mismatch between time_coverage_end and actual "
                    "time values %s (time_coverage_end) != %s (time[N])" % (t_max.isoformat(), time1.isoformat()))
        score -= 1

    return Result(BaseCheck.MEDIUM, (score, 2), name, msgs)
def check_time_extents(self, ds):
    """
    Check that the values of time_coverage_start/time_coverage_end
    approximately match the data.

    :param ds: dataset exposing the global attributes and ``variables``
    :return: None when either attribute is missing, otherwise a MEDIUM
             priority Result scored out of 2 (one point per extent that is
             within one hour of the first/last time value)
    """
    if not (hasattr(ds, 'time_coverage_start') and
            hasattr(ds, 'time_coverage_end')):
        return

    # allows non-ISO 8601 formatted dates
    # Only ordinary errors count as a parse failure; KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    try:
        t_min = dateparse(ds.time_coverage_start)
        t_max = dateparse(ds.time_coverage_end)
    except Exception:
        return Result(
            BaseCheck.MEDIUM, False, 'time_coverage_extents_match', [
                'time_coverage variables are not formatted properly. Please ensure they are valid ISO-8601 time strings'
            ])

    timevar = cfutil.get_time_variable(ds)
    if not timevar:
        return Result(
            BaseCheck.MEDIUM, False, 'time_coverage_extents_match', [
                'Could not find time variable to test extent of time_coverage_start/time_coverage_end, see CF-1.6 spec chapter 4.4'
            ])

    # Time should be monotonically increasing, so we make that assumption
    # here so we don't have to download THE ENTIRE ARRAY
    try:
        time_values = ds.variables[timevar]
        time0 = num2date(time_values[0], time_values.units)
        time1 = num2date(time_values[-1], time_values.units)
    except Exception:
        return Result(
            BaseCheck.MEDIUM, False, 'time_coverage_extents_match', [
                'Failed to retrieve and convert times for variables %s.' % timevar
            ])

    start_dt = abs(time0 - t_min)
    end_dt = abs(time1 - t_max)

    score = 2
    msgs = []
    if start_dt > timedelta(hours=1):
        msgs.append(
            "Date time mismatch between time_coverage_start and actual "
            "time values %s (time_coverage_start) != %s (time[0])" %
            (t_min.isoformat(), time0.isoformat()))
        score -= 1
    if end_dt > timedelta(hours=1):
        msgs.append(
            "Date time mismatch between time_coverage_end and actual "
            "time values %s (time_coverage_end) != %s (time[N])" %
            (t_max.isoformat(), time1.isoformat()))
        score -= 1

    return Result(BaseCheck.MEDIUM, (score, 2),
                  'time_coverage_extents_match', msgs)
def _check_total_z_extents(self, ds, z_variable):
    """
    Check the entire array of Z for minimum and maximum and compare that to
    the vertical extents defined in the global attributes

    :param netCDF4.Dataset ds: An open netCDF dataset
    :param str z_variable: Name of the variable representing the Z-Axis
    :return: MEDIUM priority Result scored out of 2; zero when an extent
             cannot be cast to float or no unmasked data is available
    """
    msgs = []
    total = 2

    # TypeError is caught as well as ValueError: float() raises it for
    # values such as None or multi-element arrays.
    try:
        vert_min = float(ds.geospatial_vertical_min)
    except (ValueError, TypeError):
        msgs.append("geospatial_vertical_min cannot be cast to float")

    try:
        vert_max = float(ds.geospatial_vertical_max)
    except (ValueError, TypeError):
        msgs.append("geospatial_vertical_max cannot be cast to float")

    if len(msgs) > 0:
        return Result(
            BaseCheck.MEDIUM, (0, total),
            "geospatial_vertical_extents_match", msgs
        )

    zvalue = ds.variables[z_variable][:]
    # If the array has fill values, which is allowed in the case of point
    # features
    if hasattr(zvalue, "mask"):
        zvalue = zvalue[~zvalue.mask]

    if zvalue.size == 0:
        msgs.append(
            "Cannot compare geospatial vertical extents "
            "against min/max of data, as non-masked data "
            "length is zero"
        )
        return Result(
            BaseCheck.MEDIUM, (0, total),
            "geospatial_vertical_extents_match", msgs
        )

    zmin = zvalue.min()
    zmax = zvalue.max()
    if not np.isclose(vert_min, zmin):
        msgs.append(
            "geospatial_vertical_min != min(%s) values, %s != %s"
            % (z_variable, vert_min, zmin)
        )
    if not np.isclose(vert_max, zmax):
        # Report the declared maximum here (the message previously printed
        # vert_min, which made the mismatch report misleading).
        msgs.append(
            "geospatial_vertical_max != max(%s) values, %s != %s"
            % (z_variable, vert_max, zmax)
        )

    return Result(
        BaseCheck.MEDIUM,
        (total - len(msgs), total),
        "geospatial_vertical_extents_match",
        msgs,
    )
def check_time_extents(self, ds):
    """
    Check that the values of time_coverage_start/time_coverage_end
    approximately match the data.

    Both attributes are converted to seconds since 1970-01-01 and compared
    with the minimum/maximum of every variable accepted by
    ``is_time_variable``.

    :param ds: object whose ``dataset`` attribute is the open dataset
    :return: None when either attribute is missing, otherwise a MEDIUM
             priority Result scored out of 2
    """
    if not (hasattr(ds.dataset, 'time_coverage_start') and
            hasattr(ds.dataset, 'time_coverage_end')):
        return

    epoch = parse_dt("1970-01-01 00:00:00 UTC")
    t_min = (parse_dt(ds.dataset.time_coverage_start) - epoch).total_seconds()
    t_max = (parse_dt(ds.dataset.time_coverage_end) - epoch).total_seconds()

    # identify t vars as per CF 4.4
    # .items()/.values() are used instead of the Python-2-only
    # iteritems()/itervalues() so the check runs on Python 2 and 3.
    t_vars = [
        var for name, var in ds.dataset.variables.items()
        if is_time_variable(name, var)
    ]

    if len(t_vars) == 0:
        # Messages are passed as a list, like every other Result here
        return Result(
            BaseCheck.MEDIUM, False, 'time_coverage_extents_match',
            ['Could not find time variable to test extent of time_coverage_start/time_coverage_end, see CF-1.6 spec chapter 4.4']
        )

    obs_mins = {}
    obs_maxs = {}
    for var in t_vars:
        # One converter per variable serves both the minimum and maximum
        converter = Unit(str(var.units)).get_converter("seconds since 1970-01-01")
        obs_mins[var._name] = converter.evaluate(np.nanmin(var))
        obs_maxs[var._name] = converter.evaluate(np.nanmax(var))

    min_pass = any(np.isclose(t_min, min_val) for min_val in obs_mins.values())
    max_pass = any(np.isclose(t_max, max_val) for max_val in obs_maxs.values())

    allpass = sum((min_pass, max_pass))

    msgs = []
    if not min_pass:
        msgs.append(
            "Data for possible time variables (%s) did not match time_coverage_start value (%s)"
            % (obs_mins, t_min))
    if not max_pass:
        msgs.append(
            "Data for possible time variables (%s) did not match time_coverage_end value (%s)"
            % (obs_maxs, t_max))

    return Result(BaseCheck.MEDIUM, (allpass, 2),
                  'time_coverage_extents_match', msgs)
def _get_result(self, primary_arg):
    """
    Score the attributes of one variable against the vocabulary.

    The variable is resolved through ``self._get_var_id``. ``self.out_of``
    is recomputed as one point for the variable plus two per expected
    attribute (present, correct value); attributes that are ignored reduce
    it by two again.

    :param primary_arg: dataset object exposing a ``variables`` mapping
    :return: Result holding ``(score, self.out_of)`` and the failure messages
    """
    dataset = primary_arg
    points = 0
    var_id = self._get_var_id(dataset)

    # Check the variable first (will match if `var_id` is None from previous call)
    if var_id not in dataset.variables:
        notes = self.get_messages()[:1]
        return Result(self.level, (points, self.out_of), self.get_short_name(), notes)

    # Work out the overall 'out of' value based on number of attributes
    vocabs = ESSVocabs(*self.vocabulary_ref.split(":")[:2])
    lookup = ":".join([self.kwargs["pyessv_namespace"], var_id])
    expected_attrs = vocabs.get_value(lookup, "data")
    self.out_of = 1 + len(expected_attrs) * 2

    points += 1
    target = dataset.variables[var_id]
    notes = []
    skip_values = ("<derived from file>",)

    # Check the variable attributes one-by-one
    for attr, wanted in expected_attrs.items():
        ignores = self.kwargs["ignores"]

        # Ignored attributes, or placeholder values, are taken out of the total
        if (ignores and attr in ignores) or wanted in skip_values:
            self.out_of -= 2
            continue

        if attr not in target.ncattrs():
            notes.append("Required variable attribute '{}' is not present for "
                         "variable: '{}'.".format(attr, var_id))
            continue

        points += 1

        # Check the value of attribute
        if nc_util.check_nc_attribute(target, attr, wanted):
            points += 1
            continue

        notes.append(u"Required variable attribute '{}' has incorrect value ('{}') "
                     u"for variable: '{}'. Value should be: '{}'.".format(
                         attr, getattr(target, attr), var_id, wanted))

    return Result(self.level, (points, self.out_of), self.get_short_name(), notes)
def check_id_has_no_blanks(self, ds):
    # Check if there are blanks in the id field; datasets without an id
    # attribute are skipped (None is returned).
    if not hasattr(ds, u'id'):
        return

    blank_free = ' ' not in getattr(ds, u'id')
    problems = [] if blank_free else [u'There should be no blanks in the id field']
    return Result(BaseCheck.MEDIUM, blank_free, 'no_blanks_in_id', msgs=problems)
def check_id_has_no_blanks(self, ds):
    '''
    Check if there are blanks in the id field

    :param netCDF4.Dataset ds: An open netCDF dataset
    :return: None when the dataset has no ``id`` attribute, otherwise a
             MEDIUM priority Result
    '''
    if hasattr(ds, u'id'):
        if ' ' in ds.id:
            return Result(BaseCheck.MEDIUM, False, 'no_blanks_in_id',
                          msgs=[u'There should be no blanks in the id field'])
        return Result(BaseCheck.MEDIUM, True, 'no_blanks_in_id', msgs=[])
    return None
def check_vertical_extents(self, ds):
    """
    Check that the values of geospatial_vertical_min/geospatial_vertical_max
    approximately match the data.

    :param ds: dataset exposing the global attributes and ``variables``
    :return: None when either attribute is missing, otherwise a MEDIUM
             priority Result scored out of 2
    """
    has_both = (hasattr(ds, 'geospatial_vertical_min')
                and hasattr(ds, 'geospatial_vertical_max'))
    if not has_both:
        return

    vert_min = ds.geospatial_vertical_min
    vert_max = ds.geospatial_vertical_max

    # identify vertical vars as per CF 4.3
    candidates = []
    for name, var in ds.variables.items():
        if is_vertical_coordinate(name, var):
            candidates.append(var)

    if len(candidates) == 0:
        return Result(
            BaseCheck.MEDIUM, False, 'geospatial_vertical_extents_match', [
                'Could not find vertical variable to test extent of geospatial_vertical_min/geospatial_vertical_max, see CF-1.6 spec chapter 4.3'
            ])

    # Variables holding only NaN contribute no observation
    usable = [var for var in candidates if not np.isnan(var).all()]
    obs_mins = dict((var._name, np.nanmin(var)) for var in usable)
    obs_maxs = dict((var._name, np.nanmax(var)) for var in usable)

    min_pass = any(np.isclose(vert_min, low) for low in obs_mins.values())
    max_pass = any(np.isclose(vert_max, high) for high in obs_maxs.values())

    msgs = []
    if not min_pass:
        msgs.append(
            "Data for possible vertical variables (%s) did not match geospatial_vertical_min value (%s)"
            % (obs_mins, vert_min))
    if not max_pass:
        msgs.append(
            "Data for possible vertical variables (%s) did not match geospatial_vertical_max value (%s)"
            % (obs_maxs, vert_max))

    return Result(BaseCheck.MEDIUM, (sum((min_pass, max_pass)), 2),
                  'geospatial_vertical_extents_match', msgs)
def check_altitude_units(self, ds):
    """
    If there's a variable named z, it must have units.

    @TODO: this is duplicated with check_variable_units

    :return: LOW priority Result; scored ``(0, 0)`` when there is no 'z'
    """
    if 'z' not in ds.variables:
        return Result(BaseCheck.LOW, (0, 0), 'Altitude Units',
                      ["Dataset has no 'z' variable"])

    has_units = 'units' in ds.variables['z'].ncattrs()
    notes = [] if has_units else ["Variable 'z' has no units attr"]
    return Result(BaseCheck.LOW, has_units, 'Altitude Units', notes)
def check_var_coverage_content_type(self, ds):
    """
    Check each variable's coverage_content_type against the ISO 19115-1 codes.

    The 'crs' variable and the variable named by the global ``platform``
    attribute are skipped. Every other variable yields one HIGH priority
    Result, which fails when the attribute is missing or holds a value
    outside the valid codes.

    Previously the function returned as soon as one variable lacked the
    attribute (later variables were never checked), and the message for an
    invalid value was built but never reported.

    :param ds: dataset exposing the global attributes and ``variables``
    :return: list of Result objects, one per checked variable
    """
    # ISO 19115-1 codes
    valid_ctypes = {
        'image', 'thematicClassification', 'physicalMeasurement',
        'auxiliaryInformation', 'qualityInformation',
        'referenceInformation', 'modelResult', 'coordinate'
    }

    results = []
    platform_variable_name = getattr(ds, 'platform', None)
    for variable in ds.variables:
        if variable in {'crs', platform_variable_name}:
            continue

        msgs = []
        ctype = getattr(ds.variables[variable], 'coverage_content_type', None)
        if ctype is None:
            msgs.append("Var %s missing attr coverage_content_type" % variable)
        elif ctype not in valid_ctypes:
            msgs.append(
                "Var %s does not have a coverage_content_type in %s" %
                (variable, sorted(valid_ctypes)))

        # The variable passes only when no problem was recorded for it
        results.append(
            Result(BaseCheck.HIGH, not msgs,
                   (variable, "coverage_content_type"), msgs))

    return results
class NCFileIsReadableCheck(FileCheckBase):
    """
    Data file is recognised as a valid netCDF file, using sub-format: {file_format}.
    """
    short_name = "File is netCDF"
    defaults = {"file_format": "NETCDF4_CLASSIC"}
    message_templates = [
        "File is not in required netCDF format: {file_format}."
    ]
    level = "HIGH"

    def _get_result(self, primary_arg):
        """
        Open the file with netCDF4 and confirm it has the required sub-format.

        The check passes when the file opens, its ``variables`` and
        ``dimensions`` are dict-like, and ``file_format`` equals
        ``kwargs['file_format']``. Any error while opening or inspecting the
        file counts as a failure.

        :param primary_arg: path (or other argument) accepted by ``Dataset``
        :return: Result scored ``self.out_of`` on success, 0 otherwise
        """
        from netCDF4 import Dataset

        success = False
        ds = None
        try:
            ds = Dataset(primary_arg)
            # Explicit comparisons rather than ``assert`` (asserts vanish
            # under ``python -O``). isinstance(..., dict) still accepts
            # OrderedDict, and also plain-dict containers.
            success = (isinstance(ds.variables, dict)
                       and isinstance(ds.dimensions, dict)
                       and ds.file_format == self.kwargs['file_format'])
        except Exception:
            success = False
        finally:
            # Release the file handle opened for this check
            if ds is not None:
                ds.close()

        messages = []
        if success:
            score = self.out_of
        else:
            score = 0
            messages.append(self.get_messages()[score])

        return Result(self.level, (score, self.out_of),
                      self.get_short_name(), messages)
def check_var_units(self, ds):
    """
    Check that each applicable variable carries a ``units`` attribute.

    Skipped: the 'crs' variable, the variable named by the global
    ``platform`` attribute, variables whose standard_name contains
    'status_flag', and variables without dimensions.

    :param ds: dataset exposing the global attributes and ``variables``
    :return: list of HIGH priority Result objects, one per checked variable
    """
    results = []
    # We don't check certain container variables for units
    skipped_names = ('crs', getattr(ds, 'platform', None))

    for variable in ds.variables:
        if variable in skipped_names:
            continue

        var_obj = ds.variables[variable]

        # If the variable is a QC flag, we don't need units
        std_name = getattr(var_obj, 'standard_name', None)
        if std_name is not None and 'status_flag' in std_name:
            continue

        has_units = hasattr(var_obj, 'units')

        # Check if we have no dimensions. If no dims, skip test
        if getattr(var_obj, 'dimensions') == tuple():
            continue

        notes = []
        if not has_units:
            notes.append("Var %s missing attr units" % variable)
        results.append(
            Result(BaseCheck.HIGH, has_units, (variable, "var_units"), notes))

    return results
def check_date_issued_is_iso(self, ds):
    # Checks if date issued field is ISO compliant; datasets without the
    # attribute are skipped (None is returned).
    if hasattr(ds, u'date_issued'):
        is_iso, notes = datetime_is_iso(ds.date_issued)
        return Result(BaseCheck.MEDIUM, is_iso, 'date_issued_is_iso', notes)
    return None
def trim_groups(r):
    """
    Return a copy of result ``r`` whose name has its first element removed.

    A tuple/list name keeps everything after the first element; any other
    kind of name becomes an empty list.
    """
    trimmed = r.name[1:] if isinstance(r.name, (tuple, list)) else []
    return Result(r.weight, r.value, trimmed, r.msgs)
def _get_result(self, primary_arg):
    """
    Score whether a variable's values all match vocabulary terms.

    ``self.out_of`` is fixed at 1. The point is awarded when
    ``check_array_matches_terms`` accepts the data of ``kwargs["var_id"]``
    for the namespace ``kwargs["pyessv_namespace"]``.

    :param primary_arg: dataset supporting ``variables`` and item access
    :return: Result holding ``(score, self.out_of)`` and any failure message
    """
    dataset = primary_arg
    self.out_of = 1
    points = 0
    notes = []

    vocabs = ESSVocabs(*self.vocabulary_ref.split(":")[:2])
    var_id = self.kwargs["var_id"]

    if var_id not in dataset.variables:
        notes.append(
            "Variable '{}' not found in the file so cannot perform other checks."
            .format(var_id))
    else:
        values = dataset[var_id][:]
        if vocabs.check_array_matches_terms(values, self.kwargs["pyessv_namespace"]):
            points += 1
        else:
            notes.append(self.get_messages()[points])

    return Result(self.level, (points, self.out_of), self.get_short_name(), notes)
def check_extension_name(self, ds):
    '''
    Check file extension name and ensure it equals to nc

    :return: single-element list holding the HIGH priority Result
    '''
    result_name = ['file_name', 'check_extension_name']
    reasoning = ["File extension name is not equal to nc"]

    is_nc = self._file_extension_name == 'nc'
    # A passing result carries no reasoning
    return [Result(BaseCheck.HIGH, is_nc, result_name,
                   None if is_nc else reasoning)]
def test_score_grouping(self):
    """
    Grouping of results for output must cope with every supported kind of
    ``Result.value`` (bool, tuple, None and numpy.bool_).
    """
    # Testing the grouping of results for output, which can fail
    # if some assumptions are not met, e.g. if a Result object has
    # a value attribute of unexpected type
    res = [
        Result(BaseCheck.MEDIUM, True, 'one'),
        Result(BaseCheck.MEDIUM, (1, 3), 'one'),
        Result(BaseCheck.MEDIUM, None, 'one'),
        Result(BaseCheck.MEDIUM, True, 'two'),
        Result(BaseCheck.MEDIUM, np.isnan(1), 'two')  # value is type numpy.bool_
    ]
    score = self.cs.scores(res)

    expected_groups = [('one', (2, 4)), ('two', (1, 2))]
    for position, (group_name, group_value) in enumerate(expected_groups):
        self.assertEqual(score[position].name, group_name)
        self.assertEqual(score[position].value, group_value)
def _has_var_attr(cls, dataset, vname, attr, concept_name, priority=BaseCheck.HIGH):
    """
    Checks for the existence of an attr on variable vname in dataset, with
    the name/message using concept_name.

    :return: Result with the given priority; it fails when either the
             variable or the attribute is absent
    """
    problems = []

    if vname not in dataset.variables:
        problems.append(
            "Variable '{}' not present while checking for attr '{}' for IOOS concept: '{}'"
            .format(vname, attr, concept_name))
    elif attr not in dataset.variables[vname].ncattrs():
        problems.append(
            "Attr '{}' not present on var '{}' while checking for IOOS concept: '{}'"
            .format(attr, vname, concept_name))

    # Passing means that no problem was recorded
    return Result(priority, not problems, concept_name, problems)
def _get_result(self, primary_arg):
    """
    Score the file name and the global attributes it encodes.

    The file name is checked first via ``check_file_name``. Each component
    of the name (split on ``kwargs["delimiter"]``) is then compared with the
    global attribute at the same position in ``kwargs["order"]``, except
    for ``regex:`` entries and those in ``kwargs["ignore_attr_checks"]``.

    :param primary_arg: dataset providing ``filepath()``
    :return: Result holding ``(score, self.out_of)`` and the failure messages
    """
    dataset = primary_arg
    points = 0
    notes = []

    vocabs = ESSVocabs(*self.vocabulary_ref.split(":")[:2])
    fname = os.path.basename(dataset.filepath())

    name_points, name_notes = vocabs.check_file_name(
        fname,
        keys=self.kwargs["order"],
        delimiter=self.kwargs["delimiter"],
        extension=self.kwargs["extension"])
    points += name_points

    # a third of the marks are for the file name check
    if name_points < (self.out_of / 3.):
        notes.extend(name_notes)

    # Check global attributes one-by-one
    stem = os.path.splitext(fname)[0]
    components = stem.split(self.kwargs["delimiter"])

    for position, attr in enumerate(self.kwargs["order"]):
        # Case 1: we do not have the attribute name - so cannot check
        # Case 2: instructed to not perform this check
        skip = attr.startswith('regex:') or attr in self.kwargs["ignore_attr_checks"]
        if skip:
            continue

        attr_points, attr_notes = vocabs.check_global_attribute_value(
            dataset, attr, components[position], property="raw_name")
        points += attr_points

        if attr_points < 2:
            notes.extend(attr_notes)

    return Result(self.level, (points, self.out_of), self.get_short_name(), notes)
def build_group(label=None, weight=None, value=None, sub=None):
    # Assemble a group Result. `self` is not a parameter here: it is taken
    # from the surrounding scope and used to translate the raw value.
    translated = self._translate_value(value)
    children = sub or []
    return Result(weight=weight, value=translated, name=label, children=children)
def check_acknowledgment(self, ds):
    """
    Check if acknowledgment/acknowledgement attr is present

    Either spelling satisfies the check. The explanatory message is attached
    to the failing result (it was previously attached to the passing one,
    leaving failures unexplained), and both outcomes now share the MEDIUM
    weight that the failing branch already used.

    :param ds: dataset exposing the global attributes
    :return: MEDIUM priority Result
    """
    present = hasattr(ds, 'acknowledgment') or hasattr(ds, 'acknowledgement')

    msgs = []
    if not present:
        msgs.append(
            "Neither 'acknowledgment' nor 'acknowledgement' attributes present")

    return Result(BaseCheck.MEDIUM, present,
                  'acknowledgment/acknowledgement', msgs=msgs)
def check_var_coverage_content_type(self, ds):
    '''
    Check coverage content type against valid ISO-19115-1 codes

    Every variable returned by ``cfutil.get_geophysical_variables`` yields
    one HIGH priority Result, which fails when the attribute is missing or
    holds a value outside the valid codes. Previously the message for an
    invalid value was built but no Result was recorded, so invalid values
    went unreported.

    :param netCDF4.Dataset ds: An open netCDF dataset
    :return: list of Result objects, one per geophysical variable
    '''
    # ISO 19115-1 codes
    valid_ctypes = {
        'image', 'thematicClassification', 'physicalMeasurement',
        'auxiliaryInformation', 'qualityInformation',
        'referenceInformation', 'modelResult', 'coordinate'
    }

    results = []
    for variable in cfutil.get_geophysical_variables(ds):
        msgs = []
        ctype = getattr(ds.variables[variable], 'coverage_content_type', None)

        if ctype is None:
            msgs.append("Var %s missing attr coverage_content_type" % variable)
        elif ctype not in valid_ctypes:
            msgs.append(
                "Var %s does not have a coverage_content_type in %s" %
                (variable, sorted(valid_ctypes)))

        # The variable passes only when no problem was recorded for it
        results.append(
            Result(BaseCheck.HIGH, not msgs,
                   (variable, "coverage_content_type"), msgs))

    return results
def _get_result(self, primary_arg):
    """
    Score the version given in a conventions attribute.

    The level is first derived from ``kwargs["status"]``. The message
    templates at index 1 and 2 are replaced according to
    ``kwargs["convention_type"]`` ('CF' or 'ATMODAT') before the messages
    are defined.

    :param primary_arg: dataset handed to ``nc_util``
    :return: None when the score is 0, otherwise a Result holding
             ``(score, self.out_of)`` and any failure message
    """
    self._atmodat_status_to_level(self.kwargs["status"])
    dataset = primary_arg

    points = nc_util.check_conventions_version_number(
        dataset,
        self.kwargs["attribute"],
        self.kwargs["convention_type"],
        self.kwargs["min_version"],
        self.kwargs["max_version"])
    notes = []

    # Templates for message slots 1 and 2, per convention type
    templates_by_type = {
        'CF': (
            "'{attribute}' {convention_type} Convention information not present",
            "'{attribute}' {convention_type} Convention version not in valid range of "
            "{min_version} to {max_version}",
        ),
        'ATMODAT': (
            "'{attribute}' {convention_type} Standard information not present",
            "'{attribute}' {convention_type} Standard version given is not in accordance "
            "with performed checks",
        ),
    }
    convention = self.kwargs["convention_type"]
    if convention in templates_by_type:
        missing_template, version_template = templates_by_type[convention]
        self.message_templates[1] = missing_template
        self.message_templates[2] = version_template

    self._define_messages(notes)

    if score_is_zero(points):
        # The existence of the "Conventions" attribute is already checked by
        # GlobalAttrTypeCheck, so no output of an error message is needed here
        return

    if points < self.out_of:
        notes.append(self.get_messages()[points])
    return Result(self.level, (points, self.out_of), self.get_short_name(), notes)


def score_is_zero(points):
    # Tiny private predicate used by the conventions check above
    return points == 0
def _check_scalar_vertical_extents(self, ds, z_variable):
    '''
    Check the scalar value of Z compared to the vertical extents which
    should also be equivalent

    :param netCDF4.Dataset ds: An open netCDF dataset
    :param str z_variable: Name of the variable representing the Z-Axis
    :return: MEDIUM priority Result scored out of 2
    '''
    declared_min = ds.geospatial_vertical_min
    declared_max = ds.geospatial_vertical_max
    scalar_z = ds.variables[z_variable][:].item()

    problems = []

    # For a scalar Z both declared extents must coincide ...
    extents_agree = np.isclose(declared_min, declared_max)
    if not extents_agree:
        problems.append(
            "geospatial_vertical_min != geospatial_vertical_max for scalar depth values, %s != %s"
            % (declared_min, declared_max))

    # ... and equal the single Z value itself
    matches_data = np.isclose(declared_max, scalar_z)
    if not matches_data:
        problems.append("geospatial_vertical_max != %s values, %s != %s" %
                        (z_variable, declared_max, scalar_z))

    out_of = 2
    return Result(BaseCheck.MEDIUM, (out_of - len(problems), out_of),
                  'geospatial_vertical_extents_match', problems)
def _check_total_z_extents(self, ds, z_variable):
    '''
    Check the entire array of Z for minimum and maximum and compare that to
    the vertical extents defined in the global attributes

    :param netCDF4.Dataset ds: An open netCDF dataset
    :param str z_variable: Name of the variable representing the Z-Axis
    :return: MEDIUM priority Result scored out of 2; zero when no unmasked
             data is available
    '''
    vert_min = ds.geospatial_vertical_min
    vert_max = ds.geospatial_vertical_max
    msgs = []
    total = 2

    zvalue = ds.variables[z_variable][:]
    # If the array has fill values, which is allowed in the case of point
    # features
    if hasattr(zvalue, 'mask'):
        zvalue = zvalue[~zvalue.mask]

    # min()/max() raise on an empty array, so report a failure instead
    if zvalue.size == 0:
        msgs.append("Cannot compare geospatial vertical extents "
                    "against min/max of data, as non-masked data "
                    "length is zero")
        return Result(BaseCheck.MEDIUM, (0, total),
                      'geospatial_vertical_extents_match', msgs)

    zmin = zvalue.min()
    zmax = zvalue.max()
    if not np.isclose(vert_min, zmin):
        msgs.append("geospatial_vertical_min != min(%s) values, %s != %s" %
                    (z_variable, vert_min, zmin))
    if not np.isclose(vert_max, zmax):
        # Report the declared maximum here (the message previously printed
        # vert_min, which made the mismatch report misleading).
        msgs.append("geospatial_vertical_max != max(%s) values, %s != %s" %
                    (z_variable, vert_max, zmax))

    return Result(BaseCheck.MEDIUM, (total - len(msgs), total),
                  'geospatial_vertical_extents_match', msgs)
def check_time_variable(self, dataset):
    """
    MOD from IMOS1_3Check class to match the lower case TIME variable

    Check time variable attributes:
        standard_name
        axis
        calendar
        type
        units

    :return: list of Result objects; empty when there is no 'time' variable
    """
    ret_val = []
    if 'time' not in dataset.variables:
        return ret_val

    time_var = dataset.variables['time']

    # Data type: the variable is expected to be a 32-bit integer
    dtype_result = Result(BaseCheck.MEDIUM, True, name=('var', 'time'))
    if time_var.dtype != np.int32:
        dtype_result.value = False
        dtype_result.msgs = ["The time variable should be of type int"]
    ret_val.append(dtype_result)

    # Attributes with a fixed set of accepted values
    expected_attrs = dict(
        standard_name=['time'],
        axis=['T'],
        calendar=['gregorian'],
    )
    ret_val += check_attribute_dict(expected_attrs, time_var)

    # Units are compared with the value configured on the checker
    units_result = check_attribute('units', self.time_units, time_var,
                                   BaseCheck.MEDIUM)
    ret_val.append(units_result)

    return ret_val
def check_attribute(name, expected, ds, priority=BaseCheck.HIGH, result_name=None, optional=False):
    """
    Basic attribute checks.

    `name` is the name of an attribute expected to be present in the
    "dataset" `ds` (either netCDF4 Dataset or Variable object).

    `expected` determines what is checked. If expected is
    * None, check for presence of attribute and ensure is not an
      empty string (after stripping whitespace).
    * A string - assumed to be a regular expression that the attribute
      must match.
    * A type - check that attribute is of the given type.
    * An iterable - check that attribute has one of the values in the iterable
    * A function - called with the attribute value as argument, should
      return a tuple (result_value, message). The name of the attribute
      will be prepended to the message.

    Strings and types are tested before the generic iterable case because
    on Python 3 both expose ``__iter__`` and would otherwise never reach
    their own branch.

    Returns a Result object with the given `priority`. The result.name
    attribute is set to `result_name` if given, otherwise it is generated
    using the type of `ds` and value of `name`.

    If optional is set to True and the attribute does not exist, returns
    None (i.e. skip) instead of a fail result.

    Raises TypeError when `expected` is of an unsupported kind.

    Initially copied from `attr_check` function from
    compliance_checker/base.py at
    https://github.com/ioos/compliance-checker.

    """
    # String base type on Python 2 and 3
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # message_name is needed for every failure message, so it is always
    # computed. It used to be set only when result_name was omitted, which
    # made any failure with a caller-supplied result_name raise NameError.
    is_global = isinstance(ds, Dataset)
    if is_global:
        message_name = "Attribute %s" % name
    else:
        message_name = "Attribute %s:%s" % (getattr(ds, 'name', '?'), name)

    if result_name is None:
        if is_global:
            result_name = ('globalattr', name)
        else:
            result_name = ('var', ds.name, name)

    result = Result(priority, name=result_name, msgs=[])

    value = getattr(ds, name, None)
    if value is None:
        if optional:
            return None
        result.value = False
        result.msgs.append("%s missing" % message_name)
        return result

    if expected is None:
        # see if attribute is a non-empty string
        try:
            if not value.strip():
                result.value = False
                result.msgs.append("%s is empty or completely whitespace" % message_name)
            else:
                result.value = True
        # if not a string/has no strip method we should be OK
        except AttributeError:
            result.value = True

    elif isinstance(expected, string_types):
        if not isinstance(value, string_types):
            result.value = False
            result.msgs.append('%s should be a string' % message_name)
        elif re.match(expected, value):
            result.value = True
        else:
            result.value = False
            result.msgs.append(
                "%s does't match expected pattern '%s'" % (message_name, expected)
            )

    elif isinstance(expected, type):
        if isinstance(value, expected):
            result.value = True
        else:
            result.value = False
            result.msgs.append(
                '%s should be of %s' % (message_name, str(expected).strip('<>'))
                # str(expected) looks like "<type 'float'>"
            )

    elif hasattr(expected, '__iter__'):
        if value in expected:
            result.value = True
        else:
            result.value = False
            # list() lets the single-option message work for sets too,
            # which do not support indexing
            options = list(expected)
            if len(options) == 1:
                msg = "%s should be equal to %s" % (message_name, options[0])
            else:
                msg = "%s should be one of %s" % (message_name, expected)
            result.msgs.append(msg)

    elif hasattr(expected, '__call__'):
        result.value, message = expected(value)
        if not result.value and message:
            result.msgs.append('%s %s' % (message_name, message))

    else:
        # unsupported type in second element
        raise TypeError("Second arg in tuple has unsupported type: {}".format(type(expected)))

    return result