def _get_result(self, primary_arg):
        """
        Score a coordinate variable's values against the vocabulary.

        One point is awarded when the variable named by the ``var_id``
        keyword argument is present in the dataset, and a second point when
        its values, compared as lists, equal the ``value`` entry looked up
        under ``coordinate:<var_id>``.

        :param primary_arg: dataset object exposing a ``variables`` mapping
        :return: a ``Result`` carrying ``(score, self.out_of)`` and, on
                 failure, the message indexed by the score that was reached
        """
        dataset = primary_arg
        name = self.kwargs["var_id"]
        points = 0

        # Without the variable there is nothing to compare against
        if name not in dataset.variables:
            failure = [self.get_messages()[points]]
            return Result(self.level, (points, self.out_of),
                          self.get_short_name(), failure)

        points += 1

        vocabulary = ESSVocabs(*self.vocabulary_ref.split(":")[:2])
        lookup_key = "coordinate:{}".format(name)
        expected = vocabulary.get_value(lookup_key, "data")["value"]

        found = dataset.variables[name][:]

        # Wrap anything that has no length so it can be turned into a list
        if not hasattr(found, "__len__"):
            found = [found]

        notes = []
        if list(expected) == list(found):
            points += 1
        else:
            notes = [self.get_messages()[points]]

        return Result(self.level, (points, self.out_of),
                      self.get_short_name(), notes)
    def _get_result(self, primary_arg):
        """
        Score a required attribute on the dataset's main variable.

        Points are awarded cumulatively: one when the main variable can be
        identified, one when the attribute named by the ``attr_name`` keyword
        argument is present on it, and one when
        ``nc_util.check_nc_attribute`` accepts its value against
        ``attr_value``.

        :param primary_arg: the dataset to inspect
        :return: a ``Result`` carrying ``(score, self.out_of)`` and, on
                 failure, the message indexed by the score that was reached
        """
        ds = primary_arg

        score = 0
        messages = []

        # Check main variable is identifiable first
        try:
            variable = nc_util.get_main_variable(ds)
        except Exception:
            # Any ordinary lookup failure means "not identifiable";
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            messages = [self.get_messages()[score]]
            return Result(self.level, (score, self.out_of),
                          self.get_short_name(), messages)

        score += 1

        # Now check attribute
        attr_name = self.kwargs["attr_name"]

        if attr_name not in variable.ncattrs():
            messages = [self.get_messages()[score]]

        else:
            score += 1

            # Check the value of attribute
            expected_value = self.kwargs["attr_value"]
            check = nc_util.check_nc_attribute(variable, attr_name, expected_value)

            if check:
                score += 1
            else:
                messages.append(self.get_messages()[score])

        return Result(self.level, (score, self.out_of),
                      self.get_short_name(), messages)
    def _get_result(self, primary_arg):
        """
        Score whether a variable has a usable ``bounds`` attribute.

        One point is awarded when the variable named by the ``var_id``
        keyword argument exists, and a second when it carries a ``bounds``
        attribute whose value names another variable in the dataset.

        :param primary_arg: dataset object exposing a ``variables`` mapping
        :return: a ``Result`` carrying ``(score, self.out_of)`` and, on
                 failure, the message indexed by the score that was reached
        """
        dataset = primary_arg
        name = self.kwargs["var_id"]
        points = 0

        # The variable has to exist before its attributes can be inspected
        if name not in dataset.variables:
            failure = [self.get_messages()[points]]
            return Result(self.level, (points, self.out_of),
                          self.get_short_name(), failure)

        points += 1
        target = dataset.variables[name]

        # "bounds" must be present and must refer to a separate variable
        declares_bounds = "bounds" in target.ncattrs()
        if declares_bounds and getattr(target, "bounds") in dataset.variables:
            points += 1
            notes = []
        else:
            notes = [self.get_messages()[points]]

        return Result(self.level, (points, self.out_of),
                      self.get_short_name(), notes)
    def _get_result(self, primary_arg):
        """
        Score a coordinate variable's length against the vocabulary.

        One point is awarded when the variable named by the ``var_id``
        keyword argument exists, and a second when the length of its data
        equals the ``length`` entry looked up under ``coordinate:<var_id>``.

        :param primary_arg: dataset object exposing a ``variables`` mapping
        :return: a ``Result`` carrying ``(score, self.out_of)`` and, on
                 failure, the message indexed by the score that was reached
        """
        dataset = primary_arg
        name = self.kwargs["var_id"]
        points = 0

        # Nothing to measure if the variable is absent
        if name not in dataset.variables:
            failure = [self.get_messages()[points]]
            return Result(self.level, (points, self.out_of),
                          self.get_short_name(), failure)

        points += 1

        vocabulary = ESSVocabs(*self.vocabulary_ref.split(":")[:2])
        lookup_key = "coordinate:{}".format(name)
        wanted_length = vocabulary.get_value(lookup_key, "data")["length"]

        found_length = len(dataset.variables[name][:])

        notes = []
        if wanted_length == found_length:
            points += 1
        else:
            notes = [self.get_messages()[points]]

        return Result(self.level, (points, self.out_of),
                      self.get_short_name(), notes)
Ejemplo n.º 5
0
    def check_file_name_field4(self, ds):
        '''
        Compare the fourth file name field with ``time_coverage_start``.

        Returns an empty list when the dataset has no ``time_coverage_start``
        attribute; otherwise a one-element list holding a HIGH-weight
        ``Result`` that passes only when a fourth field exists and equals the
        attribute with its "-" and ":" separators removed.
        '''
        outcomes = []

        start_attr = getattr(ds.dataset, 'time_coverage_start', None)
        if start_attr is None:
            return outcomes

        # time_coverage_start format is yyyy-mm-ddTHH:MM:SSZ while
        # field4 format is yyyymmddTHHMMSSZ
        compact_start = start_attr.replace("-", "").replace(":", "")

        matches = False
        if self._file_names_length >= 4:
            matches = not (self._file_names[3] != compact_start)

        label = ['file_name', 'check_file_name_field4']
        if matches:
            outcome = Result(BaseCheck.HIGH, True, label, None)
        else:
            outcome = Result(BaseCheck.HIGH, False, label, [
                "File name field4 doesn't match time_coverage_start attribute"
            ])

        outcomes.append(outcome)
        return outcomes
Ejemplo n.º 6
0
    def check_time_extents(self, ds):
        """
        Check that the values of time_coverage_start/time_coverage_end approximately match the data.

        Returns ``None`` when either attribute is missing. Otherwise returns a
        MEDIUM-weight ``Result``: a plain failure when the attributes cannot
        be parsed, no time variable is found, or the time values cannot be
        converted; else a ``(score, 2)`` result that loses one point for each
        end of the time axis differing from its attribute by more than one
        hour.

        :param ds: dataset exposing the global attributes and ``variables``
        """
        if not (hasattr(ds, 'time_coverage_start') and hasattr(ds, 'time_coverage_end')):
            return

        # Parse the ISO 8601 formatted dates
        try:
            t_min = dateparse(ds.time_coverage_start)
            t_max = dateparse(ds.time_coverage_end)
        except Exception:
            # Ordinary parse failures only; do not swallow KeyboardInterrupt
            return Result(BaseCheck.MEDIUM,
                          False,
                          'time_coverage_extents_match',
                          ['time_coverage attributes are not formatted properly. Use the ISO 8601:2004 date format, preferably the extended format.'])

        timevar = cfutil.get_time_variable(ds)

        if not timevar:
            return Result(BaseCheck.MEDIUM,
                          False,
                          'time_coverage_extents_match',
                          ['Could not find time variable to test extent of time_coverage_start/time_coverage_end, see CF-1.6 spec chapter 4.4'])

        # Time should be monotonically increasing, so we make that assumption here so we don't have to download THE ENTIRE ARRAY
        try:
            # num2date returns as naive date, but with time adjusted to UTC
            # we need to attach timezone information here, or the date
            # subtraction from t_min/t_max will assume that a naive timestamp is
            # in the same time zone and cause erroneous results.
            # Pendulum uses UTC by default, but we are being explicit here
            time0 = pendulum.instance(num2date(ds.variables[timevar][0],
                                      ds.variables[timevar].units), 'UTC')
            time1 = pendulum.instance(num2date(ds.variables[timevar][-1],
                                      ds.variables[timevar].units), 'UTC')
        except Exception:
            return Result(BaseCheck.MEDIUM,
                          False,
                          'time_coverage_extents_match',
                          ['Failed to retrieve and convert times for variables %s.' % timevar])

        start_dt = abs(time0 - t_min)
        end_dt = abs(time1 - t_max)

        # Start from full marks and deduct one point per mismatching end
        score = 2
        msgs = []
        if start_dt > timedelta(hours=1):
            msgs.append("Date time mismatch between time_coverage_start and actual "
                        "time values %s (time_coverage_start) != %s (time[0])" % (t_min.isoformat(), time0.isoformat()))
            score -= 1
        if end_dt > timedelta(hours=1):
            msgs.append("Date time mismatch between time_coverage_end and actual "
                        "time values %s (time_coverage_end) != %s (time[N])" % (t_max.isoformat(), time1.isoformat()))
            score -= 1

        return Result(BaseCheck.MEDIUM,
                      (score, 2),
                      'time_coverage_extents_match',
                      msgs)
Ejemplo n.º 7
0
    def check_time_extents(self, ds):
        """
        Check that the values of time_coverage_start/time_coverage_end approximately match the data.

        Returns ``None`` when either attribute is missing. Otherwise returns a
        MEDIUM-weight ``Result``: a plain failure when the attributes cannot
        be parsed, no time variable is found, or the time values cannot be
        converted; else a ``(score, 2)`` result that loses one point for each
        end of the time axis differing from its attribute by more than one
        hour.

        :param ds: dataset exposing the global attributes and ``variables``
        """
        if not (hasattr(ds, 'time_coverage_start')
                and hasattr(ds, 'time_coverage_end')):
            return

        # allows non-ISO 8601 formatted dates
        try:
            t_min = dateparse(ds.time_coverage_start)
            t_max = dateparse(ds.time_coverage_end)
        except Exception:
            # Ordinary parse failures only; do not swallow KeyboardInterrupt
            return Result(
                BaseCheck.MEDIUM, False, 'time_coverage_extents_match', [
                    'time_coverage variables are not formatted properly. Please ensure they are valid ISO-8601 time strings'
                ])

        timevar = cfutil.get_time_variable(ds)

        if not timevar:
            return Result(
                BaseCheck.MEDIUM, False, 'time_coverage_extents_match', [
                    'Could not find time variable to test extent of time_coverage_start/time_coverage_end, see CF-1.6 spec chapter 4.4'
                ])

        # Time should be monotonically increasing, so we make that assumption here so we don't have to download THE ENTIRE ARRAY
        try:
            time0 = num2date(ds.variables[timevar][0],
                             ds.variables[timevar].units)
            time1 = num2date(ds.variables[timevar][-1],
                             ds.variables[timevar].units)
        except Exception:
            return Result(
                BaseCheck.MEDIUM, False, 'time_coverage_extents_match', [
                    'Failed to retrieve and convert times for variables %s.' %
                    timevar
                ])

        # NOTE(review): the num2date output is subtracted from the parsed
        # attributes as-is; confirm both sides are consistently naive or
        # timezone-aware for the inputs this check receives.
        start_dt = abs(time0 - t_min)
        end_dt = abs(time1 - t_max)

        # Start from full marks and deduct one point per mismatching end
        score = 2
        msgs = []
        if start_dt > timedelta(hours=1):
            msgs.append(
                "Date time mismatch between time_coverage_start and actual "
                "time values %s (time_coverage_start) != %s (time[0])" %
                (t_min.isoformat(), time0.isoformat()))
            score -= 1
        if end_dt > timedelta(hours=1):
            msgs.append(
                "Date time mismatch between time_coverage_end and actual "
                "time values %s (time_coverage_end) != %s (time[N])" %
                (t_max.isoformat(), time1.isoformat()))
            score -= 1

        return Result(BaseCheck.MEDIUM, (score, 2),
                      'time_coverage_extents_match', msgs)
Ejemplo n.º 8
0
    def _check_total_z_extents(self, ds, z_variable):
        """
        Check the entire array of Z for minimum and maximum and compare that to
        the vertical extents defined in the global attributes

        :param netCDF4.Dataset ds: An open netCDF dataset
        :param str z_variable: Name of the variable representing the Z-Axis
        :return: a MEDIUM-weight ``Result`` scored out of 2; the score is 0
                 when either attribute cannot be cast to float or when no
                 non-masked data remains, otherwise one point is lost per
                 extent that is not close to the data's min/max
        """
        msgs = []
        total = 2
        # float() raises ValueError for unparsable strings and TypeError for
        # values such as None or multi-element arrays; both mean "not castable"
        try:
            vert_min = float(ds.geospatial_vertical_min)
        except (ValueError, TypeError):
            msgs.append("geospatial_vertical_min cannot be cast to float")

        try:
            vert_max = float(ds.geospatial_vertical_max)
        except (ValueError, TypeError):
            msgs.append("geospatial_vertical_max cannot be cast to float")
        if msgs:
            return Result(
                BaseCheck.MEDIUM, (0, total), "geospatial_vertical_extents_match", msgs
            )

        zvalue = ds.variables[z_variable][:]
        # If the array has fill values, which is allowed in the case of point
        # features
        if hasattr(zvalue, "mask"):
            zvalue = zvalue[~zvalue.mask]

        if zvalue.size == 0:
            msgs.append(
                "Cannot compare geospatial vertical extents "
                "against min/max of data, as non-masked data "
                "length is zero"
            )
            return Result(
                BaseCheck.MEDIUM, (0, total), "geospatial_vertical_extents_match", msgs
            )

        zmin = zvalue.min()
        zmax = zvalue.max()
        if not np.isclose(vert_min, zmin):
            msgs.append(
                "geospatial_vertical_min != min(%s) values, %s != %s"
                % (z_variable, vert_min, zmin)
            )
        if not np.isclose(vert_max, zmax):
            # Report the maximum attribute here (previously printed vert_min)
            msgs.append(
                "geospatial_vertical_max != max(%s) values, %s != %s"
                % (z_variable, vert_max, zmax)
            )

        return Result(
            BaseCheck.MEDIUM,
            (total - len(msgs), total),
            "geospatial_vertical_extents_match",
            msgs,
        )
Ejemplo n.º 9
0
    def check_time_extents(self, ds):
        """
        Check that the values of time_coverage_start/time_coverage_end approximately match the data.

        Returns ``None`` when either attribute is missing from ``ds.dataset``.
        Otherwise returns a MEDIUM-weight ``Result``: a plain failure when no
        time variable is found, else ``(points, 2)`` with one point for each
        attribute that is close to the converted min/max of at least one
        time variable.

        :param ds: object whose ``dataset`` attribute holds the netCDF dataset
        """
        if not (hasattr(ds.dataset, 'time_coverage_start')
                and hasattr(ds.dataset, 'time_coverage_end')):
            return

        # Express both attributes as seconds since the Unix epoch
        epoch = parse_dt("1970-01-01 00:00:00 UTC")
        t_min = (parse_dt(ds.dataset.time_coverage_start) -
                 epoch).total_seconds()
        t_max = (parse_dt(ds.dataset.time_coverage_end) -
                 epoch).total_seconds()

        # identify t vars as per CF 4.4
        # (.items()/.values() work on both Python 2 and 3, unlike iter*)
        t_vars = [
            var for name, var in ds.dataset.variables.items()
            if is_time_variable(name, var)
        ]

        if not t_vars:
            # Messages are passed as a list, like every other Result here
            return Result(
                BaseCheck.MEDIUM, False, 'time_coverage_extents_match', [
                    'Could not find time variable to test extent of time_coverage_start/time_coverage_end, see CF-1.6 spec chapter 4.4'
                ])

        obs_mins = {
            var._name: Unit(str(
                var.units)).get_converter("seconds since 1970-01-01").evaluate(
                    np.nanmin(var))
            for var in t_vars
        }
        obs_maxs = {
            var._name: Unit(str(
                var.units)).get_converter("seconds since 1970-01-01").evaluate(
                    np.nanmax(var))
            for var in t_vars
        }

        min_pass = any(
            np.isclose(t_min, min_val) for min_val in obs_mins.values())
        max_pass = any(
            np.isclose(t_max, max_val) for max_val in obs_maxs.values())

        allpass = sum((min_pass, max_pass))

        msgs = []
        if not min_pass:
            msgs.append(
                "Data for possible time variables (%s) did not match time_coverage_start value (%s)"
                % (obs_mins, t_min))
        if not max_pass:
            msgs.append(
                "Data for possible time variables (%s) did not match time_coverage_end value (%s)"
                % (obs_maxs, t_max))

        return Result(BaseCheck.MEDIUM, (allpass, 2),
                      'time_coverage_extents_match', msgs)
    def _get_result(self, primary_arg):
        """
        Score a variable's attributes against the vocabulary's expectations.

        The variable id comes from ``self._get_var_id``. One point is given
        for the variable being present, then for every expected attribute one
        point for presence and one for a value accepted by
        ``nc_util.check_nc_attribute``. ``self.out_of`` is recomputed here and
        reduced by two for each attribute that is skipped, either because it
        is listed in the ``ignores`` keyword argument or because its expected
        value is a known placeholder.

        :param primary_arg: dataset object exposing a ``variables`` mapping
        :return: a ``Result`` carrying ``(score, self.out_of)`` and one
                 message per failed attribute test
        """
        dataset = primary_arg
        points = 0
        name = self._get_var_id(dataset)

        # A missing variable (this also covers a ``None`` id) ends the check
        if name not in dataset.variables:
            notes = self.get_messages()[:1]
            return Result(self.level, (points, self.out_of),
                          self.get_short_name(), notes)

        # The overall 'out of' value depends on the number of attributes
        vocabulary = ESSVocabs(*self.vocabulary_ref.split(":")[:2])
        lookup_key = ":".join([self.kwargs["pyessv_namespace"], name])
        required = vocabulary.get_value(lookup_key, "data")

        self.out_of = 1 + len(required) * 2

        points += 1
        target = dataset.variables[name]
        notes = []

        # Expected values that are placeholders rather than testable values
        placeholder_values = ("<derived from file>",)

        for attr, wanted in required.items():
            skip_list = self.kwargs["ignores"]

            # Skipped attributes no longer count towards the total
            if (skip_list and attr in skip_list) or wanted in placeholder_values:
                self.out_of -= 2
                continue

            if attr not in target.ncattrs():
                notes.append("Required variable attribute '{}' is not present for "
                             "variable: '{}'.".format(attr, name))
                continue

            points += 1

            # Presence earned a point; a matching value earns another
            if nc_util.check_nc_attribute(target, attr, wanted):
                points += 1
            else:
                notes.append(u"Required variable attribute '{}' has incorrect value ('{}') "
                             u"for variable: '{}'. Value should be: '{}'.".format(
                                 attr, getattr(target, attr), name, wanted))

        return Result(self.level, (points, self.out_of),
                      self.get_short_name(), notes)
Ejemplo n.º 11
0
 def check_id_has_no_blanks(self, ds):
     """
     Check if there are blanks in the id field.

     Returns ``None`` when the dataset has no ``id`` attribute, otherwise a
     MEDIUM-weight ``Result`` that fails when the id contains a space.
     """
     if hasattr(ds, u'id'):
         contains_blank = ' ' in ds.id
         notes = ([u'There should be no blanks in the id field']
                  if contains_blank else [])
         return Result(BaseCheck.MEDIUM,
                       not contains_blank,
                       'no_blanks_in_id',
                       msgs=notes)
     return None
Ejemplo n.º 12
0
    def check_id_has_no_blanks(self, ds):
        '''
        Verify that the global ``id`` attribute contains no space character.

        Returns ``None`` when the dataset has no ``id`` attribute.

        :param netCDF4.Dataset ds: An open netCDF dataset
        '''
        if hasattr(ds, u'id'):
            if ' ' not in ds.id:
                return Result(BaseCheck.MEDIUM, True, 'no_blanks_in_id',
                              msgs=[])
            return Result(BaseCheck.MEDIUM, False, 'no_blanks_in_id',
                          msgs=[u'There should be no blanks in the id field'])
        return None
Ejemplo n.º 13
0
    def check_vertical_extents(self, ds):
        """
        Compare geospatial_vertical_min/geospatial_vertical_max with the data.

        Returns ``None`` when either attribute is missing. Otherwise returns a
        MEDIUM-weight ``Result``: a plain failure when no vertical coordinate
        variable is found, else ``(points, 2)`` with one point for each
        attribute that is close to the NaN-ignoring min/max of at least one
        vertical variable. Variables that are entirely NaN are left out.
        """
        has_both = (hasattr(ds, 'geospatial_vertical_min')
                    and hasattr(ds, 'geospatial_vertical_max'))
        if not has_both:
            return

        declared_min = ds.geospatial_vertical_min
        declared_max = ds.geospatial_vertical_max

        # identify vertical vars as per CF 4.3
        vertical_vars = []
        for var_name, var in ds.variables.items():
            if is_vertical_coordinate(var_name, var):
                vertical_vars.append(var)

        if not vertical_vars:
            return Result(
                BaseCheck.MEDIUM, False, 'geospatial_vertical_extents_match', [
                    'Could not find vertical variable to test extent of geospatial_vertical_min/geospatial_vertical_max, see CF-1.6 spec chapter 4.3'
                ])

        # Only variables holding at least one non-NaN value are measured
        usable = [var for var in vertical_vars if not np.isnan(var).all()]
        observed_mins = {var._name: np.nanmin(var) for var in usable}
        observed_maxs = {var._name: np.nanmax(var) for var in usable}

        min_ok = any(
            np.isclose(declared_min, low) for low in observed_mins.values())
        max_ok = any(
            np.isclose(declared_max, high) for high in observed_maxs.values())

        notes = []
        if not min_ok:
            notes.append(
                "Data for possible vertical variables (%s) did not match geospatial_vertical_min value (%s)"
                % (observed_mins, declared_min))
        if not max_ok:
            notes.append(
                "Data for possible vertical variables (%s) did not match geospatial_vertical_max value (%s)"
                % (observed_maxs, declared_max))

        points = sum((min_ok, max_ok))
        return Result(BaseCheck.MEDIUM, (points, 2),
                      'geospatial_vertical_extents_match', notes)
Ejemplo n.º 14
0
    def check_altitude_units(self, ds):
        """
        Require a ``units`` attribute on the variable named ``z``, if any.

        When the dataset has no ``z`` variable a ``(0, 0)`` result explaining
        that is returned instead.

        @TODO: this is duplicated with check_variable_units
        """
        if 'z' not in ds.variables:
            return Result(BaseCheck.LOW, (0, 0), 'Altitude Units', ["Dataset has no 'z' variable"])

        has_units = 'units' in ds.variables['z'].ncattrs()
        notes = [] if has_units else ["Variable 'z' has no units attr"]
        return Result(BaseCheck.LOW, has_units, 'Altitude Units', notes)
Ejemplo n.º 15
0
    def check_var_coverage_content_type(self, ds):
        """
        Check each variable's ``coverage_content_type`` attribute.

        The ``crs`` variable and the variable named by the dataset's
        ``platform`` attribute are skipped. A failing HIGH-weight ``Result``
        is recorded for every remaining variable whose attribute is missing
        or is not one of the accepted ISO 19115-1 codes; variables that pass
        produce no entry.

        :param ds: dataset exposing a ``variables`` mapping
        :return: list of failing ``Result`` objects (possibly empty)
        """
        results = []
        # Container variables that are not expected to carry the attribute
        skipped = ('crs', getattr(ds, 'platform', None))
        # ISO 19115-1 codes
        valid_ctypes = {
            'image', 'thematicClassification', 'physicalMeasurement',
            'auxiliaryInformation', 'qualityInformation',
            'referenceInformation', 'modelResult', 'coordinate'
        }
        for variable in ds.variables:
            if variable in skipped:
                continue
            ctype = getattr(ds.variables[variable], 'coverage_content_type',
                            None)
            if ctype is None:
                # Keep going: the remaining variables must be checked too
                # (previously the method returned at the first failure)
                results.append(
                    Result(BaseCheck.HIGH, False,
                           (variable, "coverage_content_type"),
                           ["Var %s missing attr coverage_content_type" %
                            variable]))
                continue
            if ctype not in valid_ctypes:
                # Report the invalid value (previously the message was built
                # and then discarded)
                results.append(
                    Result(BaseCheck.HIGH, False,
                           (variable, "coverage_content_type"),
                           ["Var %s does not have a coverage_content_type in %s" %
                            (variable, sorted(valid_ctypes))]))

        return results
Ejemplo n.º 16
0
class NCFileIsReadableCheck(FileCheckBase):
    """
    Data file is recognised as a valid netCDF file, using sub-format: {file_format}.
    """
    # NOTE(review): the class docstring above contains a ``{file_format}``
    # placeholder and looks like a runtime template - it is left untouched.
    short_name = "File is netCDF"
    defaults = {"file_format": "NETCDF4_CLASSIC"}
    message_templates = [
        "File is not in required netCDF format: {file_format}."
    ]
    level = "HIGH"

    def _get_result(self, primary_arg):
        # Open ``primary_arg`` with netCDF4 and award full marks
        # (``self.out_of``) only when it opens, exposes dict-like
        # ``variables``/``dimensions`` and reports the ``file_format`` given
        # in the keyword arguments. Any failure scores 0 with the first
        # message attached.
        from netCDF4 import Dataset

        success = False
        try:
            ds = Dataset(primary_arg)
            try:
                # Explicit comparisons instead of ``assert`` so the checks
                # still run under ``python -O``. isinstance() accepts both
                # OrderedDict and plain dict containers.
                # NOTE(review): newer netCDF4 releases appear to return plain
                # dicts here - confirm against the installed version.
                success = (
                    isinstance(ds.variables, dict)
                    and isinstance(ds.dimensions, dict)
                    and ds.file_format == self.kwargs['file_format']
                )
            finally:
                # Release the file handle whatever the outcome
                ds.close()
        except Exception:
            success = False

        messages = []

        if success:
            score = self.out_of
        else:
            score = 0
            messages.append(self.get_messages()[score])

        return Result(self.level, (score, self.out_of), self.get_short_name(),
                      messages)
Ejemplo n.º 17
0
    def check_var_units(self, ds):
        """
        Report, per variable, whether a ``units`` attribute is present.

        Skipped without producing a result: the ``crs`` variable, the
        variable named by the dataset's ``platform`` attribute, variables
        whose ``standard_name`` contains ``status_flag`` and variables with
        no dimensions.

        :param ds: dataset exposing a ``variables`` mapping
        :return: list with one HIGH-weight ``Result`` per checked variable
        """
        outcomes = []
        # Certain container variables are not checked for units
        containers = ('crs', getattr(ds, 'platform', None))

        for var_name in ds.variables:
            if var_name in containers:
                continue

            var = ds.variables[var_name]

            # QC flag variables do not need units
            standard_name = getattr(var, 'standard_name', None)
            if standard_name is not None and 'status_flag' in standard_name:
                continue

            has_units = hasattr(var, 'units')

            # Dimensionless variables are not tested
            if getattr(var, 'dimensions') == tuple():
                continue

            notes = [] if has_units else ["Var %s missing attr units" % var_name]
            outcomes.append(
                Result(BaseCheck.HIGH, has_units, (var_name, "var_units"),
                       notes))

        return outcomes
Ejemplo n.º 18
0
 def check_date_issued_is_iso(self, ds):
     """
     Check whether the ``date_issued`` attribute is ISO compliant.

     Returns ``None`` when the attribute is absent; otherwise a MEDIUM-weight
     ``Result`` built from the value and messages that ``datetime_is_iso``
     returns for the attribute.
     """
     if hasattr(ds, u'date_issued'):
         is_iso, notes = datetime_is_iso(ds.date_issued)
         return Result(BaseCheck.MEDIUM, is_iso,
                       'date_issued_is_iso', notes)
     return None
Ejemplo n.º 19
0
        def trim_groups(r):
            # Rebuild ``r`` with the first element of its name dropped; a
            # name that is neither a tuple nor a list becomes an empty list.
            is_sequence = isinstance(r.name, (tuple, list))
            shortened = r.name[1:] if is_sequence else []
            return Result(r.weight, r.value, shortened, r.msgs)
Ejemplo n.º 20
0
    def _get_result(self, primary_arg):
        """
        Score whether a variable's array matches the vocabulary terms.

        ``self.out_of`` is set to 1. The single point is awarded when the
        variable named by the ``var_id`` keyword argument exists and
        ``check_array_matches_terms`` accepts its data for the
        ``pyessv_namespace`` keyword argument.

        :param primary_arg: dataset supporting ``variables`` and indexing
        :return: a ``Result`` carrying ``(score, self.out_of)`` and a
                 message when the point was not awarded
        """
        dataset = primary_arg
        self.out_of = 1

        vocabulary = ESSVocabs(*self.vocabulary_ref.split(":")[:2])
        name = self.kwargs["var_id"]

        if name not in dataset.variables:
            notes = [
                "Variable '{}' not found in the file so cannot perform other checks."
                .format(name)
            ]
            return Result(self.level, (0, self.out_of),
                          self.get_short_name(), notes)

        values = dataset[name][:]
        matched = vocabulary.check_array_matches_terms(
            values, self.kwargs["pyessv_namespace"])

        if matched:
            points, notes = 1, []
        else:
            points, notes = 0, [self.get_messages()[0]]

        return Result(self.level, (points, self.out_of),
                      self.get_short_name(), notes)
Ejemplo n.º 21
0
    def check_extension_name(self, ds):
        '''
        Verify that the stored file extension equals ``nc``.

        Returns a one-element list holding a HIGH-weight ``Result``; the
        ``ds`` argument is not used.
        '''
        label = ['file_name', 'check_extension_name']

        if self._file_extension_name == 'nc':
            outcome = Result(BaseCheck.HIGH, True, label, None)
        else:
            outcome = Result(BaseCheck.HIGH, False, label,
                             ["File extension name is not equal to nc"])

        return [outcome]
Ejemplo n.º 22
0
 def test_score_grouping(self):
     """
     Exercise the grouping of results for output.

     Grouping can fail if some assumptions are not met, e.g. if a Result
     object has a value attribute of unexpected type.
     """
     numpy_false = np.isnan(1)  # value is type numpy.bool_
     results = [
         Result(BaseCheck.MEDIUM, True, 'one'),
         Result(BaseCheck.MEDIUM, (1, 3), 'one'),
         Result(BaseCheck.MEDIUM, None, 'one'),
         Result(BaseCheck.MEDIUM, True, 'two'),
         Result(BaseCheck.MEDIUM, numpy_false, 'two'),
     ]
     grouped = self.cs.scores(results)

     # Expected (name, value) of each group, in output order
     expectations = [('one', (2, 4)), ('two', (1, 2))]
     for position, (group_name, group_value) in enumerate(expectations):
         self.assertEqual(grouped[position].name, group_name)
         self.assertEqual(grouped[position].value, group_value)
Ejemplo n.º 23
0
    def _has_var_attr(cls,
                      dataset,
                      vname,
                      attr,
                      concept_name,
                      priority=BaseCheck.HIGH):
        """
        Check that attribute ``attr`` exists on variable ``vname``.

        The returned ``Result`` is named ``concept_name``, weighted by
        ``priority``, and fails with an explanatory message when either the
        variable or the attribute is missing.
        """
        problems = []
        if vname in dataset.variables:
            if attr not in dataset.variables[vname].ncattrs():
                problems.append(
                    "Attr '{}' not present on var '{}' while checking for IOOS concept: '{}'"
                    .format(attr, vname, concept_name))
        else:
            problems.append(
                "Variable '{}' not present while checking for attr '{}' for IOOS concept: '{}'"
                .format(vname, attr, concept_name))

        # The check passes exactly when nothing was reported
        return Result(priority, not problems, concept_name, problems)
    def _get_result(self, primary_arg):
        """
        Score the file name and the global attributes it encodes.

        The file name is first checked by ``check_file_name`` using the
        ``order``, ``delimiter`` and ``extension`` keyword arguments. Then
        each component named in ``order`` is compared, by position in the
        split file name, with the matching global attribute. Entries that
        start with ``regex:`` or are listed in ``ignore_attr_checks`` are
        skipped.

        :param primary_arg: dataset providing ``filepath()``
        :return: a ``Result`` carrying ``(score, self.out_of)`` and the
                 messages collected from the failing sub-checks
        """
        dataset = primary_arg
        notes = []

        vocabulary = ESSVocabs(*self.vocabulary_ref.split(":")[:2])
        file_name = os.path.basename(dataset.filepath())
        separator = self.kwargs["delimiter"]

        name_points, name_notes = vocabulary.check_file_name(
            file_name, keys=self.kwargs["order"], delimiter=separator,
            extension=self.kwargs["extension"])
        total = 0 + name_points

        # a third of the marks are for the file name check
        if name_points < (self.out_of / 3.):
            notes.extend(name_notes)

        # Components of the file name, in the same order as ``order``
        components = os.path.splitext(file_name)[0].split(self.kwargs["delimiter"])

        for position, attr in enumerate(self.kwargs["order"]):
            # Case 1: we do not have the attribute name - so cannot check
            # Case 2: instructed to not perform this check
            if attr.startswith('regex:') or attr in self.kwargs["ignore_attr_checks"]:
                continue

            attr_points, attr_notes = vocabulary.check_global_attribute_value(
                dataset, attr, components[position], property="raw_name")
            total += attr_points

            if attr_points < 2:
                notes.extend(attr_notes)

        return Result(self.level, (total, self.out_of),
                      self.get_short_name(), notes)
Ejemplo n.º 25
0
        def build_group(label=None, weight=None, value=None, sub=None):
            # Wrap the pieces of a group in a Result: ``value`` goes through
            # ``self._translate_value`` and a falsy ``sub`` becomes an empty
            # list of children.
            translated = self._translate_value(value)
            members = sub if sub else []

            return Result(weight=weight, value=translated, name=label,
                          children=members)
Ejemplo n.º 26
0
 def check_acknowledgment(self, ds):
     """
     Check if acknowledgment/acknowledgement attr is present.

     Either spelling of the global attribute satisfies the check. The
     explanatory message is attached to the failing result (it was
     previously attached to the passing one, leaving failures unexplained).
     """
     present = (hasattr(ds, 'acknowledgment')
                or hasattr(ds, 'acknowledgement'))
     if not present:
         return Result(
             BaseCheck.MEDIUM,
             False,
             'acknowledgment/acknowledgement',
             msgs=[
                 "Neither 'acknowledgment' nor 'acknowledgement' attributes present"
             ])

     # NOTE(review): the passing result is weighted HIGH while the failing
     # one is MEDIUM; the weights are kept as they were - confirm which one
     # is intended.
     return Result(BaseCheck.HIGH,
                   True,
                   'acknowledgment/acknowledgement',
                   msgs=[])
Ejemplo n.º 27
0
    def check_var_coverage_content_type(self, ds):
        '''
        Check coverage content type against valid ISO-19115-1 codes

        A failing HIGH-weight ``Result`` is recorded for every variable
        returned by ``cfutil.get_geophysical_variables`` whose
        ``coverage_content_type`` attribute is missing or is not one of the
        accepted codes; variables that pass produce no entry.

        :param netCDF4.Dataset ds: An open netCDF dataset
        :return: list of failing ``Result`` objects (possibly empty)
        '''
        results = []
        # ISO 19115-1 codes
        valid_ctypes = {
            'image', 'thematicClassification', 'physicalMeasurement',
            'auxiliaryInformation', 'qualityInformation',
            'referenceInformation', 'modelResult', 'coordinate'
        }
        for variable in cfutil.get_geophysical_variables(ds):
            ctype = getattr(ds.variables[variable], 'coverage_content_type',
                            None)
            if ctype is None:
                results.append(
                    Result(BaseCheck.HIGH, False,
                           (variable, "coverage_content_type"),
                           ["Var %s missing attr coverage_content_type" %
                            variable]))
                continue

            if ctype not in valid_ctypes:
                # Report the invalid value (previously the message was built
                # and then discarded)
                results.append(
                    Result(BaseCheck.HIGH, False,
                           (variable, "coverage_content_type"),
                           ["Var %s does not have a coverage_content_type in %s" %
                            (variable, sorted(valid_ctypes))]))

        return results
Ejemplo n.º 28
0
    def _get_result(self, primary_arg):
        """
        Score the convention version recorded in a dataset attribute.

        The score comes from ``nc_util.check_conventions_version_number``,
        called with the ``attribute``, ``convention_type``, ``min_version``
        and ``max_version`` keyword arguments. Entries 1 and 2 of
        ``self.message_templates`` are overwritten according to the
        convention type before the messages are defined.

        :param primary_arg: the dataset to inspect
        :return: ``None`` when the score is 0, otherwise a ``Result``
                 carrying ``(score, self.out_of)`` plus the message indexed
                 by the score when it is below ``self.out_of``
        """
        # presumably derives self.level from the "status" kwarg - TODO confirm
        self._atmodat_status_to_level(self.kwargs["status"])
        ds = primary_arg

        score = nc_util.check_conventions_version_number(ds, self.kwargs["attribute"], self.kwargs["convention_type"],
                                                         self.kwargs["min_version"], self.kwargs["max_version"])
        messages = []

        # Pick the wording for messages 1 and 2 by convention type; any other
        # type leaves the existing templates untouched.
        # NOTE(review): this assigns into self.message_templates in place; if
        # that list is shared at class level the change is visible to other
        # instances - confirm.
        if self.kwargs["convention_type"] == 'CF':
            self.message_templates[1] = "'{attribute}' {convention_type} Convention information not present"
            self.message_templates[2] = "'{attribute}' {convention_type} Convention version not in valid range of " \
                                        "{min_version} to {max_version}"
        elif self.kwargs["convention_type"] == 'ATMODAT':
            self.message_templates[1] = "'{attribute}' {convention_type} Standard information not present"
            self.message_templates[2] = "'{attribute}' {convention_type} Standard version given is not in accordance " \
                                        "with performed checks"

        # Called after the templates are updated and while ``messages`` is
        # still empty
        self._define_messages(messages)

        if score == 0:
            # The existence of the "Conventions" attribute is already checked by GlobalAttrTypeCheck, so no output of an
            # error message is needed here
            return
        else:
            if score < self.out_of:
                messages.append(self.get_messages()[score])

            return Result(self.level, (score, self.out_of),
                          self.get_short_name(), messages)
Ejemplo n.º 29
0
    def _check_scalar_vertical_extents(self, ds, z_variable):
        '''
        Compare a scalar Z value with the global vertical extents.

        Two conditions are scored: ``geospatial_vertical_min`` must be close
        to ``geospatial_vertical_max``, and ``geospatial_vertical_max`` must
        be close to the single value stored in the Z variable.

        :param netCDF4.Dataset ds: An open netCDF dataset
        :param str z_variable: Name of the variable representing the Z-Axis
        :return: A MEDIUM ``Result`` scored out of 2, losing one point per
            failed comparison
        '''
        lower = ds.geospatial_vertical_min
        upper = ds.geospatial_vertical_max
        problems = []
        possible = 2

        z_scalar = ds.variables[z_variable][:].item()

        extents_agree = np.isclose(lower, upper)
        if not extents_agree:
            problems.append(
                "geospatial_vertical_min != geospatial_vertical_max for scalar depth values, %s != %s"
                % (lower, upper))

        max_matches_z = np.isclose(upper, z_scalar)
        if not max_matches_z:
            problems.append("geospatial_vertical_max != %s values, %s != %s" %
                            (z_variable, upper, z_scalar))

        achieved = possible - len(problems)
        return Result(BaseCheck.MEDIUM, (achieved, possible),
                      'geospatial_vertical_extents_match', problems)
Ejemplo n.º 30
0
    def _check_total_z_extents(self, ds, z_variable):
        '''
        Check the entire array of Z for minimum and maximum and compare that to
        the vertical extents defined in the global attributes

        Masked entries are dropped before the minimum and maximum are taken.
        If no values remain, neither comparison can be made and the result
        scores 0 out of 2.

        :param netCDF4.Dataset ds: An open netCDF dataset
        :param str z_variable: Name of the variable representing the Z-Axis
        :return: A MEDIUM ``Result`` scored out of 2, losing one point per
            failed comparison
        '''
        vert_min = ds.geospatial_vertical_min
        vert_max = ds.geospatial_vertical_max
        msgs = []
        total = 2

        zvalue = ds.variables[z_variable][:]
        # If the array has fill values, which is allowed in the case of point
        # features
        if hasattr(zvalue, 'mask'):
            zvalue = zvalue[~zvalue.mask]

        # min()/max() are undefined for an empty array, so report the
        # problem instead of letting the reduction fail.
        if np.size(zvalue) == 0:
            msgs.append(
                "Cannot compare geospatial vertical extents against min/max "
                "of %s, as it has no non-masked values" % z_variable)
            return Result(BaseCheck.MEDIUM, (0, total),
                          'geospatial_vertical_extents_match', msgs)

        zmin = zvalue.min()
        zmax = zvalue.max()
        if not np.isclose(vert_min, zmin):
            msgs.append("geospatial_vertical_min != min(%s) values, %s != %s" %
                        (z_variable, vert_min, zmin))
        if not np.isclose(vert_max, zmax):
            # Report the attribute actually compared (vert_max, not vert_min)
            msgs.append("geospatial_vertical_max != max(%s) values, %s != %s" %
                        (z_variable, vert_max, zmax))

        return Result(BaseCheck.MEDIUM, (total - len(msgs), total),
                      'geospatial_vertical_extents_match', msgs)
Ejemplo n.º 31
0
    def check_time_variable(self, dataset):
        """
        MOD from IMOS1_3Check class to match the lower case TIME variable

        Checks performed on the 'time' variable, in this order:
            type (dtype must be np.int32)
            standard_name
            axis
            calendar
            units (compared with self.time_units)

        Returns an empty list when the dataset has no 'time' variable.
        """
        if 'time' not in dataset.variables:
            return []

        time_var = dataset.variables['time']
        expected_attributes = {
            'standard_name': ['time'],
            'axis': ['T'],
            'calendar': ['gregorian']
        }

        # Start from a passing result and downgrade it on a dtype mismatch
        dtype_result = Result(BaseCheck.MEDIUM, True, name=('var', 'time'))
        if time_var.dtype != np.int32:
            dtype_result.value = False
            dtype_result.msgs = ["The time variable should be of type int"]

        outcomes = [dtype_result]
        outcomes.extend(check_attribute_dict(expected_attributes, time_var))
        outcomes.append(
            check_attribute('units', self.time_units, time_var,
                            BaseCheck.MEDIUM)
        )
        return outcomes
Ejemplo n.º 32
0
def check_attribute(name, expected, ds, priority=BaseCheck.HIGH, result_name=None, optional=False):
    """
    Basic attribute checks.

    `name` is the name of an attribute expected to be present in the
    "dataset" `ds` (either netCDF4 Dataset or Variable object).

    `expected` determines what is checked. The forms are tested in the
    order listed, so a string is always treated as a pattern and a type is
    always treated as a type, even though both may also be iterable or
    callable. If expected is
    * None - check for presence of attribute and ensure it is not an empty
      string (after stripping whitespace).
    * A string - assumed to be a regular expression that the attribute must
      match (using `re.match`, i.e. anchored at the start).
    * A type - check that attribute is of the given type.
    * An iterable - check that attribute has one of the values in the iterable.
    * A function - called with the attribute value as argument, should return a tuple
      (result_value, message). The name of the attribute will be prepended to the message.

    Returns a Result object with the given `priority`. The result.name attribute is set to
    `result_name` if given, otherwise it is generated using the type of `ds` and value
    of `name`.

    If optional is set to True and the attribute does not exist, returns None
    (i.e. skip) instead of a fail result.

    Raises TypeError if `expected` is none of the supported forms.

    Initially copied from `attr_check` function from compliance_checker/base.py
    at https://github.com/ioos/compliance-checker.

    """
    # `basestring` only exists on Python 2; fall back to `str` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # The message prefix is always needed for failure messages, including
    # when the caller supplies its own result_name.
    if isinstance(ds, Dataset):
        default_result_name = ('globalattr', name)
        message_name = "Attribute %s" % name
    else:
        default_result_name = ('var', ds.name, name)
        message_name = "Attribute %s:%s" % (ds.name, name)
    if result_name is None:
        result_name = default_result_name

    result = Result(priority, name=result_name, msgs=[])
    value = getattr(ds, name, None)

    if value is None:
        if optional:
            return None
        result.value = False
        result.msgs.append("%s missing" % message_name)
        return result

    if expected is None:
        # see if attribute is a non-empty string
        try:
            if not value.strip():
                result.value = False
                result.msgs.append("%s is empty or completely whitespace" % message_name)
            else:
                result.value = True
        # if not a string/has no strip method we should be OK
        except AttributeError:
            result.value = True

    # Strings are tested before the generic iterable branch: on Python 3 a
    # str has __iter__ and would otherwise be treated as a collection.
    elif isinstance(expected, string_types):
        if not isinstance(value, string_types):
            result.value = False
            result.msgs.append('%s should be a string' % message_name)
        elif re.match(expected, value):
            result.value = True
        else:
            result.value = False
            result.msgs.append(
                "%s does't match expected pattern '%s'" % (message_name, expected)
            )

    # Types are tested before iterables and callables: classes such as
    # `list` expose __iter__ and every class is callable.
    elif isinstance(expected, type):
        if isinstance(value, expected):
            result.value = True
        else:
            result.value = False
            result.msgs.append(
                '%s should be of %s' % (message_name, str(expected).strip('<>'))
                # str(expected) looks like "<type 'float'>"
            )

    elif hasattr(expected, '__iter__'):
        if value in expected:
            result.value = True
        else:
            result.value = False
            # Materialise so unindexable containers (e.g. sets) work too
            allowed = list(expected)
            if len(allowed) == 1:
                msg = "%s should be equal to %s" % (message_name, allowed[0])
            else:
                msg = "%s should be one of %s" % (message_name, expected)
            result.msgs.append(msg)

    elif hasattr(expected, '__call__'):
        result.value, message = expected(value)
        if not result.value and message:
            result.msgs.append('%s %s' % (message_name, message))

    else: # unsupported type in second element
        raise TypeError("Second arg in tuple has unsupported type: {}".format(type(expected)))

    return result