Exemple #1
0
    def check_time_extents(self, ds):
        """
        Check that the values of time_coverage_start/time_coverage_end approximately match the data.

        The first and last values of the time variable are compared with the
        parsed ``time_coverage_start`` and ``time_coverage_end`` attributes.
        Each end is allowed to differ by at most one hour.

        :param netCDF4.Dataset ds: An open netCDF dataset
        :return: ``None`` when either attribute is absent; otherwise a
                 ``Result`` of MEDIUM weight. It is a plain failure when the
                 attributes cannot be parsed, no time variable is found or
                 the time values cannot be converted, and a ``(score, 2)``
                 result (one point per matching end) otherwise.
        """
        if not (hasattr(ds, 'time_coverage_start') and hasattr(ds, 'time_coverage_end')):
            return

        # Parse the ISO 8601 formatted dates.
        # ``except Exception`` (not a bare ``except``) so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        try:
            t_min = dateparse(ds.time_coverage_start)
            t_max = dateparse(ds.time_coverage_end)
        except Exception:
            return Result(BaseCheck.MEDIUM,
                          False,
                          'time_coverage_extents_match',
                          ['time_coverage attributes are not formatted properly. Use the ISO 8601:2004 date format, preferably the extended format.'])

        timevar = cfutil.get_time_variable(ds)

        if not timevar:
            return Result(BaseCheck.MEDIUM,
                          False,
                          'time_coverage_extents_match',
                          ['Could not find time variable to test extent of time_coverage_start/time_coverage_end, see CF-1.6 spec chapter 4.4'])

        # Time should be monotonically increasing, so we make that assumption
        # here so we don't have to download THE ENTIRE ARRAY: only the first
        # and last values are read.
        try:
            time_variable = ds.variables[timevar]
            # num2date returns as naive date, but with time adjusted to UTC
            # we need to attach timezone information here, or the date
            # subtraction from t_min/t_max will assume that a naive timestamp is
            # in the same time zone and cause erroneous results.
            # Pendulum uses UTC by default, but we are being explicit here
            time0 = pendulum.instance(num2date(time_variable[0],
                                               time_variable.units), 'UTC')
            time1 = pendulum.instance(num2date(time_variable[-1],
                                               time_variable.units), 'UTC')
        except Exception:
            return Result(BaseCheck.MEDIUM,
                          False,
                          'time_coverage_extents_match',
                          ['Failed to retrieve and convert times for variables %s.' % timevar])

        start_dt = abs(time0 - t_min)
        end_dt = abs(time1 - t_max)

        # One point per end of the coverage interval that is within tolerance.
        score = 2
        msgs = []
        if start_dt > timedelta(hours=1):
            msgs.append("Date time mismatch between time_coverage_start and actual "
                        "time values %s (time_coverage_start) != %s (time[0])" % (t_min.isoformat(), time0.isoformat()))
            score -= 1
        if end_dt > timedelta(hours=1):
            msgs.append("Date time mismatch between time_coverage_end and actual "
                        "time values %s (time_coverage_end) != %s (time[N])" % (t_max.isoformat(), time1.isoformat()))
            score -= 1

        return Result(BaseCheck.MEDIUM,
                      (score, 2),
                      'time_coverage_extents_match',
                      msgs)
Exemple #2
0
    def get_applicable_variables(self, ds):
        '''
        Return the variable names that ACDD metadata checks apply to.

        The list starts with the geophysical variables reported by cfutil and
        is extended with the time, lon, lat and z variables that cfutil
        locates, each added only if it is not already listed.

        :param netCDF4.Dataset ds: An open netCDF dataset
        :return: The list stored on ``self.applicable_variables``
        '''
        # NOTE(review): the guard reads ``_applicable_variables`` while the
        # computed list is stored on ``applicable_variables`` -- confirm
        # against the rest of the class whether that is intended.
        if self._applicable_variables is not None:
            return self.applicable_variables

        self.applicable_variables = cfutil.get_geophysical_variables(ds)

        coordinate_finders = (
            cfutil.get_time_variable,
            cfutil.get_lon_variable,
            cfutil.get_lat_variable,
            cfutil.get_z_variable,
        )
        for find_coordinate in coordinate_finders:
            found = find_coordinate(ds)
            # Skip missing coordinates and names that are already listed.
            if not found or found in self.applicable_variables:
                continue
            self.applicable_variables.append(found)

        return self.applicable_variables
    def get_applicable_variables(self, ds):
        """
        Build the list of variable names relevant to ACDD metadata checks.

        Geophysical variables come first, followed by the time, lon, lat and
        z variables found by cfutil. A name is never listed twice.

        :param netCDF4.Dataset ds: An open netCDF dataset
        :return: The list stored on ``self.applicable_variables``
        """
        def _remember(candidate):
            # Append only real names that are not already in the list.
            if candidate and (candidate not in self.applicable_variables):
                self.applicable_variables.append(candidate)

        # NOTE(review): the condition tests ``_applicable_variables`` but the
        # result lives on ``applicable_variables`` -- verify this is intended.
        if self._applicable_variables is None:
            self.applicable_variables = cfutil.get_geophysical_variables(ds)
            _remember(cfutil.get_time_variable(ds))
            _remember(cfutil.get_lon_variable(ds))
            _remember(cfutil.get_lat_variable(ds))
            _remember(cfutil.get_z_variable(ds))

        return self.applicable_variables
    def check_time_extents(self, ds):
        """
        Check that the values of time_coverage_start/time_coverage_end approximately match the data.

        Only the first and last entries of the time variable are read. They
        are compared with the parsed ``time_coverage_start`` and
        ``time_coverage_end`` attributes, with a tolerance of one hour at
        each end.

        :param netCDF4.Dataset ds: An open netCDF dataset
        :return: ``None`` when either attribute is absent; otherwise a
                 ``Result`` of MEDIUM weight. It is a plain failure when the
                 attributes cannot be parsed, no time variable is found or
                 the time values cannot be converted, and a ``(score, 2)``
                 result (one point per matching end) otherwise.
        """
        if not (hasattr(ds, 'time_coverage_start') and hasattr(ds, 'time_coverage_end')):
            return

        # Parse the ISO 8601 formatted dates.
        # Catch ``Exception`` instead of using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate.
        try:
            t_min = dateparse(ds.time_coverage_start)
            t_max = dateparse(ds.time_coverage_end)
        except Exception:
            return Result(BaseCheck.MEDIUM,
                          False,
                          'time_coverage_extents_match',
                          ['time_coverage attributes are not formatted properly. Use the ISO 8601:2004 date format, preferably the extended format.'])

        timevar = cfutil.get_time_variable(ds)

        if not timevar:
            return Result(BaseCheck.MEDIUM,
                          False,
                          'time_coverage_extents_match',
                          ['Could not find time variable to test extent of time_coverage_start/time_coverage_end, see CF-1.6 spec chapter 4.4'])

        # Time should be monotonically increasing, so we make that assumption here so we don't have to download THE ENTIRE ARRAY
        try:
            time_variable = ds.variables[timevar]
            # num2date returns as naive date, but with time adjusted to UTC
            # we need to attach timezone information here, or the date
            # subtraction from t_min/t_max will assume that a naive timestamp is
            # in the same time zone and cause erroneous results.
            # Pendulum uses UTC by default, but we are being explicit here
            time0 = pendulum.instance(num2date(time_variable[0],
                                               time_variable.units), 'UTC')
            time1 = pendulum.instance(num2date(time_variable[-1],
                                               time_variable.units), 'UTC')
        except Exception:
            return Result(BaseCheck.MEDIUM,
                          False,
                          'time_coverage_extents_match',
                          ['Failed to retrieve and convert times for variables %s.' % timevar])

        start_dt = abs(time0 - t_min)
        end_dt = abs(time1 - t_max)

        # Start with full marks and deduct one point per mismatching end.
        score = 2
        msgs = []
        if start_dt > timedelta(hours=1):
            msgs.append("Date time mismatch between time_coverage_start and actual "
                        "time values %s (time_coverage_start) != %s (time[0])" % (t_min.isoformat(), time0.isoformat()))
            score -= 1
        if end_dt > timedelta(hours=1):
            msgs.append("Date time mismatch between time_coverage_end and actual "
                        "time values %s (time_coverage_end) != %s (time[N])" % (t_max.isoformat(), time1.isoformat()))
            score -= 1

        return Result(BaseCheck.MEDIUM,
                      (score, 2),
                      'time_coverage_extents_match',
                      msgs)
Exemple #5
0
    def check_time_extents(self, ds):
        """
        Check that the values of time_coverage_start/time_coverage_end approximately match the data.

        The first and last values of the time variable are compared with the
        parsed ``time_coverage_start`` and ``time_coverage_end`` attributes.
        Both sides are normalised to timezone-aware UTC before subtracting,
        and each end may differ by at most one hour.

        :param netCDF4.Dataset ds: An open netCDF dataset
        :return: ``None`` when either attribute is absent; otherwise a
                 ``Result`` of MEDIUM weight. It is a plain failure when the
                 attributes cannot be parsed, no time variable is found or
                 the time values cannot be converted/compared, and a
                 ``(score, 2)`` result (one point per matching end) otherwise.
        """
        # Imported locally so the method does not depend on a module-level
        # import of ``timezone``.
        from datetime import timezone

        if not (hasattr(ds, 'time_coverage_start')
                and hasattr(ds, 'time_coverage_end')):
            return

        def _as_utc(moment):
            # Naive timestamps are taken to be UTC already; aware ones are
            # converted. Mixing naive and aware datetimes in a subtraction
            # raises TypeError, so both operands must go through here.
            if getattr(moment, 'tzinfo', None) is None:
                return moment.replace(tzinfo=timezone.utc)
            return moment.astimezone(timezone.utc)

        # allows non-ISO 8601 formatted dates
        # ``except Exception`` (not a bare ``except``) so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        try:
            t_min = dateparse(ds.time_coverage_start)
            t_max = dateparse(ds.time_coverage_end)
        except Exception:
            return Result(
                BaseCheck.MEDIUM, False, 'time_coverage_extents_match', [
                    'time_coverage variables are not formatted properly. Please ensure they are valid ISO-8601 time strings'
                ])

        timevar = cfutil.get_time_variable(ds)

        if not timevar:
            return Result(
                BaseCheck.MEDIUM, False, 'time_coverage_extents_match', [
                    'Could not find time variable to test extent of time_coverage_start/time_coverage_end, see CF-1.6 spec chapter 4.4'
                ])

        # Time should be monotonically increasing, so we make that assumption here so we don't have to download THE ENTIRE ARRAY
        try:
            time_variable = ds.variables[timevar]
            time0 = num2date(time_variable[0], time_variable.units)
            time1 = num2date(time_variable[-1], time_variable.units)
            # The differences are computed inside the handler so that values
            # which cannot be normalised or subtracted are reported as a
            # failed check instead of escaping as an exception.
            start_dt = abs(_as_utc(time0) - _as_utc(t_min))
            end_dt = abs(_as_utc(time1) - _as_utc(t_max))
        except Exception:
            return Result(
                BaseCheck.MEDIUM, False, 'time_coverage_extents_match', [
                    'Failed to retrieve and convert times for variables %s.' %
                    timevar
                ])

        # One point per end of the coverage interval that is within tolerance.
        # Messages report the values as originally parsed/converted.
        score = 2
        msgs = []
        if start_dt > timedelta(hours=1):
            msgs.append(
                "Date time mismatch between time_coverage_start and actual "
                "time values %s (time_coverage_start) != %s (time[0])" %
                (t_min.isoformat(), time0.isoformat()))
            score -= 1
        if end_dt > timedelta(hours=1):
            msgs.append(
                "Date time mismatch between time_coverage_end and actual "
                "time values %s (time_coverage_end) != %s (time[N])" %
                (t_max.isoformat(), time1.isoformat()))
            score -= 1

        return Result(BaseCheck.MEDIUM, (score, 2),
                      'time_coverage_extents_match', msgs)
Exemple #6
0
    def get_applicable_variables(self, ds):
        '''
        Returns a list of variable names that are applicable to ACDD Metadata
        Checks for variables. This includes geophysical and coordinate
        variables only.

        The time, lon, lat and z variables located by cfutil are added after
        the geophysical variables. A name that is already in the list is not
        added again, so each variable appears at most once.

        :param netCDF4.Dataset ds: An open netCDF dataset
        :return: The list stored on ``self.applicable_variables``
        '''
        # NOTE(review): the guard reads ``_applicable_variables`` while the
        # computed list is stored on ``applicable_variables`` -- confirm
        # against the rest of the class whether that is intended.
        if self._applicable_variables is None:
            self.applicable_variables = cfutil.get_geophysical_variables(ds)
            coordinate_finders = (
                cfutil.get_time_variable,
                cfutil.get_lon_variable,
                cfutil.get_lat_variable,
                cfutil.get_z_variable,
            )
            for find_coordinate in coordinate_finders:
                varname = find_coordinate(ds)
                # avoid duplicates by checking if already present
                if varname and (varname not in self.applicable_variables):
                    self.applicable_variables.append(varname)
        return self.applicable_variables