Python CFBaseCheck Exemples, compliance_checker.cf.CFBaseCheck Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : test_cf.py Projet : ckearney07/compliance-checker

 def setUp(self):
     '''
     Build a fresh CFBaseCheck instance and store it on ``self.cf``.
     '''
     checker = CFBaseCheck()
     self.cf = checker

Exemple #2

0

Afficher le fichier

Fichier : test_cf.py Projet : ioos/compliance-checker

 def setUp(self):
     '''
     Build a fresh CFBaseCheck instance and store it on ``self.cf``.
     '''
     checker = CFBaseCheck()
     self.cf = checker

Exemple #3

0

Afficher le fichier

Fichier : test_cf.py Projet : ckearney07/compliance-checker

class TestCF(BaseTestCase):

    def setUp(self):
        '''
        Build a fresh CFBaseCheck instance and store it on ``self.cf``.
        '''
        checker = CFBaseCheck()
        self.cf = checker

    # --------------------------------------------------------------------------------
    # Helper Methods
    # --------------------------------------------------------------------------------

    def new_nc_file(self):
        '''
        Create a writable netCDF dataset named ``example.nc`` in the temp
        directory and return it.

        Raises IOError if that file already exists. Removal of the file and
        closing of the dataset are both registered as test cleanups.
        '''
        target = os.path.join(gettempdir(), 'example.nc')
        if os.path.exists(target):
            raise IOError('File Exists: %s' % target)

        handle = Dataset(target, 'w')
        # Registration order matters: assuming unittest's last-in-first-out
        # cleanup order, the dataset is closed before the file is removed.
        self.addCleanup(os.remove, target)
        self.addCleanup(handle.close)
        return handle

    def load_dataset(self, nc_dataset):
        '''
        Open the netCDF file at the given path read-only and return it.

        The opened dataset's ``close`` is registered as a test cleanup.
        Raises ValueError when ``nc_dataset`` is not a ``str``.
        '''
        if isinstance(nc_dataset, str):
            opened = Dataset(nc_dataset, 'r')
            self.addCleanup(opened.close)
            return opened

        raise ValueError("nc_dataset should be a string")

    def get_results(self, results):
        '''
        Tally the scores and collect the messages of a result set.

        Each result must expose ``value`` and ``msgs``. A tuple ``value`` is
        read as ``(scored, possible)``; any other value counts as one possible
        point and contributes ``int(value)`` to the score.

        The results are consumed in a single pass so that one-shot iterables
        (e.g. generators) also have their messages collected.

        :param results: iterable of result objects
        :return: tuple ``(scored, out_of, messages)``
        '''
        out_of = 0
        scored = 0
        messages = []
        for r in results:
            if isinstance(r.value, tuple):
                out_of += r.value[1]
                scored += r.value[0]
            else:
                out_of += 1
                scored += int(r.value)
            # Gather messages in the same pass; a second loop over
            # ``results`` would see nothing if it were an exhausted iterator.
            messages.extend(r.msgs)

        return scored, out_of, messages

    # --------------------------------------------------------------------------------
    # Compliance Tests
    # --------------------------------------------------------------------------------

    def test_check_data_types(self):
        """
        2.2 The netCDF data types char, byte, short, int, float or real, and double are all acceptable
        """
        # The rutgers file is expected to earn every available point
        compliant = self.load_dataset(STATIC_FILES['rutgers'])
        outcome = self.cf.check_data_types(compliant)
        scored, possible = outcome.value[0], outcome.value[1]
        assert scored == possible

        # The bad_data_type file is expected to lose one point for temp (int64)
        offending = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcome = self.cf.check_data_types(offending)
        first_message = outcome.msgs[0]
        assert first_message == 'The variable temp failed because the datatype is int64'
        assert outcome.value == (6, 7)

    def test_naming_conventions(self):
        '''
        Section 2.3 Naming Conventions

        Names of variables, dimensions and attributes should start with a
        letter and contain only letters, digits, and underscores.
        '''
        variables_key = u'§2.3 Naming Conventions for variables'
        attributes_key = u'§2.3 Naming Conventions for attributes'

        # rutgers: every variable is expected to pass
        ds = self.load_dataset(STATIC_FILES['rutgers'])
        checks = self.cf.check_naming_conventions(ds)
        variable_count = len(ds.variables)
        by_name = {check.name: check for check in checks}
        variables_check = by_name[variables_key]
        assert variables_check.value == (variable_count, variable_count)

        # bad: one variable name is expected to be rejected
        ds = self.load_dataset(STATIC_FILES['bad'])
        checks = self.cf.check_naming_conventions(ds)
        by_name = {check.name: check for check in checks}
        variables_check = by_name[variables_key]
        assert variables_check.value == (13, 14)
        assert variables_check.msgs[0] == u'variable _poor_dim should begin with a letter and be composed of letters, digits, and underscores'
        total_scored, total_possible, _ = self.get_results(checks)
        assert (total_scored, total_possible) == (49, 51)

        # chap2: a bad variable name plus bad variable and global attributes
        ds = self.load_dataset(STATIC_FILES['chap2'])
        checks = self.cf.check_naming_conventions(ds)
        by_name = {check.name: check for check in checks}
        variables_check = by_name[variables_key]
        assert variables_check.value == (6, 7)
        assert variables_check.msgs[0] == u'variable bad name should begin with a letter and be composed of letters, digits, and underscores'

        attributes_check = by_name[attributes_key]
        assert attributes_check.msgs[0] == ('attribute no_reason:_bad_attr should begin with a letter and be '
                                            'composed of letters, digits, and underscores')
        assert attributes_check.msgs[1] == ('global attribute bad global should begin with a letter and be '
                                            'composed of letters, digits, and underscores')

        total_scored, total_possible, _ = self.get_results(checks)
        assert (total_scored, total_possible) == (16, 19)

    def test_check_names_unique(self):
        """
        2.3 Names must not differ only by case: ignoring case, no two names
        should be the same.
        """
        # rutgers: the score is expected to equal the number of variables
        ds = self.load_dataset(STATIC_FILES['rutgers'])
        outcome = self.cf.check_names_unique(ds)

        variable_count = len(ds.variables)
        self.assertEqual(outcome.value, (variable_count, variable_count))

        # chap2: one case-insensitive duplicate is expected to be reported
        ds = self.load_dataset(STATIC_FILES['chap2'])
        outcome = self.cf.check_names_unique(ds)
        assert outcome.value == (6, 7)
        first_message = outcome.msgs[0]
        assert first_message == 'Variables are not case sensitive. Duplicate variables named: not_unique'

    def test_check_dimension_names(self):
        """
        2.4 A variable may have any number of dimensions, including zero,
        and the dimensions must all have different names.
        """
        ds = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcome = self.cf.check_dimension_names(ds)
        assert outcome.value == (6, 7)

        # chap2: no_reason is expected to be reported for repeating "time"
        ds = self.load_dataset(STATIC_FILES['chap2'])
        outcome = self.cf.check_dimension_names(ds)
        first_message = outcome.msgs[0]
        assert first_message == u'no_reason has two or more dimensions named time'

    def test_check_dimension_order(self):
        """
        2.4 Dimensions interpreted as "date or time" (T), "height or depth" (Z),
        "latitude" (Y), or "longitude" (X) are recommended to appear in the
        relative order T, Z, Y, X in the CDL definition of the file. Any other
        dimensions should, whenever possible, be placed to the left of the
        spatiotemporal dimensions.
        """
        ds = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcome = self.cf.check_dimension_order(ds)
        assert outcome.value == (5, 6)
        first_message = outcome.msgs[0]
        assert first_message == ("really_bad's dimensions are not in the recommended order "
                                 "T, Z, Y, X. They are latitude, power")

    def test_check_fill_value_outside_valid_range(self):
        """
        2.5.1 When valid_range is used for a variable, its _FillValue should
        lie outside that range.
        """
        # bad_data_type: salinity's fill value is expected to be reported
        ds = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcome = self.cf.check_fill_value_outside_valid_range(ds)
        first_message = outcome.msgs[0]
        assert first_message == ('salinity:_FillValue (1.0) should be outside the '
                                 'range specified by valid_min/valid_max (-10, 10)')

        # chap2: wind_speed's fill value is expected to be reported
        ds = self.load_dataset(STATIC_FILES['chap2'])
        outcome = self.cf.check_fill_value_outside_valid_range(ds)
        assert outcome.value == (1, 2)
        first_message = outcome.msgs[0]
        assert first_message == ('wind_speed:_FillValue (12.0) should be outside the '
                                 'range specified by valid_min/valid_max (0.0, 20.0)')

    def test_check_conventions_are_cf_16(self):
        """
        2.6.1 The NUG-defined global attribute Conventions should contain the
        string value "CF-1.6".
        """
        # Files expected to pass, per the fixture notes:
        #   rutgers    -> :Conventions = "CF-1.6"
        #   conv_multi -> :Conventions = "CF-1.6 ,ACDD" ;
        for fixture_key in ('rutgers', 'conv_multi'):
            ds = self.load_dataset(STATIC_FILES[fixture_key])
            outcome = self.cf.check_conventions_are_cf_16(ds)
            self.assertTrue(outcome.value)

        # conv_bad -> :Conventions = "NoConvention"
        ds = self.load_dataset(STATIC_FILES['conv_bad'])
        outcome = self.cf.check_conventions_are_cf_16(ds)
        self.assertFalse(outcome.value)
        first_message = outcome.msgs[0]
        assert first_message == ('Conventions global attribute does not contain '
                                 '"CF-1.6". The CF Checker only supports CF-1.6 '
                                 'at this time.')

    def test_check_convention_globals(self):
        """
        2.6.2 title/history global attributes must be strings.

        NOTE(review): the assertions below expect a missing title to score
        0 of 2 with a "should exist" message, so the checker does treat
        absence as a failure here.
        """
        # rutgers: both attributes are expected to pass
        ds = self.load_dataset(STATIC_FILES['rutgers'])
        outcome = self.cf.check_convention_globals(ds)
        assert outcome.value == (2, 2)

        # bad_data_type: nothing is scored and title is reported
        ds = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcome = self.cf.check_convention_globals(ds)
        assert outcome.value == (0, 2)
        first_message = outcome.msgs[0]
        assert first_message == 'global attribute title should exist and be a non-empty string'

    def test_check_convention_possibly_var_attrs(self):
        """
        Exercise check_convention_possibly_var_attrs, whose messages below
        concern the institution, source and references attributes.

        NOTE(review): the previous docstring quoted the CF section 3.1 text
        about the units attribute, which does not match what is asserted
        here; presumably this covers the section 2.6.2 description
        attributes -- confirm against the checker.
        """
        ds = self.load_dataset(STATIC_FILES['rutgers'])
        outcome = self.cf.check_convention_possibly_var_attrs(ds)
        # Earlier notes listed 10 comment attrs, 1 institution, 1 source and
        # 1 EMPTY references for this file.
        # NOTE(review): that breakdown does not sum to 16 -- confirm.
        assert outcome.value == (15, 16)
        assert outcome.msgs[0] == "references global attribute should be a non-empty string"

        ds = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcome = self.cf.check_convention_possibly_var_attrs(ds)
        # Per the earlier notes for this file: no references, no source,
        # institution is a long integer (10L), and comments only matter
        # when they are empty.
        assert outcome.value == (1, 4)
        reported = outcome.msgs
        assert reported[0] == 'salinity:institution should be a non-empty string'
        assert reported[1] == 'source should be defined'
        assert reported[2] == 'references should be defined'

    def test_check_standard_name(self):
        """
        3.3 A standard name is attached to a variable through the
        standard_name attribute, a string holding a standard name optionally
        followed by one or more blanks and a standard name modifier.
        """
        # 2dim: every returned result is expected to be truthy
        ds = self.load_dataset(STATIC_FILES['2dim'])
        checks = self.cf.check_standard_name(ds)
        for check in checks:
            self.assertTrue(check.value)

        ds = self.load_dataset(STATIC_FILES['bad_data_type'])
        checks = self.cf.check_standard_name(ds)
        by_name = {check.name: check for check in checks}

        # time and latitude have neither long_name nor standard_name
        missing_name_cases = (
            (u'§3.2 Either long_name or standard_name is highly recommended for variable time',
             "Attribute long_name or/and standard_name is highly recommended for variable time"),
            (u'§3.2 Either long_name or standard_name is highly recommended for variable latitude',
             "Attribute long_name or/and standard_name is highly recommended for variable latitude"),
        )
        for check_name, expected_message in missing_name_cases:
            check = by_name[check_name]
            assert check.value == (0, 1)
            assert expected_message in check.msgs

        # salinity carries a standard_name that is not in the table
        check = by_name[u'§3.3 Variable salinity has valid standard_name attribute']
        assert check.value == (1, 2)
        assert 'standard_name Chadwick is not defined in Standard Name Table' in check.msgs[0]

        check = by_name[u'§3.3 standard_name modifier for salinity is valid']
        assert check.value == (0, 1)

        assert len(by_name) == 9

        ds = self.load_dataset(STATIC_FILES['reduced_horizontal_grid'])
        checks = self.cf.check_standard_name(ds)
        total_scored, total_possible, _ = self.get_results(checks)
        # The rgrid coordinate variable must not be checked for standard_name.
        # NOTE(review): the earlier comment said time, lat and lon get three
        # checks each, which does not add up to 11 -- confirm the breakdown.
        assert (total_scored, total_possible) == (11, 11)

    def test_cell_bounds(self):
        '''
        Exercise check_cell_boundaries on valid and invalid boundary files.
        '''
        running_py3 = sys.version_info.major == 3

        ds = self.load_dataset(STATIC_FILES['grid-boundaries'])
        checks = self.cf.check_cell_boundaries(ds)
        total_scored, total_possible, _ = self.get_results(checks)
        assert (total_scored, total_possible) == (2, 2)

        # Smoke checks: these only need to run without raising
        for fixture_key in ('cf_example_cell_measures', 'bad_data_type'):
            ds = self.load_dataset(STATIC_FILES[fixture_key])
            self.cf.check_cell_boundaries(ds)

        ds = self.load_dataset(STATIC_FILES['bounds_bad_order'])
        checks = self.cf.check_cell_boundaries(ds)
        total_scored, total_possible, reported = self.get_results(checks)
        assert (total_scored, total_possible) == (0, 2)

        # The tuple repr inside the message differs between Python 2 and 3
        dims_repr = "('nv', 'lat')" if running_py3 else "(u'nv', u'lat')"
        expected = u"Boundary variable coordinates are in improper order: {}. Bounds-specific dimensions should be last".format(dims_repr)
        assert expected in reported

        ds = self.load_dataset(STATIC_FILES['bounds_bad_num_coords'])
        checks = self.cf.check_cell_boundaries(ds)
        total_scored, total_possible, reported = self.get_results(checks)
        assert (total_scored, total_possible) == (0, 2)
        expected = 'The number of dimensions of the variable lat is 1, but the number of dimensions of the boundary variable lat_bnds is 1. The boundary variable should have 2 dimensions'
        assert expected in reported

        ds = self.load_dataset(STATIC_FILES['1d_bound_bad'])
        checks = self.cf.check_cell_boundaries(ds)
        _, _, reported = self.get_results(checks)
        dims_repr = "('lon',)" if running_py3 else "(u'lon',)"
        expected = u"Boundary variable dimension lon_bnds must have at least 2 elements to form a simplex/closed cell with previous dimensions {}.".format(dims_repr)
        assert expected in reported

    def test_cell_measures(self):
        '''
        Exercise check_cell_measures on one good and two malformed files.
        '''
        # cell_measure: a non-zero, perfect score is expected
        ds = self.load_dataset(STATIC_FILES['cell_measure'])
        checks = self.cf.check_cell_measures(ds)
        total_scored, total_possible, _ = self.get_results(checks)
        assert total_scored == total_possible
        assert total_scored > 0

        # bad_cell_measure1: the attribute format is expected to be reported
        ds = self.load_dataset(STATIC_FILES['bad_cell_measure1'])
        checks = self.cf.check_cell_measures(ds)
        _, _, reported = self.get_results(checks)
        expected = ("The cell_measures attribute for variable PS is formatted incorrectly.  "
                    "It should take the form of either 'area: cell_var' or 'volume: cell_var' "
                    "where cell_var is the variable describing the cell measures")
        assert expected in reported

        # bad_cell_measure2: a missing cell measure variable is expected
        ds = self.load_dataset(STATIC_FILES['bad_cell_measure2'])
        checks = self.cf.check_cell_measures(ds)
        _, _, reported = self.get_results(checks)
        expected = 'Cell measure variable PS referred to by box_area is not present in dataset variables'
        assert expected in reported

    def test_climatology(self):
        '''
        Smoke test: run check_climatological_statistics on the climatology
        file and tally the results. Nothing is asserted; the calls only
        need to complete without raising.
        '''
        ds = self.load_dataset(STATIC_FILES['climatology'])
        checks = self.cf.check_climatological_statistics(ds)
        _scored, _possible, _reported = self.get_results(checks)

    def test_check_ancillary_variables(self):
        '''
        Verify that ancillary variable references are checked.
        '''
        # rutgers: temperature's ancillary variables are expected to pass
        ds = self.load_dataset(STATIC_FILES['rutgers'])
        checks = self.cf.check_ancillary_variables(ds)
        by_name = {check.name: check for check in checks}
        check = by_name[u'§3.4 Ancillary Variables defined by temperature']
        assert check.value == (2, 2)

        # bad_reference: temp is expected to name a variable that is missing
        ds = self.load_dataset(STATIC_FILES['bad_reference'])
        checks = self.cf.check_ancillary_variables(ds)
        by_name = {check.name: check for check in checks}
        check = by_name[u'§3.4 Ancillary Variables defined by temp']
        assert check.value == (1, 2)
        assert check.msgs[0] == "temp_qc is not a variable in this dataset"

    def test_download_standard_name_table(self):
        """
        Test that a user can download a specific standard name table
        """
        version = '35'

        data_directory = create_cached_data_dir()
        location = os.path.join(data_directory, 'cf-standard-name-table-test-{0}.xml'.format(version))

        def remove_downloaded_table():
            # The file may be absent if the download itself failed
            if os.path.exists(location):
                os.remove(location)

        # Register the cleanup before downloading and asserting, so that a
        # failed assertion does not leave the table behind in the data
        # directory.
        self.addCleanup(remove_downloaded_table)

        download_cf_standard_name_table(version, location)

        # Test that the file now exists in location and is the right version
        self.assertTrue(os.path.isfile(location))
        std_names = StandardNameTable(location)
        self.assertEqual(std_names._version, version)

    def test_bad_standard_name_table(self):
        """
        Test that constructing a StandardNameTable from a bad file path
        raises IOError.
        """
        missing_path = 'dummy_non_existent_file.ext'
        with pytest.raises(IOError):
            StandardNameTable(missing_path)

    def test_check_flags(self):
        '''
        Exercise check_flags on the rutgers file: time_qc is expected to be a
        valid flags variable, lat is not.
        '''
        ds = self.load_dataset(STATIC_FILES['rutgers'])
        checks = self.cf.check_flags(ds)
        by_name = {check.name: check for check in checks}

        # Per the fixture notes, lat(time) carries lat:flag_meanings = ""
        expected_scores = (
            (u'§3.5 time_qc is a valid flags variable', (1, 1)),
            (u'§3.5 flag_meanings for time_qc', (3, 3)),
            (u'§3.5 flag_values for time_qc', (4, 4)),
            (u'§3.5 lat is a valid flags variable', (0, 1)),
            (u'§3.5 flag_meanings for lat', (2, 3)),
        )
        for check_name, expected_value in expected_scores:
            check = by_name[check_name]
            assert check.value == expected_value

        # ``check`` is now the flag_meanings result for lat
        assert check.msgs[0] == "flag_meanings can't be empty"

    def test_check_flag_masks(self):
        '''
        Run check_flags on the ghrsst file, which is expected to earn a
        non-zero, perfect score.
        '''
        ds = self.load_dataset(STATIC_FILES['ghrsst'])
        checks = self.cf.check_flags(ds)
        total_scored, total_possible, _ = self.get_results(checks)
        assert total_scored > 0
        assert total_scored == total_possible

    def test_check_bad_units(self):
        '''
        Exercise check_units on files with good, questionable and bad units.
        '''
        # 2dim: every result is expected to be good
        ds = self.load_dataset(STATIC_FILES['2dim'])
        checks = self.cf.check_units(ds)
        for check in checks:
            self.assert_result_is_good(check)

        ds = self.load_dataset(STATIC_FILES['bad_data_type'])
        checks = self.cf.check_units(ds)
        by_name = {check.name: check for check in checks}

        # longitude uses Degrees_E, which is a valid udunits unit. The
        # preferred degrees_east is checked in the check_longitude check.
        expected_scores = (
            (u"§3.1 Variable longitude's units are contained in UDUnits", (1, 1)),
            (u'§3.1 Variable temp contains valid CF units', (3, 3)),
            (u"§3.1 Variable temp's units are contained in UDUnits", (1, 1)),
        )
        for check_name, expected_value in expected_scores:
            assert by_name[check_name].value == expected_value

        ds = self.load_dataset(STATIC_FILES['bad_units'])
        checks = self.cf.check_units(ds)
        by_name = {check.name: check for check in checks}

        # time(time) has time:units = "s": valid CF units and valid UDUnits,
        # but not appropriate for the standard_name time.
        # lat has lat:units = "degrees_E", which is expected to be rejected
        # as inappropriate for the standard_name latitude.
        expected_scores = (
            (u'§3.1 Variable time contains valid CF units', (3, 3)),
            (u"§3.1 Variable time's units are contained in UDUnits", (1, 1)),
            (u"§3.1 Variable time's units are appropriate for the standard_name time", (0, 1)),
            (u"§3.1 Variable lat's units are appropriate for the standard_name latitude", (0, 1)),
        )
        for check_name, expected_value in expected_scores:
            assert by_name[check_name].value == expected_value

        # lev has lev:units = "level", which is deprecated
        check = by_name[u"§3.1 Variable lev contains valid CF units"]
        assert check.value == (2, 3)
        assert 'units for lev, "level" are deprecated by CF 1.6' in check.msgs

        # temp_count(time) has
        #   temp_count:standard_name = "atmospheric_temperature number_of_observations"
        #   temp_count:units = "1"
        check = by_name[u"§3.1 Variable temp_count's units are appropriate for "
                        u"the standard_name atmospheric_temperature number_of_observations"]
        assert check.value == (1, 1)

    def test_latitude(self):
        '''
        Section 4.1 Latitude Coordinate
        '''
        # A compliant grid earns every point
        ds = self.load_dataset(STATIC_FILES['example-grid'])
        checks = self.cf.check_latitude(ds)
        total_scored, total_possible, _ = self.get_results(checks)
        assert (total_scored, total_possible) == (3, 3)

        # A non-compliant file: lat has no units, lat_uv is acceptable
        ds = self.load_dataset(STATIC_FILES['bad'])
        checks = self.cf.check_latitude(ds)

        by_name = {check.name: check for check in checks}
        total_scored, total_possible, _ = self.get_results(checks)

        check = by_name[u'§4.1 Latitude variable lat has required units attribute']
        assert check.value == (0, 1)
        assert check.msgs[0] == "latitude variable 'lat' must define units"

        check = by_name[u'§4.1 Latitude variable lat uses recommended units']
        assert check.value == (0, 1)

        # NOTE(review): ``assert check`` only tests the truthiness of the
        # result object, not its score -- confirm whether a value was meant.
        check = by_name[u'§4.1 Latitude variable lat defines units using degrees_north']
        assert check
        assert check.msgs[0] == "CF recommends latitude variable 'lat' to use units degrees_north"

        check = by_name[u'§4.1 Latitude variable lat defines either standard_name or axis']
        assert check.value == (1, 1)

        check = by_name[u'§4.1 Latitude variable lat_uv has required units attribute']
        assert check.value == (1, 1)

        check = by_name[u'§4.1 Latitude variable lat_uv uses recommended units']
        assert check.value == (1, 1)

        check = by_name[u'§4.1 Latitude variable lat_uv defines units using degrees_north']
        assert check
        assert check.msgs[0] == "CF recommends latitude variable 'lat_uv' to use units degrees_north"

        check = by_name[u'§4.1 Latitude variable lat_uv defines either standard_name or axis']
        assert check.value == (1, 1)

        assert (total_scored, total_possible) == (9, 12)

        # Rotated pole grid: perfect score before any modification
        ds = self.load_dataset(STATIC_FILES['rotated_pole_grid'])
        checks = self.cf.check_latitude(ds)
        total_scored, total_possible, _ = self.get_results(checks)
        assert (total_scored, total_possible) == (6, 6)

        # Swap rlat for a MockVariable wrapper so its units can be changed
        # without writing to the read-only file
        ds.variables['rlat'] = MockVariable(ds.variables['rlat'])
        grid_lat = ds.variables['rlat']
        grid_lat.name = 'rlat'
        wrong_format = "Grid latitude variable '{}' should use degree equivalent units without east or north components. Current units are {}"

        # Each of these units is expected to trigger the message
        for bad_units in ('degrees_north', 'radians'):
            grid_lat.units = bad_units
            checks = self.cf.check_latitude(ds)
            _, _, reported = self.get_results(checks)
            self.assertTrue(wrong_format.format(grid_lat.name, grid_lat.units) in reported)

    def test_longitude(self):
        '''
        Section 4.2 Longitude Coordinate
        '''
        # Check compliance
        dataset = self.load_dataset(STATIC_FILES['example-grid'])
        results = self.cf.check_longitude(dataset)
        score, out_of, messages = self.get_results(results)
        assert (score, out_of) == (3, 3)

        # Verify non-compliance
        dataset = self.load_dataset(STATIC_FILES['bad'])
        results = self.cf.check_longitude(dataset)

        result_dict = {result.name: result for result in results}
        scored, out_of, messages = self.get_results(results)

        # NOTE: the result names below use the "§4.1" prefix even though this
        # is the longitude check; they must match the names the checker emits.
        result = result_dict[u'§4.1 Longitude variable lon has required units attribute']
        assert result.value == (0, 1)
        assert result.msgs[0] == "longitude variable 'lon' must define units"

        result = result_dict[u'§4.1 Longitude variable lon uses recommended units']
        assert result.value == (0, 1)

        # NOTE(review): ``assert result`` only tests the truthiness of the
        # result object, not its score -- confirm whether a value was meant.
        result = result_dict[u'§4.1 Longitude variable lon defines units using degrees_east']
        assert result
        assert result.msgs[0] == "CF recommends longitude variable 'lon' to use units degrees_east"

        result = result_dict[u'§4.1 Longitude variable lon defines either standard_name or axis']
        assert result.value == (1, 1)

        result = result_dict[u'§4.1 Longitude variable lon_uv has required units attribute']
        assert result.value == (1, 1)

        result = result_dict[u'§4.1 Longitude variable lon_uv uses recommended units']
        assert result.value == (1, 1)

        result = result_dict[u'§4.1 Longitude variable lon_uv defines units using degrees_east']
        assert result
        assert result.msgs[0] == "CF recommends longitude variable 'lon_uv' to use units degrees_east"

        result = result_dict[u'§4.1 Longitude variable lon_uv defines either standard_name or axis']
        assert result.value == (1, 1)

        assert (scored, out_of) == (9, 12)

        dataset = self.load_dataset(STATIC_FILES['rotated_pole_grid'])
        # This baseline previously called check_latitude (copied from the
        # latitude test), so the longitude check was never exercised on the
        # unmodified file.
        # NOTE(review): (6, 6) assumes the file is symmetric in its latitude
        # and longitude variables -- confirm against the fixture.
        results = self.cf.check_longitude(dataset)
        scored, out_of, messages = self.get_results(results)
        assert (scored, out_of) == (6, 6)
        # hack to avoid writing to read-only file
        dataset.variables['rlon'] = MockVariable(dataset.variables['rlon'])
        rlon = dataset.variables['rlon']
        rlon.name = 'rlon'
        # test with a bad value
        rlon.units = 'degrees_east'
        results = self.cf.check_longitude(dataset)
        scored, out_of, messages = self.get_results(results)
        wrong_format = "Grid longitude variable '{}' should use degree equivalent units without east or north components. Current units are {}"
        self.assertTrue(wrong_format.format(rlon.name, rlon.units) in messages)
        # radians is expected to be rejected as well
        rlon.units = 'radians'
        results = self.cf.check_longitude(dataset)
        scored, out_of, messages = self.get_results(results)
        self.assertTrue(wrong_format.format(rlon.name, rlon.units) in messages)

    def test_is_vertical_coordinate(self):
        '''
        Section 4.3 Qualifiers for Vertical Coordinate

        NOTE: The standard doesn't explicitly say that vertical coordinates
        must be a coordinate type.
        '''
        # Each case lists the attributes set on a fresh MockVariable; every
        # one is expected to be recognised as a vertical coordinate.
        attribute_cases = (
            (('standard_name', 'depth'),),          # proper standard_name
            (('axis', 'Z'),),                       # proper axis
            (('units', 'dbar'),),                   # proper units
            (('units', 'm'), ('positive', 'up')),   # proper units/positive
        )
        for attributes in attribute_cases:
            variable = MockVariable()
            for attr_name, attr_value in attributes:
                setattr(variable, attr_name, attr_value)
            self.assertTrue(is_vertical_coordinate('not_known', variable))

    def test_vertical_dimension(self):
        '''
        Section 4.3.1 Dimensional Vertical Coordinate
        '''
        # example-grid: a single result for height with a perfect score
        ds = self.load_dataset(STATIC_FILES['example-grid'])
        checks = self.cf.check_dimensional_vertical_coordinate(ds)
        assert len(checks) == 1
        only = checks[0]
        assert only.name == u'§4.3.1 height is a valid vertical coordinate'
        assert only.value == (2, 2)

        # illegal-vertical: z is expected to lack both units and positive
        ds = self.load_dataset(STATIC_FILES['illegal-vertical'])
        checks = self.cf.check_dimensional_vertical_coordinate(ds)
        assert len(checks) == 1
        only = checks[0]
        assert only.name == u'§4.3.1 z is a valid vertical coordinate'
        assert only.value == (0, 2)
        assert only.msgs[0] == 'units must be defined for vertical coordinates, there is no default'
        assert only.msgs[1] == ("vertical coordinates not defining pressure must include a positive attribute that "
                                "is either 'up' or 'down'")

    def test_appendix_d(self):
        '''
        CF 1.6
        Appendix D

        A dimensionless vertical coordinate is tied to its definition by its
        standard_name, and the terms of that definition are tied to file
        variables by the formula_terms attribute: blank-separated
        "term: variable" elements whose order is not significant.

        This test passes a standard_name and a set of term names to
        no_missing_terms and checks the boolean it returns.
        '''
        # (standard_name, terms supplied, expected no_missing_terms outcome)
        cases = (
            ('atmosphere_ln_pressure_coordinate', {"p0", "lev"}, True),
            ('atmosphere_sigma_coordinate', {"sigma", "ps", "ptop"}, True),
            ('atmosphere_hybrid_sigma_pressure_coordinate', {'a', 'b', 'ps'}, True),
            # alternative terms for 'atmosphere_hybrid_sigma_pressure_coordinate'
            ('atmosphere_hybrid_sigma_pressure_coordinate', {'ap', 'b', 'ps'}, True),
            # an invalid set of terms fails
            ('atmosphere_hybrid_sigma_pressure_coordinate', {'a', 'b', 'p'}, False),
            ('atmosphere_hybrid_height_coordinate', {"a", "b", "orog"}, True),
            # missing terms should cause failure
            ('atmosphere_hybrid_height_coordinate', {"a", "b"}, False),
            # excess terms should cause failure
            ('atmosphere_hybrid_height_coordinate', {"a", "b", "c", "orog"}, False),
            ('atmosphere_sleve_coordinate',
             {"a", "b1", "b2", "ztop", "zsurf1", "zsurf2"}, True),
            ('ocean_sigma_coordinate', {"sigma", "eta", "depth"}, True),
            ('ocean_s_coordinate',
             {"s", "eta", "depth", "a", "b", "depth_c"}, True),
            ('ocean_sigma_z_coordinate',
             {"sigma", "eta", "depth", "depth_c", "nsigma", "zlev"}, True),
            ('ocean_double_sigma_coordinate',
             {"sigma", "depth", "z1", "z2", "a", "href", "k_c"}, True),
        )
        for standard_name, supplied_terms, should_pass in cases:
            outcome = no_missing_terms(standard_name, supplied_terms)
            if should_pass:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)

    def test_dimensionless_vertical(self):
        '''
        Section 4.3.2: dimensionless vertical coordinates
        '''
        def run_check(ds):
            # Index the check results by name; a later result with the same
            # name replaces an earlier one
            indexed = {}
            for res in self.cf.check_dimensionless_vertical_coordinate(ds):
                indexed[res.name] = res
            return indexed

        # Compliant file: both lev results must score every available point
        by_name = run_check(self.load_dataset(STATIC_FILES['dimensionless']))
        lev_units = by_name[u'§4.3.2 lev does not contain deprecated units']
        assert lev_units.value[0] == lev_units.value[1]
        lev_terms = by_name[u'§4.3.2 lev has valid formula_terms']
        assert lev_terms.value[0] == lev_terms.value[1]

        # Non-compliant file
        bad_ds = self.load_dataset(STATIC_FILES['bad'])
        by_name = run_check(bad_ds)
        lev1_units = by_name[u'§4.3.2 lev1 does not contain deprecated units']
        assert lev1_units.value == (1, 1)
        lev1_terms = by_name[u'§4.3.2 lev1 has valid formula_terms']
        assert lev1_terms.value == (0, 1)
        assert lev1_terms.msgs[0] == u'formula_terms is a required attribute and must be a non-empty string'

        lev2_terms = by_name[u'§4.3.2 lev2 has valid formula_terms']
        assert lev2_terms.value == (4, 5)
        missing_vars_msg = "The following variable(s) referenced in formula_terms are not present in the dataset variables: var1, var2, var3"
        self.assertTrue(missing_vars_msg in lev2_terms.msgs)

        # Replace lev2 with a MockVariable built from it, give that mock a
        # malformed formula_terms attribute and rerun the check
        bad_ds.variables['lev2'] = MockVariable(bad_ds.variables['lev2'])
        mocked_lev2 = bad_ds.variables['lev2']
        mocked_lev2.formula_terms = 'a: var1 b:var2 orog:'
        by_name = run_check(bad_ds)
        malformed = by_name[u'§4.3.2 lev2 has valid formula_terms']
        self.assertTrue('Attribute formula_terms is not well-formed'
                        in malformed.msgs)



    def test_is_time_variable(self):
        '''
        is_time_variable is expected to accept a variable because of its
        standard_name, its own name, its axis or its units, and to reject a
        bare variable whose name is not 'time'.
        '''
        # standard_name of 'time' is enough, whatever the variable is called
        named_by_standard = MockVariable()
        named_by_standard.standard_name = 'time'
        self.assertTrue(is_time_variable('not_time', named_by_standard))

        # a variable with no attributes is judged on its name alone
        bare = MockVariable()
        self.assertTrue(is_time_variable('time', bare))
        self.assertFalse(is_time_variable('not_time', bare))

        # axis of 'T'
        with_axis = MockVariable()
        with_axis.axis = 'T'
        self.assertTrue(is_time_variable('maybe_time', with_axis))

        # units expressed relative to a reference date
        with_units = MockVariable()
        with_units.units = 'seconds since 1900-01-01'
        self.assertTrue(is_time_variable('maybe_time', with_units))

    def test_check_time_coordinate(self):
        '''
        check_time_coordinate: every result for 'example-grid' is truthy,
        while 'bad' scores 1 out of 2 and reports incorrect time units.
        '''
        good_ds = self.load_dataset(STATIC_FILES['example-grid'])
        for outcome in self.cf.check_time_coordinate(good_ds):
            self.assertTrue(outcome.value)

        bad_ds = self.load_dataset(STATIC_FILES['bad'])
        tally = self.get_results(self.cf.check_time_coordinate(bad_ds))
        scored, out_of, messages = tally

        self.assertIn('time does not have correct time units', messages)
        self.assertEqual((scored, out_of), (1, 2))

    def test_check_calendar(self):
        '''
        check_calendar: every result for 'example-grid' is truthy, and 'bad'
        reports that 'nope' is not a valid calendar.
        '''
        good_ds = self.load_dataset(STATIC_FILES['example-grid'])
        for outcome in self.cf.check_calendar(good_ds):
            self.assertTrue(outcome.value)

        bad_ds = self.load_dataset(STATIC_FILES['bad'])
        # only the messages are inspected; the score itself is not asserted
        _, _, messages = self.get_results(self.cf.check_calendar(bad_ds))

        self.assertIn("Variable time should have a valid calendar: 'nope' is not a valid calendar", messages)

    def test_check_aux_coordinates(self):
        '''
        check_aux_coordinates on 'illegal-aux-coords': h_temp scores 2 of 4
        with a message about lat's dimensions, sal scores 4 of 4.
        '''
        dataset = self.load_dataset(STATIC_FILES['illegal-aux-coords'])
        by_name = {}
        for res in self.cf.check_aux_coordinates(dataset):
            by_name[res.name] = res

        h_temp = by_name[u"§5.0 Auxiliary Coordinates of h_temp must have a subset of h_temp's dimensions"]
        assert h_temp.value == (2, 4)
        # lat's two dimensions may be reported in either order
        pattern = (r"dimensions for auxiliary coordinate variable lat \([xy]c, [xy]c\) are not a subset of dimensions for variable "
                   r"h_temp \(xc\)")
        first_msg_match = re.match(pattern, h_temp.msgs[0])
        assert first_msg_match is not None

        sal = by_name[u"§5.0 Auxiliary Coordinates of sal must have a subset of sal's dimensions"]
        assert sal.value == (4, 4)

    def test_check_grid_coordinates(self):
        '''
        check_grid_coordinates on '2dim': grid feature T scores 2 of 2, which
        is also the total across all results.
        '''
        dataset = self.load_dataset(STATIC_FILES['2dim'])
        outcomes = self.cf.check_grid_coordinates(dataset)
        scored, out_of, _ = self.get_results(outcomes)

        by_name = {}
        for res in outcomes:
            by_name[res.name] = res
        feature_t = by_name[u'§5.6 Grid Feature T is associated with true latitude and true longitude']
        self.assertEqual(feature_t.value, (2, 2))
        self.assertEqual((scored, out_of), (2, 2))

    def test_check_two_dimensional(self):
        '''
        check_grid_coordinates: every result for '2dim' is truthy, while
        'bad2dim' reports missing latitude associations for T and C.
        '''
        good_ds = self.load_dataset(STATIC_FILES['2dim'])
        for outcome in self.cf.check_grid_coordinates(good_ds):
            self.assertTrue(outcome.value)

        # Negative case
        bad_ds = self.load_dataset(STATIC_FILES['bad2dim'])
        outcomes = self.cf.check_grid_coordinates(bad_ds)
        # the tally is computed as in the other tests but not asserted on
        self.get_results(outcomes)

        by_name = dict((res.name, res) for res in outcomes)
        # T has no association at all
        feature_t = by_name[u'§5.6 Grid Feature T is associated with true latitude and true longitude']
        assert feature_t.msgs[0] == 'T is not associated with a coordinate defining true latitude and sharing a subset of dimensions'
        # C's coordinate dimensions are not a subset of the variable's
        feature_c = by_name[u'§5.6 Grid Feature C is associated with true latitude and true longitude']
        assert feature_c.msgs[0] == 'C is not associated with a coordinate defining true latitude and sharing a subset of dimensions'

    def test_check_reduced_horizontal_grid(self):
        '''
        check_reduced_horizontal_grid: PS in 'rhgrid' scores 7 of 7; in
        'bad-rhgrid' PSa scores 6 of 7 and PSb scores 7 of 7.
        '''
        def indexed_results(static_key):
            # Run the check on one static file and index its results by name
            ds = self.load_dataset(STATIC_FILES[static_key])
            found = self.cf.check_reduced_horizontal_grid(ds)
            return dict((res.name, res) for res in found)

        by_name = indexed_results('rhgrid')
        ps = by_name[u'§5.3 PS is a valid reduced horizontal grid']
        self.assertEqual(ps.value, (7, 7))

        by_name = indexed_results('bad-rhgrid')
        psa = by_name[u'§5.3 PSa is a valid reduced horizontal grid']
        self.assertEqual(psa.value, (6, 7))
        self.assertEqual(psa.msgs[0], "PSa must be associated with a valid longitude coordinate")
        psb = by_name[u'§5.3 PSb is a valid reduced horizontal grid']
        # PSb's dimensions do not line up, but that is caught by a separate
        # §5.0 check, so this check still awards full marks.
        self.assertEqual(psb.value, (7, 7))

    def test_check_grid_mapping(self):
        '''
        check_grid_mapping on 'mapping': epsg loses one point for a missing
        false_easting, while wgs84 and lat earn full marks.
        '''
        dataset = self.load_dataset(STATIC_FILES['mapping'])
        by_name = {}
        for res in self.cf.check_grid_mapping(dataset):
            by_name[res.name] = res

        epsg = by_name[u'§5.6 Grid Mapping Variable epsg must define a valid grid mapping']
        self.assertEqual(epsg.value, (7, 8))
        self.assertEqual(epsg.msgs[0], 'false_easting is a required attribute for grid mapping stereographic')

        wgs84 = by_name[u'§5.6 Grid Mapping Variable wgs84 must define a valid grid mapping']
        self.assertEqual(wgs84.value, (3, 3))

        lat = by_name[u'§5.6 Variable lat defining a grid mapping has valid grid_mapping attribute']
        self.assertEqual(lat.value, (2, 2))

    def test_check_geographic_region(self):
        '''
        check_geographic_region on 'bad_region': the region named by neverland
        scores 0 of 1 and the one named by geo_region scores 1 of 1.
        '''
        dataset = self.load_dataset(STATIC_FILES['bad_region'])
        by_name = dict(
            (res.name, res)
            for res in self.cf.check_geographic_region(dataset)
        )

        neverland = by_name[u'§6.1.1 Geographic region specified by neverland is valid']
        self.assertEqual(neverland.value, (0, 1))
        geo_region = by_name[u'§6.1.1 Geographic region specified by geo_region is valid']
        self.assertEqual(geo_region.value, (1, 1))

    def test_check_packed_data(self):
        '''
        check_packed_data on 'bad_data_type' yields four results, of which
        the first and last are falsy and the middle two truthy.
        '''
        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcomes = self.cf.check_packed_data(dataset)
        self.assertEqual(len(outcomes), 4)

        expected_truthiness = (False, True, True, False)
        for position, should_pass in enumerate(expected_truthiness):
            value = outcomes[position].value
            if should_pass:
                self.assertTrue(value)
            else:
                self.assertFalse(value)

    def test_compress_packed(self):
        """
        check_compression_gathering: the first result for
        'reduced_horizontal_grid' is truthy; the first two results for
        'bad_data_type' are falsy.
        """
        good_ds = self.load_dataset(STATIC_FILES['reduced_horizontal_grid'])
        good_outcomes = self.cf.check_compression_gathering(good_ds)
        self.assertTrue(good_outcomes[0].value)

        bad_ds = self.load_dataset(STATIC_FILES['bad_data_type'])
        bad_outcomes = self.cf.check_compression_gathering(bad_ds)
        for position in (0, 1):
            self.assertFalse(bad_outcomes[position].value)

    def test_check_all_features_are_same_type(self):
        '''
        check_all_features_are_same_type returns something truthy for both
        the 'rutgers' and the 'featureType' static files.
        '''
        for static_key in ('rutgers', 'featureType'):
            dataset = self.load_dataset(STATIC_FILES[static_key])
            outcome = self.cf.check_all_features_are_same_type(dataset)
            self.assertTrue(outcome)

    def test_check_units(self):
        '''
        Container variables must be left out of the units check while
        geophysical variables are included
        '''
        dataset = self.load_dataset(STATIC_FILES['units_check'])
        tally = self.get_results(self.cf.check_units(dataset))
        scored, out_of, messages = tally

        # Names of variables whose checks passed are not recorded, so the
        # strictest available assertion is on the exact number of checks
        # performed and on the absence of any error message.
        self.assertEqual(scored, 20)
        self.assertEqual(out_of, 20)
        self.assertEqual(messages, [])

    def test_check_duplicates(self):
        '''
        check_duplicate_axis must report the duplicated X axis on temp
        '''
        dataset = self.load_dataset(STATIC_FILES['duplicate_axis'])
        by_name = {}
        for res in self.cf.check_duplicate_axis(dataset):
            by_name[res.name] = res

        temp = by_name[u'§5.0 Variable temp does not contain duplicate coordinates']
        self.assertEqual(temp.msgs[0], 'duplicate axis X defined by lon_rho')

    def test_check_multi_dimensional_coords(self):
        '''
        Multi dimensional coordinates that share a name with one of their
        dimensions must be reported
        '''
        dataset = self.load_dataset(STATIC_FILES['multi-dim-coordinates'])
        outcomes = self.cf.check_multi_dimensional_coords(dataset)
        scored, out_of, _ = self.get_results(outcomes)
        by_name = dict((res.name, res) for res in outcomes)

        xlon = by_name[u'§5.0 multidimensional coordinate xlon should not have the same name as dimension']
        self.assertEqual(xlon.msgs[0], 'xlon shares the same name as one of its dimensions')
        xlat = by_name[u'§5.0 multidimensional coordinate xlat should not have the same name as dimension']
        self.assertEqual(xlat.msgs[0], 'xlat shares the same name as one of its dimensions')

        self.assertEqual((scored, out_of), (2, 4))

    def test_64bit(self):
        '''
        Run the 'cf' checker through a CheckSuite on the 'ints64' static
        file. Nothing is asserted; the run only has to complete.
        '''
        dataset = self.load_dataset(STATIC_FILES['ints64'])
        runner = CheckSuite()
        # register CFBaseCheck as the only checker, under the name 'cf'
        runner.checkers = {'cf': CFBaseCheck}
        runner.run(dataset, 'cf')

    def test_variable_feature_check(self):
        '''
        check_variable_features: 'bad-trajectory' reports temperature as a
        point rather than a trajectory, while 'trajectory-complete' and
        'trajectory-implied' earn every available point.
        '''
        bad_ds = self.load_dataset(STATIC_FILES['bad-trajectory'])
        outcomes = self.cf.check_variable_features(bad_ds)
        # the tally is computed as in the other tests but not asserted on
        self.get_results(outcomes)
        by_name = dict((res.name, res) for res in outcomes)
        temperature = by_name[u'§9.1 Feature Type for temperature is valid trajectory']
        self.assertEqual(temperature.msgs[0], 'temperature is not a trajectory, it is detected as a point')

        for static_key in ('trajectory-complete', 'trajectory-implied'):
            good_ds = self.load_dataset(STATIC_FILES[static_key])
            outcomes = self.cf.check_variable_features(good_ds)
            scored, out_of, _ = self.get_results(outcomes)
            self.assertEqual(scored, out_of)

    def test_check_cell_methods(self):
        '''
        Exercise check_cell_methods (§7.1, §7.3 and §7.3.3), first against the
        'climatology' static file and then against a mock time series whose
        temperature variable is given a succession of cell_methods values.
        '''
        dataset = self.load_dataset(STATIC_FILES['climatology'])
        results = self.cf.check_cell_methods(dataset)
        result_dict = {result.name: result for result in results}
        # NOTE(review): as in the original, only the result objects are
        # asserted on here, so in practice this verifies that the three named
        # results exist -- confirm whether .value was meant to be checked.
        for name in (u'§7.1 temperature has a valid cell_methods attribute format',
                     u'§7.3 temperature has valid methods in cell_methods attribute',
                     u'§7.3 temperature has valid names in cell_methods attribute'):
            assert result_dict[name]

        nc_obj = MockTimeSeries()
        nc_obj.createVariable('temperature', 'd', ('time',))
        temp = nc_obj.variables['temperature']
        modifiers_key = u'§7.3.3 temperature has valid cell_methods modifiers'

        def check_temperature(cell_methods):
            # Set cell_methods on the mock variable, rerun the check and
            # return the results indexed by name together with every message.
            temp.cell_methods = cell_methods
            results = self.cf.check_cell_methods(nc_obj)
            _, _, messages = self.get_results(results)
            return {result.name: result for result in results}, messages

        # well-formed interval modifier
        result_dict, messages = check_temperature(
            'lat: lon: mean depth: mean (interval: 20 meters)')
        self.assertEqual(result_dict[modifiers_key].value, (3, 3))

        # interval with a non-numeric value and unparsable units
        result_dict, messages = check_temperature(
            'lat: lon: mean depth: mean (interval: x whizbangs)')
        self.assertNotEqual(result_dict[modifiers_key].value, (3, 3))
        self.assertIn('temperature:cell_methods contains an interval value that does not parse as a numeric value: "x".',
                      messages)
        self.assertIn('temperature:cell_methods interval units "whizbangs" is not parsable by UDUNITS.',
                      messages)

        # The four message checks below used to be written as
        # self.assertTrue('<message>'), which passes for any non-empty string
        # and so never looked at the checker's output. They now test
        # membership in the collected messages.
        # NOTE(review): the expected strings are copied verbatim from the old
        # assertions and were never actually compared before; if one of these
        # fails, verify the wording against what check_cell_methods emits.

        # a comment placed ahead of the standard interval element
        result_dict, messages = check_temperature(
            'lat: lon: mean depth: mean (comment: should not go here interval: 2.5 m)')
        self.assertIn('The non-standard "comment:" element must come after any standard elements in cell_methods for variable temperature',
                      messages)

        # standalone comments require no keyword
        result_dict, messages = check_temperature(
            'lon: mean (This is a standalone comment)')
        self.assertEqual(result_dict[modifiers_key].value, (1, 1))

        # unknown keyword inside the parentheses
        result_dict, messages = check_temperature(
            'lat: lon: mean depth: mean (invalid_keyword: this is invalid)')
        self.assertIn('Invalid cell_methods keyword "invalid_keyword" for variable temperature. Must be one of [interval, comment]',
                      messages)

        # a comment sandwiched between two interval elements
        result_dict, messages = check_temperature(
            'lat: lon: mean depth: mean (interval: 0.2 m comment: This should come last interval: 0.01 degrees)')
        self.assertIn('The non-standard "comment:" element must come after any standard elements in cell_methods for variable temperature',
                      messages)

        # first interval keyword is missing its colon
        result_dict, messages = check_temperature(
            'lat: lon: mean depth: mean (interval 0.2 m interval: 0.01 degrees)')
        self.assertIn('Parenthetical content inside cell_methods is not well formed: interval 0.2 m interval: 0.01 degrees',
                      messages)



    # --------------------------------------------------------------------------------
    # Utility Method Tests
    # --------------------------------------------------------------------------------

    def test_temporal_unit_conversion(self):
        '''
        units_convertible: 'hours' converts to 'seconds' but not to
        'hours since 2000-01-01'.
        '''
        plain_to_plain = units_convertible('hours', 'seconds')
        self.assertTrue(plain_to_plain)
        plain_to_reference = units_convertible('hours', 'hours since 2000-01-01')
        self.assertFalse(plain_to_reference)

    def test_units_temporal(self):
        '''
        units_temporal accepts 'hours since 2000-01-01' and rejects both a
        bare 'hours' and a reference that is not a date.
        '''
        self.assertTrue(units_temporal('hours since 2000-01-01'))
        for rejected in ('hours', 'days since the big bang'):
            self.assertFalse(units_temporal(rejected))

Exemple #4

0

Afficher le fichier

Fichier : test_cf.py Projet : ioos/compliance-checker

class TestCF(BaseTestCase):

    def setUp(self):
        '''
        Create the CFBaseCheck instance used by the tests as self.cf
        '''
        checker = CFBaseCheck()
        self.cf = checker

    # --------------------------------------------------------------------------------
    # Helper Methods
    # --------------------------------------------------------------------------------

    def new_nc_file(self):
        '''
        Open a new netCDF file named example.nc in the temporary directory
        for writing and return it.

        Raises IOError if that file already exists. Removal of the file and
        closing of the dataset are both registered as test cleanups.
        '''
        target = os.path.join(gettempdir(), 'example.nc')
        if not os.path.exists(target):
            handle = Dataset(target, 'w')
            self.addCleanup(os.remove, target)
            self.addCleanup(handle.close)
            return handle
        raise IOError('File Exists: %s' % target)

    def load_dataset(self, nc_dataset):
        '''
        Open the netCDF file at the path nc_dataset read-only and return it.

        Raises ValueError when nc_dataset is not a str. Closing the dataset
        is registered as a test cleanup.
        '''
        if isinstance(nc_dataset, str):
            opened = Dataset(nc_dataset, 'r')
            self.addCleanup(opened.close)
            return opened
        raise ValueError("nc_dataset should be a string")

    def get_results(self, results):
        '''
        Tally a collection of check results.

        A result whose value is a tuple contributes value[0] points out of
        value[1]; any other value contributes int(value) points out of 1.
        Returns (scored, out_of, messages), where messages holds every
        result's msgs in iteration order.
        '''
        earned = possible = 0
        for res in results:
            val = res.value
            if not isinstance(val, tuple):
                possible += 1
                earned += int(val)
                continue
            possible += val[1]
            earned += val[0]

        # Messages are gathered in a second pass over results
        collected = [msg for res in results for msg in res.msgs]

        return earned, possible, collected

    # --------------------------------------------------------------------------------
    # Compliance Tests
    # --------------------------------------------------------------------------------

    def test_check_data_types(self):
        """
        Section 2.2: the netCDF data types char, byte, short, int, float or
        real, and double are all acceptable. check_data_types must award full
        marks to 'rutgers' and flag the int64 variable in 'bad_data_type'.
        """
        good_ds = self.load_dataset(STATIC_FILES['rutgers'])
        outcome = self.cf.check_data_types(good_ds)
        scored, out_of = outcome.value[0], outcome.value[1]
        self.assertEqual(scored, out_of)

        bad_ds = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcome = self.cf.check_data_types(bad_ds)
        self.assertEqual(outcome.msgs[0], u'The variable temp failed because the datatype is int64')
        self.assertEqual(outcome.value, (6, 7))

    def test_check_child_attr_data_types(self):
        """
        Tests check_child_attr_data_types() to ensure the attributes specified
        in Section 2.5.1 have a data type matching their parent variable.

        The attributes are added to a float64 variable one at a time; the
        dataset is rechecked after each addition.
        """

        # create the dataset from MockTimeSeries and add a float64 variable
        # "temp" on the "time" dimension; dimensions is passed as a
        # one-element tuple (the original ("time") was just the string "time")
        ds = MockTimeSeries()
        ds.createVariable("temp", np.float64, dimensions=("time",))

        # no special data attrs are present yet, so the result should be good
        result = self.cf.check_child_attr_data_types(ds) # checks all special attrs for all variables
        self.assert_result_is_good(result)

        # give temp _FillValue as a float, expect good result.
        # np.float was only an alias of the builtin float and has been removed
        # from NumPy; np.float64 states the intended type explicitly and
        # matches the variable's dtype.
        ds.variables['temp'].setncattr("_FillValue", np.float64(99999999999999999999.))
        result = self.cf.check_child_attr_data_types(ds)
        self.assert_result_is_good(result)

        # give temp valid_range as an array of floats, all should check out
        ds.variables['temp'].setncattr("valid_range", np.array([35., 38.]))
        result = self.cf.check_child_attr_data_types(ds)
        self.assert_result_is_good(result)

        # now give invalid integer for valid_min; above two should still check out, this one should fail
        ds.variables['temp'].setncattr("valid_min", 45)
        result = self.cf.check_child_attr_data_types(ds)
        self.assert_result_is_bad(result)

        # now give invalid string for valid_max
        ds.variables['temp'].setncattr("valid_max", "eighty")
        result = self.cf.check_child_attr_data_types(ds)
        self.assert_result_is_bad(result)

        # TODO for CF-1.7: actual_range, actual_min/max

    def test_naming_conventions(self):
        '''
        Section 2.3 Naming Conventions

        Names of variables, dimensions and attributes should start with a
        letter and consist only of letters, digits and underscores.
        '''

        # a compliant dataset earns every available point
        good_ds = self.load_dataset(STATIC_FILES['rutgers'])
        tally = self.get_results(self.cf.check_naming_conventions(good_ds))
        self.assertEqual(tally[0], tally[1])

        # both non-compliant datasets are held to the same expectations:
        # three results, two of which lose points
        for static_key in ('bad', 'chap2'):
            bad_ds = self.load_dataset(STATIC_FILES[static_key])
            outcomes = self.cf.check_naming_conventions(bad_ds)
            scored, out_of, _ = self.get_results(outcomes)
            assert len(outcomes) == 3
            assert scored < out_of
            losing = [res for res in outcomes if res.value[0] < res.value[1]]
            assert len(losing) == 2
            assert all(res.name == u'§2.3 Naming Conventions' for res in outcomes)


    def test_check_names_unique(self):
        """
        Section 2.3: names must not differ by case alone; ignoring case, no
        two names should be the same.
        """
        good_ds = self.load_dataset(STATIC_FILES['rutgers'])
        outcome = self.cf.check_names_unique(good_ds)

        # one point per variable, all of them earned
        variable_count = len(good_ds.variables)
        self.assertEqual(outcome.value, (variable_count, variable_count))

        bad_ds = self.load_dataset(STATIC_FILES['chap2'])
        outcome = self.cf.check_names_unique(bad_ds)
        self.assertEqual(outcome.value, (6, 7))
        self.assertEqual(outcome.msgs[0], u'Variables are not case sensitive. Duplicate variables named: not_unique')

    def test_check_dimension_names(self):
        """
        Section 2.4: a variable may have any number of dimensions, zero
        included, and all of them must have different names.
        """

        first_ds = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcome = self.cf.check_dimension_names(first_ds)
        self.assertEqual(outcome.value, (6, 7))

        second_ds = self.load_dataset(STATIC_FILES['chap2'])
        outcome = self.cf.check_dimension_names(second_ds)
        self.assertEqual(outcome.msgs[0], u'no_reason has two or more dimensions named time')

    def test_check_dimension_order(self):
        """
        Section 2.4: dimensions interpreted as "date or time" (T), "height or
        depth" (Z), "latitude" (Y) or "longitude" (X) are recommended to
        appear in the relative order T, Z, Y, X in the CDL definition of the
        file, with any other dimension placed to the left of these
        spatiotemporal ones whenever possible.
        """
        bad_ds = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcome = self.cf.check_dimension_order(bad_ds)
        expected_msg = (u"really_bad's dimensions are not in the recommended order "
                        "T, Z, Y, X. They are latitude, power")
        assert outcome.value == (5, 6)
        assert outcome.msgs[0] == expected_msg

        good_ds = self.load_dataset(STATIC_FILES['dimension_order'])
        outcome = self.cf.check_dimension_order(good_ds)
        self.assertEqual((3, 3), outcome.value)
        self.assertEqual([], outcome.msgs)

    def test_check_fill_value_outside_valid_range(self):
        """
        Section 2.5.1: a variable's _FillValue should lie outside the range
        given by valid_range, when one is used.
        """

        first_ds = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcome = self.cf.check_fill_value_outside_valid_range(first_ds)
        salinity_msg = (u'salinity:_FillValue (1.0) should be outside the '
                        'range specified by valid_min/valid_max (-10, 10)')
        self.assertEqual(outcome.msgs[0], salinity_msg)

        second_ds = self.load_dataset(STATIC_FILES['chap2'])
        outcome = self.cf.check_fill_value_outside_valid_range(second_ds)
        wind_speed_msg = (u'wind_speed:_FillValue (12.0) should be outside the '
                          'range specified by valid_min/valid_max (0.0, 20.0)')
        self.assertEqual(outcome.value, (1, 2))
        self.assertEqual(outcome.msgs[0], wind_speed_msg)

    def test_check_conventions_are_cf_16(self):
        """
        §2.6.1: the NUG-defined global attribute Conventions should be set to
        the string value "CF-1.6"
        """
        # 'rutgers' has     :Conventions = "CF-1.6"
        # 'conv_multi' has  :Conventions = "CF-1.6 ,ACDD"
        for static_key in ('rutgers', 'conv_multi'):
            dataset = self.load_dataset(STATIC_FILES[static_key])
            outcome = self.cf.check_conventions_are_cf_16(dataset)
            self.assertTrue(outcome.value)

        # 'conv_bad' has    :Conventions = "NoConvention"
        dataset = self.load_dataset(STATIC_FILES['conv_bad'])
        outcome = self.cf.check_conventions_are_cf_16(dataset)
        self.assertFalse(outcome.value)
        expected_msg = (u'§2.6.1 Conventions global attribute does not contain '
                        '"CF-1.6". The CF Checker only supports CF-1.6 '
                        'at this time.')
        self.assertEqual(outcome.msgs[0], expected_msg)

    def test_check_convention_globals(self):
        """
        §2.6.2: the title and history global attributes must be checked
        properly.
        """

        # 'rutgers' is expected to earn every available point
        good_ds = self.load_dataset(STATIC_FILES['rutgers'])
        outcome = self.cf.check_convention_globals(good_ds)
        self.assertEqual(outcome.value[0], outcome.value[1])

        # 'bad_data_type' is expected to lose points, and the first message
        # must be the one about the title attribute
        bad_ds = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcome = self.cf.check_convention_globals(bad_ds)
        self.assertNotEqual(outcome.value[0], outcome.value[1])
        self.assertEqual(outcome.msgs[0], u'§2.6.2 global attribute title should exist and be a non-empty string')

    def test_check_convention_possibly_var_attrs(self):
        """
        §2.6.2: every variable representing a dimensional quantity requires a
        units attribute, except boundary variables (Section 7.1, "Cell
        Boundaries") and climatology variables (Section 7.4, "Climatological
        Statistics").

        Dimensionless quantities need no units; a variable without a units
        attribute is assumed to be dimensionless, although a units attribute
        naming a dimensionless unit may still be supplied.

        - units required
        - type must be recognized by udunits
        - a standard name, if given, must agree with the standard name table
          and with any cell_methods attribute that is present
        """

        # rutgers carries 10 comment attrs, 1 institution, 1 source and
        # 1 EMPTY references attribute
        first_ds = self.load_dataset(STATIC_FILES['rutgers'])
        outcome = self.cf.check_convention_possibly_var_attrs(first_ds)
        self.assertNotEqual(outcome.value[0], outcome.value[1])
        self.assertEqual(outcome.msgs[0], u"§2.6.2 references global attribute should be a non-empty string")

        # bad_data_type.nc has no references and no source, and its
        # institution is a 10L; comments only matter when they are empty
        second_ds = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcome = self.cf.check_convention_possibly_var_attrs(second_ds)
        self.assertNotEqual(outcome.value[0], outcome.value[1])
        self.assertEqual(outcome.msgs[0], u'§2.6.2 salinity:institution should be a non-empty string')

    def test_check_standard_name(self):
        """
        Section 3.3: a variable is given a standard name through its
        standard_name attribute, a string made of a standard name that may be
        followed by one or more blanks and a standard name modifier
        """
        passing_ds = self.load_dataset(STATIC_FILES['2dim'])
        for outcome in self.cf.check_standard_name(passing_ds):
            self.assertTrue(outcome.value)

        # failing dataset: nine results, all under the same name, and at
        # least one point lost (originally annotated as "9 vars checked,
        # 8 fail")
        failing_ds = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcomes = self.cf.check_standard_name(failing_ds)
        score, out_of, _ = self.get_results(outcomes)
        self.assertEqual(len(outcomes), 9)
        self.assertLess(score, out_of)
        self.assertTrue(all(res.name == u"§3.3 Standard Name" for res in outcomes))

        # a further dataset in which all variables pass this check
        other_ds = self.load_dataset(STATIC_FILES['reduced_horizontal_grid'])
        outcomes = self.cf.check_standard_name(other_ds)
        score, out_of, _ = self.get_results(outcomes)
        self.assertEqual(score, out_of)

    def test_cell_bounds(self):
        '''
        check_cell_boundaries: 'grid-boundaries' scores 2 of 2;
        'bounds_bad_order' and 'bounds_bad_num_coords' score 0 of 2. The
        remaining files are only run through the check without assertions.
        '''
        def run_check(static_key):
            # Load one static file and run the boundary check on it
            ds = self.load_dataset(STATIC_FILES[static_key])
            return self.cf.check_cell_boundaries(ds)

        score, out_of, _ = self.get_results(run_check('grid-boundaries'))
        self.assertEqual((score, out_of), (2, 2))

        # these two only have to get through the check
        run_check('cf_example_cell_measures')
        run_check('bad_data_type')

        score, out_of, _ = self.get_results(run_check('bounds_bad_order'))
        # The rgrid coordinate variable must not be checked for standard_name
        self.assertEqual((score, out_of), (0, 2))

        score, out_of, _ = self.get_results(run_check('bounds_bad_num_coords'))
        self.assertEqual((score, out_of), (0, 2))

        # tallied, but nothing is asserted about the outcome
        self.get_results(run_check('1d_bound_bad'))

    def test_cell_measures(self):
        '''
        Run check_cell_measures on one good and two bad datasets and verify
        the score or the emitted message for each.
        '''
        def run_check(fixture_key):
            # returns (scored, out_of, messages) for the named fixture
            loaded = self.load_dataset(STATIC_FILES[fixture_key])
            return self.get_results(self.cf.check_cell_measures(loaded))

        points, possible, _ = run_check('cell_measure')
        assert points == possible
        assert points > 0

        # malformed cell_measures attribute
        _, _, msgs = run_check('bad_cell_measure1')
        expected = ("The cell_measures attribute for variable PS is formatted incorrectly.  "
                    "It should take the form of either 'area: cell_var' or 'volume: cell_var' "
                    "where cell_var is the variable describing the cell measures")
        assert expected in msgs

        # cell measure variable missing from the dataset
        _, _, msgs = run_check('bad_cell_measure2')
        expected = u'Cell measure variable PS referred to by box_area is not present in dataset variables'
        assert expected in msgs

    def test_climatology_cell_methods(self):
        """
        Checks that climatology cell_methods strings are properly validated.

        The fixture's own cell_methods is validated first; the temperature
        variable is then replaced by a MockVariable so that a series of
        cell_methods strings can be tried without touching the file.
        """
        dataset = self.load_dataset(STATIC_FILES['climatology'])

        def score_pair():
            # (scored, out_of) for the dataset in its current state
            outcomes = self.cf.check_climatological_statistics(dataset)
            points, possible, _ = self.get_results(outcomes)
            return points, possible

        # cell methods in this file is
        # "time: mean within days time: mean over days"
        self.assertEqual(*score_pair())

        mock_temp = MockVariable(dataset.variables['temperature'])
        dataset.variables['temperature'] = mock_temp

        cases = (
            ('INVALID', self.assertNotEqual),
            # incorrect time units
            ("time: mean within years time: mean over days", self.assertNotEqual),
            # can only have third method over years if first two are within
            # and over days, respectively
            ("time: mean within years time: mean over years time: sum over years",
             self.assertNotEqual),
            # this, on the other hand, should work.
            ("time: mean within days time: mean over days time: sum over years",
             self.assertEqual),
            # parenthesized comment to describe climatology
            ("time: sum within days time: maximum over days (ENSO years)",
             self.assertEqual),
        )
        for cell_methods, expectation in cases:
            mock_temp.cell_methods = cell_methods
            expectation(*score_pair())

    def test_check_ancillary_variables(self):
        '''
        Test to ensure that ancillary variables are properly checked
        '''
        section = u'§3.4 Ancillary Data'

        def section_result(fixture_key):
            # index the results by name and pick the ancillary-data entry
            loaded = self.load_dataset(STATIC_FILES[fixture_key])
            by_name = {}
            for outcome in self.cf.check_ancillary_variables(loaded):
                by_name[outcome.name] = outcome
            return by_name[section]

        assert section_result('rutgers').value == (2, 2)

        flagged = section_result('bad_reference')
        assert flagged.value == (1, 2)
        assert u"temp_qc is not a variable in this dataset" == flagged.msgs[0]

    def test_download_standard_name_table(self):
        """
        Test that a user can download a specific standard name table.

        Downloads version 35 of the table into the cached data directory,
        then verifies that the file exists and that StandardNameTable reports
        the requested version.
        """
        version = '35'

        data_directory = create_cached_data_dir()
        location = os.path.join(data_directory, 'cf-standard-name-table-test-{0}.xml'.format(version))

        def remove_download():
            # tolerate a download that never produced a file
            if os.path.exists(location):
                os.remove(location)

        # Register the cleanup up front: when it was registered after the
        # assertions, any failing assertion left the downloaded file behind.
        self.addCleanup(remove_download)

        download_cf_standard_name_table(version, location)

        # Test that the file now exists in location and is the right version
        self.assertTrue(os.path.isfile(location))
        std_names = StandardNameTable(location)
        self.assertEqual(std_names._version, version)

    def test_bad_standard_name_table(self):
        """
        Test the failure paths when a bad standard name table is supplied:
        StandardNameTable raises IOError for a missing file, and
        _find_cf_standard_name_table returns a falsy value for unusable
        standard_name_table attributes.
        """
        missing_table = 'dummy_non_existent_file.ext'

        # would this ever actually be reached by the code?
        with pytest.raises(IOError):
            StandardNameTable(missing_table)

        mock_ds = MockTimeSeries()
        # first a non-existent file name, then a non-string (empty array) value
        for bogus_value in (missing_table, np.array([], np.float64)):
            mock_ds.standard_name_table = bogus_value
            self.assertFalse(self.cf._find_cf_standard_name_table(mock_ds))

    def test_check_flags(self):
        """Test that the check for flags works as expected."""
        rutgers = self.load_dataset(STATIC_FILES['rutgers'])
        outcomes = self.cf.check_flags(rutgers)
        # tallied only to make sure the results can be aggregated
        self.get_results(outcomes)

        # only 4 variables in this dataset do not have perfect scores
        shortfalls = 0
        for outcome in outcomes:
            if outcome.value[0] < outcome.value[1]:
                shortfalls += 1
        assert shortfalls == 4

    def test_check_flag_masks(self):
        '''
        The ghrsst fixture is an example of a perfect dataset for flags: at
        least one point is scored and none are lost.
        '''
        ghrsst = self.load_dataset(STATIC_FILES['ghrsst'])
        points, possible, _ = self.get_results(self.cf.check_flags(ghrsst))
        assert points > 0
        assert points == possible

    def test_check_bad_units(self):
        """Load a dataset with units that are expected to fail (bad_units.nc).
        There are 6 variables in this dataset, three of which should give
        an error:
            - time, with units "s" (should be <units> since <epoch>)
            - lat, with units "degrees_E" (should be degrees)
            - lev, with units "level" (deprecated)

        A compliant dataset (2dim) is checked first as a sanity baseline."""

        section = u'§3.1 Units'

        good_ds = self.load_dataset(STATIC_FILES['2dim'])
        for outcome in self.cf.check_units(good_ds):
            self.assert_result_is_good(outcome)

        # bad_units is the fixture dedicated to unit failures (bad_data_type
        # was used here previously)
        bad_ds = self.load_dataset(STATIC_FILES['bad_units'])
        all_outcomes = self.cf.check_units(bad_ds)

        # flatten the per-result message lists into a single list
        flat_msgs = [msg
                     for outcome in all_outcomes if outcome.msgs
                     for msg in outcome.msgs]

        # check the results only have '§3.1 Units' as the header
        for outcome in all_outcomes:
            assert outcome.name == section

        # check that all the expected variables have been hit
        for var_name in ["time", "lat", "lev"]:
            assert any(var_name in msg for msg in flat_msgs)


    def test_latitude(self):
        '''
        Section 4.1 Latitude Coordinate

        Runs check_latitude against a compliant grid, a non-compliant dataset
        and a rotated-pole grid, then verifies the message produced when a
        grid latitude variable is given unsuitable units.
        '''
        # Check compliance
        dataset = self.load_dataset(STATIC_FILES['example-grid'])
        results = self.cf.check_latitude(dataset)
        score, out_of, messages = self.get_results(results)
        assert score == out_of

        # Verify non-compliance -- 9/12 pass
        dataset = self.load_dataset(STATIC_FILES['bad'])
        results = self.cf.check_latitude(dataset)
        scored, out_of, messages = self.get_results(results)
        assert len(results) == 12
        assert scored < out_of
        assert len([r for r in results if r.value[0] < r.value[1]]) == 3
        # all() is required here: a bare generator expression is always
        # truthy, which made this assertion vacuous
        assert all(r.name == u'§4.1 Latitude Coordinates' for r in results)

        # check with another ds -- all 6 vars checked pass
        dataset = self.load_dataset(STATIC_FILES['rotated_pole_grid'])
        results = self.cf.check_latitude(dataset)
        scored, out_of, messages = self.get_results(results)
        assert len(results) == 6
        assert scored == out_of
        assert all(r.name == u'§4.1 Latitude Coordinates' for r in results)

        # hack to avoid writing to read-only file
        dataset.variables['rlat'] = MockVariable(dataset.variables['rlat'])
        rlat = dataset.variables['rlat']
        rlat.name = 'rlat'
        wrong_format = u"Grid latitude variable '{}' should use degree equivalent units without east or north components. Current units are {}"
        # both unit strings are expected to produce the wrong-format message
        for bad_units in ('degrees_north', 'radians'):
            rlat.units = bad_units
            results = self.cf.check_latitude(dataset)
            scored, out_of, messages = self.get_results(results)
            self.assertTrue(wrong_format.format(rlat.name, rlat.units) in messages)


    def test_longitude(self):
        '''
        Section 4.2 Longitude Coordinate

        Runs check_longitude against a compliant grid and a non-compliant
        dataset, then verifies the message produced when a grid longitude
        variable is given unsuitable units.
        '''
        # Check compliance
        grid_ds = self.load_dataset(STATIC_FILES['example-grid'])
        points, possible, _ = self.get_results(self.cf.check_longitude(grid_ds))
        assert points == possible

        # Verify non-compliance -- 12 checked, 3 fail
        bad_ds = self.load_dataset(STATIC_FILES['bad'])
        bad_outcomes = self.cf.check_longitude(bad_ds)
        points, possible, _ = self.get_results(bad_outcomes)
        assert len(bad_outcomes) == 12
        assert points < possible
        failing = [o for o in bad_outcomes if o.value[0] < o.value[1]]
        assert len(failing) == 3
        # NOTE(review): the expected header is the *latitude* section title
        # even though these are check_longitude results -- confirm against
        # the checker before changing it.
        for outcome in bad_outcomes:
            assert outcome.name == u'§4.1 Latitude Coordinates'

        # check different dataset # TODO can be improved for check_latitude too
        # NOTE(review): this calls check_latitude inside the longitude test;
        # kept as-is, verify whether check_longitude was intended.
        rotated_ds = self.load_dataset(STATIC_FILES['rotated_pole_grid'])
        points, possible, _ = self.get_results(self.cf.check_latitude(rotated_ds))
        assert (points, possible) == (6, 6)

        # hack to avoid writing to read-only file
        rotated_ds.variables['rlon'] = MockVariable(rotated_ds.variables['rlon'])
        rlon = rotated_ds.variables['rlon']
        rlon.name = 'rlon'
        wrong_format = u"Grid longitude variable '{}' should use degree equivalent units without east or north components. Current units are {}"
        # both unit strings are expected to produce the wrong-format message
        for bad_units in ('degrees_east', 'radians'):
            rlon.units = bad_units
            _, _, msgs = self.get_results(self.cf.check_longitude(rotated_ds))
            self.assertTrue(wrong_format.format(rlon.name, rlon.units) in msgs)


    def test_is_vertical_coordinate(self):
        '''
        Section 4.3 Qualifiers for Vertical Coordinate

        NOTE: The standard doesn't explicitly say that vertical coordinates must be a
        coordinate type.

        Each case sets a group of attributes on a fresh MockVariable and
        expects is_vertical_coordinate to accept it under an unknown name.
        '''
        attribute_cases = (
            # Proper name/standard_name
            (('standard_name', 'depth'),),
            # Proper Axis
            (('axis', 'Z'),),
            # Proper units
            (('units', 'dbar'),),
            # Proper units/positive
            (('units', 'm'), ('positive', 'up')),
        )
        for attributes in attribute_cases:
            candidate = MockVariable()
            for attr_name, attr_value in attributes:
                setattr(candidate, attr_name, attr_value)
            self.assertTrue(is_vertical_coordinate('not_known', candidate))

    def test_vertical_dimension(self):
        '''
        Section 4.3.1 Dimensional Vertical Coordinate
        '''
        section = u'§4.3 Vertical Coordinate'

        # Check for compliance
        grid_ds = self.load_dataset(STATIC_FILES['example-grid'])
        grid_outcomes = self.cf.check_dimensional_vertical_coordinate(grid_ds)
        assert len(grid_outcomes) == 1
        for outcome in grid_outcomes:
            assert outcome.name == section

        # non-compliance -- one check fails
        illegal_ds = self.load_dataset(STATIC_FILES['illegal-vertical'])
        illegal_outcomes = self.cf.check_dimensional_vertical_coordinate(illegal_ds)
        points, possible, _ = self.get_results(illegal_outcomes)
        assert len(illegal_outcomes) == 1
        for outcome in illegal_outcomes:
            assert outcome.name == section
        assert points < possible


    def test_appendix_d(self):
        '''
        CF 1.6
        Appendix D
        A dimensionless vertical coordinate is associated with its definition
        by the value of the standard_name attribute, and the terms of that
        definition are tied to file variables through formula_terms
        ("term: variable" elements, order not significant).

        For each listed dimensionless vertical coordinate, verify whether
        no_missing_terms accepts the supplied set of terms.
        '''
        hybrid_sigma = 'atmosphere_hybrid_sigma_pressure_coordinate'
        hybrid_height = 'atmosphere_hybrid_height_coordinate'

        # (expected outcome, standard name, terms) -- evaluated in order
        cases = (
            (True, 'atmosphere_ln_pressure_coordinate', {"p0", "lev"}),
            (True, 'atmosphere_sigma_coordinate', {"sigma", "ps", "ptop"}),
            (True, hybrid_sigma, {'a', 'b', 'ps'}),
            # alternative terms for the hybrid sigma pressure coordinate
            (True, hybrid_sigma, {'ap', 'b', 'ps'}),
            # an invalid set of terms fails
            (False, hybrid_sigma, {'a', 'b', 'p'}),
            (True, hybrid_height, {"a", "b", "orog"}),
            # missing terms should cause failure
            (False, hybrid_height, {"a", "b"}),
            # excess terms should cause failure
            (False, hybrid_height, {"a", "b", "c", "orog"}),
            (True, 'atmosphere_sleve_coordinate',
             {"a", "b1", "b2", "ztop", "zsurf1", "zsurf2"}),
            (True, 'ocean_sigma_coordinate', {"sigma", "eta", "depth"}),
            (True, 'ocean_s_coordinate',
             {"s", "eta", "depth", "a", "b", "depth_c"}),
            (True, 'ocean_sigma_z_coordinate',
             {"sigma", "eta", "depth", "depth_c", "nsigma", "zlev"}),
            (True, 'ocean_double_sigma_coordinate',
             {"sigma", "depth", "z1", "z2", "a", "href", "k_c"}),
        )
        for should_match, standard_name, terms in cases:
            verdict = no_missing_terms(standard_name, terms)
            if should_match:
                self.assertTrue(verdict)
            else:
                self.assertFalse(verdict)

    def test_dimensionless_vertical(self):
        '''
        Section 4.3.2

        Runs check_dimensionless_vertical_coordinate on a compliant dataset,
        on a non-compliant one, and again after giving one variable a
        malformed formula_terms attribute.
        '''
        section = u"§4.3 Vertical Coordinate"

        def count_imperfect(outcomes):
            # number of results that scored fewer points than possible
            return len([o for o in outcomes if o.value[0] < o.value[1]])

        # Check affirmative compliance -- all variables checked (2) pass
        good_ds = self.load_dataset(STATIC_FILES['dimensionless'])
        good_outcomes = self.cf.check_dimensionless_vertical_coordinate(good_ds)
        points, possible, _ = self.get_results(good_outcomes)
        assert len(good_outcomes) == 2
        assert points == possible
        assert all(o.name == section for o in good_outcomes)

        # Check negative compliance -- 4 results, 2 of them imperfect
        bad_ds = self.load_dataset(STATIC_FILES['bad'])
        bad_outcomes = self.cf.check_dimensionless_vertical_coordinate(bad_ds)
        points, possible, _ = self.get_results(bad_outcomes)
        assert len(bad_outcomes) == 4
        assert points <= possible
        assert count_imperfect(bad_outcomes) == 2
        assert all(o.name == section for o in bad_outcomes)

        # swap in a mock so a malformed formula_terms attribute can be set
        bad_ds.variables['lev2'] = MockVariable(bad_ds.variables['lev2'])
        mock_lev2 = bad_ds.variables['lev2']
        mock_lev2.formula_terms = 'a: var1 b:var2 orog:'

        # with the malformed attribute there are still 4 results, 2 imperfect
        bad_outcomes = self.cf.check_dimensionless_vertical_coordinate(bad_ds)
        points, possible, _ = self.get_results(bad_outcomes)
        assert len(bad_outcomes) == 4
        assert points <= possible
        assert count_imperfect(bad_outcomes) == 2
        assert all(o.name == section for o in bad_outcomes)


    def test_is_time_variable(self):
        '''
        is_time_variable should accept a variable identified as time through
        its standard_name, its own name, its axis, or its units, and reject a
        bare variable with an unrelated name.
        '''
        by_standard_name = MockVariable()
        by_standard_name.standard_name = 'time'
        self.assertTrue(is_time_variable('not_time', by_standard_name))

        # a bare variable is time only when its name says so
        bare = MockVariable()
        self.assertTrue(is_time_variable('time', bare))
        self.assertFalse(is_time_variable('not_time', bare))

        by_axis = MockVariable()
        by_axis.axis = 'T'
        self.assertTrue(is_time_variable('maybe_time', by_axis))

        by_units = MockVariable()
        by_units.units = 'seconds since 1900-01-01'
        self.assertTrue(is_time_variable('maybe_time', by_units))

    def test_dimensionless_standard_names(self):
        """Check that dimensionless standard names are properly detected"""
        xml_root = self.cf._std_names._root
        expectations = (
            # canonical_units are K, should be False
            ('sea_water_temperature', self.assertFalse),
            # canonical_units are 1, should be True
            ('sea_water_practical_salinity', self.assertTrue),
            # canonical_units are 1e-3, should be True
            ('sea_water_salinity', self.assertTrue),
        )
        for standard_name, expectation in expectations:
            expectation(cfutil.is_dimensionless_standard_name(xml_root,
                                                              standard_name))

    def test_check_time_coordinate(self):
        '''
        check_time_coordinate yields only truthy results for the example
        grid, and flags the bad dataset's time units.
        '''
        grid_ds = self.load_dataset(STATIC_FILES['example-grid'])
        for outcome in self.cf.check_time_coordinate(grid_ds):
            self.assertTrue(outcome.value)

        bad_ds = self.load_dataset(STATIC_FILES['bad'])
        points, possible, msgs = self.get_results(self.cf.check_time_coordinate(bad_ds))

        assert u'time does not have correct time units' in msgs
        assert (points, possible) == (1, 2)

    def test_check_calendar(self):
        """Load a dataset with an invalid calendar attribute (non-comp/bad.nc).
        This dataset has a variable, "time" with  calendar attribute "nope".

        The example grid is checked first and must yield only truthy results."""

        grid_ds = self.load_dataset(STATIC_FILES['example-grid'])
        for outcome in self.cf.check_calendar(grid_ds):
            self.assertTrue(outcome.value)

        bad_ds = self.load_dataset(STATIC_FILES['bad'])
        _, _, msgs = self.get_results(self.cf.check_calendar(bad_ds))

        expected = u"§4.4.1 Variable time should have a valid calendar: 'nope' is not a valid calendar"
        assert expected in msgs

    def test_check_aux_coordinates(self):
        '''
        The coordinate-systems result for the illegal-aux-coords fixture is
        expected to score 4/4 with no messages.
        '''
        loaded = self.load_dataset(STATIC_FILES['illegal-aux-coords'])
        by_name = {}
        for outcome in self.cf.check_aux_coordinates(loaded):
            by_name[outcome.name] = outcome
        coord_systems = by_name[u"§5 Coordinate Systems"]
        assert coord_systems.msgs == []  # shouldn't have any messages
        assert coord_systems.value == (4, 4)

    def test_check_grid_coordinates(self):
        '''
        check_grid_coordinates on the 2dim fixture scores 2/2, both for the
        named section result and in aggregate.
        '''
        loaded = self.load_dataset(STATIC_FILES['2dim'])
        outcomes = self.cf.check_grid_coordinates(loaded)
        points, possible, _ = self.get_results(outcomes)

        by_name = dict((outcome.name, outcome) for outcome in outcomes)
        section_result = by_name[u'§5.6 Horizontal Coorindate Reference Systems, Grid Mappings, Projections']
        assert section_result.value == (2, 2)
        assert (points, possible) == (2, 2)

    def test_check_two_dimensional(self):
        '''
        check_grid_coordinates yields only truthy results for the 2dim
        fixture, and two failing results for bad2dim.
        '''
        good_ds = self.load_dataset(STATIC_FILES['2dim'])
        for outcome in self.cf.check_grid_coordinates(good_ds):
            self.assertTrue(outcome.value)

        # Need the bad testing
        bad_ds = self.load_dataset(STATIC_FILES['bad2dim'])
        bad_outcomes = self.cf.check_grid_coordinates(bad_ds)
        points, possible, _ = self.get_results(bad_outcomes)

        # all variables checked fail (2)
        assert len(bad_outcomes) == 2
        assert points < possible
        for outcome in bad_outcomes:
            assert outcome.name == u'§5.6 Horizontal Coorindate Reference Systems, Grid Mappings, Projections'


    def test_check_reduced_horizontal_grid(self):
        '''
        check_reduced_horizontal_grid passes fully on rhgrid and reports one
        imperfect result out of two on bad-rhgrid.
        '''
        section = u'§5.3 Reduced Horizontal Grid'

        good_ds = self.load_dataset(STATIC_FILES['rhgrid'])
        good_outcomes = self.cf.check_reduced_horizontal_grid(good_ds)
        points, possible, _ = self.get_results(good_outcomes)
        assert points == possible
        assert len(good_outcomes) == 1
        for outcome in good_outcomes:
            assert outcome.name == section

        # load failing ds -- one variable has failing check
        bad_ds = self.load_dataset(STATIC_FILES['bad-rhgrid'])
        bad_outcomes = self.cf.check_reduced_horizontal_grid(bad_ds)
        points, possible, _ = self.get_results(bad_outcomes)
        assert points != possible
        assert len(bad_outcomes) == 2
        imperfect = [o for o in bad_outcomes if o.value[0] < o.value[1]]
        assert len(imperfect) == 1
        for outcome in bad_outcomes:
            assert outcome.name == section


    def test_check_grid_mapping(self):
        '''
        check_grid_mapping on the mapping fixture returns 8 results, 2 of
        which did not have perfect scores.
        '''
        loaded = self.load_dataset(STATIC_FILES['mapping'])
        outcomes = self.cf.check_grid_mapping(loaded)

        assert len(outcomes) == 8
        imperfect_values = [o.value for o in outcomes if o.value[0] < o.value[1]]
        assert len(imperfect_values) == 2
        for outcome in outcomes:
            assert outcome.name == u'§5.6 Horizontal Coorindate Reference Systems, Grid Mappings, Projections'


    def test_check_geographic_region(self):
        '''
        check_geographic_region on bad_region returns two results and
        reports the invalid 'Neverland' region.
        '''
        loaded = self.load_dataset(STATIC_FILES['bad_region'])
        outcomes = self.cf.check_geographic_region(loaded)
        points, possible, msgs = self.get_results(outcomes)

        # only one variable failed this check in this ds out of 2
        assert len(outcomes) == 2
        assert points < possible
        expected = u"6.1.1 'Neverland' specified by 'neverland' is not a valid region"
        assert expected in msgs


    def test_check_packed_data(self):
        '''
        check_packed_data on bad_data_type returns four results; only the
        third one has a truthy value.
        '''
        loaded = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcomes = self.cf.check_packed_data(loaded)
        self.assertEqual(len(outcomes), 4)

        expectations = (self.assertFalse, self.assertFalse,
                        self.assertTrue, self.assertFalse)
        for position, expectation in enumerate(expectations):
            expectation(outcomes[position].value)

    def test_compress_packed(self):
        """Tests compressed indexed coordinates"""
        good_ds = self.load_dataset(STATIC_FILES['reduced_horizontal_grid'])
        good_outcomes = self.cf.check_compression_gathering(good_ds)
        self.assertTrue(good_outcomes[0].value)

        # the first two results for the bad dataset must both be falsy
        bad_ds = self.load_dataset(STATIC_FILES['bad_data_type'])
        bad_outcomes = self.cf.check_compression_gathering(bad_ds)
        for position in (0, 1):
            self.assertFalse(bad_outcomes[position].value)

    def test_check_all_features_are_same_type(self):
        '''
        check_all_features_are_same_type returns a truthy result for both
        the rutgers and the featureType fixtures.
        '''
        for fixture_key in ('rutgers', 'featureType'):
            loaded = self.load_dataset(STATIC_FILES[fixture_key])
            verdict = self.cf.check_all_features_are_same_type(loaded)
            assert verdict

    def test_featureType_is_case_insensitive(self):
        '''
        Tests that the featureType attribute is case insensitive
        '''
        nc = self.new_nc_file()
        cases = (
            ('timeseriesprofile', (1, 1)),
            ('timeSeriesProfile', (1, 1)),
            ('traJectorYpRofile', (1, 1)),
            # This one should fail
            ('timeseriesprofilebad', (0, 1)),
        )
        for feature_type, expected_value in cases:
            nc.featureType = feature_type
            outcome = self.cf.check_feature_type(nc)
            self.assertTrue(outcome.value == expected_value)

    def test_check_units(self):
        '''
        Ensure that container variables are not checked for units but geophysical variables are
        '''
        loaded = self.load_dataset(STATIC_FILES['units_check'])
        outcomes = self.cf.check_units(loaded)

        # Variable names are not tracked for checks that passed, so the
        # assertion is made on the exact number of points and on the absence
        # of messages.
        # FIXME (badams): find a better way of grouping together results by
        #                 variable checked instead of checking the number of
        #                 points scored, which should be deprecated, and
        #                 furthermore is fragile and breaks tests when check
        #                 definitions change
        points, possible, msgs = self.get_results(outcomes)
        assert points == 24
        assert possible == 24
        assert msgs == []

    def test_check_duplicates(self):
        '''
        Test to verify that the check identifies duplicate axes. Load the
        duplicate_axis.nc dataset and verify the duplicate axes are accounted
        for.
        '''
        loaded = self.load_dataset(STATIC_FILES['duplicate_axis'])
        points, possible, msgs = self.get_results(self.cf.check_duplicate_axis(loaded))

        # only one check run here, so we can directly compare all the values
        assert points != possible
        first_message = msgs[0]
        assert first_message == u"'temp' has duplicate axis X defined by [lon_rho, lon_u]"

    def test_check_multi_dimensional_coords(self):
        '''
        Test to verify that multi dimensional coordinates are checked for
        sharing names with dimensions
        '''
        loaded = self.load_dataset(STATIC_FILES['multi-dim-coordinates'])
        outcomes = self.cf.check_multi_dimensional_coords(loaded)
        # tallied only to make sure the results can be aggregated
        self.get_results(outcomes)

        # 4 variables were checked in this ds, 2 of which passed
        assert len(outcomes) == 4
        imperfect = [o for o in outcomes if o.value[0] < o.value[1]]
        assert len(imperfect) == 2
        for outcome in outcomes:
            assert outcome.name == u"§5 Coordinate Systems"


    def test_64bit(self):
        '''
        Run the whole 'cf' checker through a CheckSuite on the ints64
        fixture; the test passes as long as the run does not raise.
        '''
        loaded = self.load_dataset(STATIC_FILES['ints64'])
        check_suite = CheckSuite()
        check_suite.checkers = dict(cf=CFBaseCheck)
        check_suite.run(loaded, 'cf')

    def test_variable_feature_check(self):
        '''
        check_variable_features fails the bad-trajectory fixture and fully
        passes the trajectory-complete and trajectory-implied fixtures.
        '''
        # non-compliant dataset -- 1/1 fail
        bad_ds = self.load_dataset(STATIC_FILES['bad-trajectory'])
        bad_outcomes = self.cf.check_variable_features(bad_ds)
        points, possible, _ = self.get_results(bad_outcomes)
        assert len(bad_outcomes) == 1
        assert points < possible
        imperfect = [o for o in bad_outcomes if o.value[0] < o.value[1]]
        assert len(imperfect) == 1
        for outcome in bad_outcomes:
            assert outcome.name == u'§9.1 Features and feature types'

        # compliant dataset, then a compliant(?) dataset
        for fixture_key in ('trajectory-complete', 'trajectory-implied'):
            loaded = self.load_dataset(STATIC_FILES[fixture_key])
            points, possible, _ = self.get_results(self.cf.check_variable_features(loaded))
            assert points == possible


    def test_check_cell_methods(self):
        """Load a dataset (climatology.nc) and check the cell methods.
        This dataset has variable "temperature" which has valid cell_methods
        format, cell_methods attribute, and valid names within the
        cell_methods attribute.

        A mock time series is then used to try a range of malformed
        cell_methods strings and verify the messages they produce."""

        dataset = self.load_dataset(STATIC_FILES['climatology'])
        results = self.cf.check_cell_methods(dataset)
        self.get_results(results)

        # use itertools.chain() to unpack the lists of messages
        results_list = list(chain(*(r.msgs for r in results if r.msgs)))

        # check the results only have expected headers
        assert {r.name for r in results}.issubset({u'§7.1 Cell Boundaries', u'§7.3 Cell Methods'})

        # check that all the expected variables have been hit
        assert all("temperature" in msg for msg in results_list)

        # check that all the results have come back passing
        assert all(r.value[0] == r.value[1] for r in results)

        # create a temporary variable and test this only
        nc_obj = MockTimeSeries()
        nc_obj.createVariable('temperature', 'd', ('time',))

        temp = nc_obj.variables['temperature']
        temp.cell_methods = 'lat: lon: mean depth: mean (interval: 20 meters)'
        results = self.cf.check_cell_methods(nc_obj)
        # invalid components lat, lon, and depth -- expect score == (6, 9)
        scored, out_of, messages = self.get_results(results)
        assert scored != out_of

        # only exercised: nothing is asserted for a non-numeric interval
        temp.cell_methods = 'lat: lon: mean depth: mean (interval: x whizbangs)'
        results = self.cf.check_cell_methods(nc_obj)
        self.get_results(results)

        # check non-standard comments are gauged correctly
        temp.cell_methods = 'lat: lon: mean depth: mean (comment: should not go here interval: 2.5 m)'
        results = self.cf.check_cell_methods(nc_obj)
        scored, out_of, messages = self.get_results(results)

        self.assertTrue(u'§7.3.3 The non-standard "comment:" element must come after any standard elements in cell_methods for variable temperature' in messages)

        # standalone comments require no keyword
        temp.cell_methods = 'lon: mean (This is a standalone comment)'
        results = self.cf.check_cell_methods(nc_obj)
        scored, out_of, messages = self.get_results(results)
        # messages is a list of strings, so a plain `"standalone" not in
        # messages` only tested for an element equal to "standalone" and
        # could never fail; look inside each message instead
        assert not any("standalone" in msg for msg in messages)

        # check that invalid keywords dealt with
        temp.cell_methods = 'lat: lon: mean depth: mean (invalid_keyword: this is invalid)'
        results = self.cf.check_cell_methods(nc_obj)
        scored, out_of, messages = self.get_results(results)
        self.assertTrue(u'§7.3.3 Invalid cell_methods keyword "invalid_keyword:" for variable temperature. Must be one of [interval, comment]' in messages)

        # check that "parenthetical elements" are well-formed (they should not be)
        temp.cell_methods = 'lat: lon: mean depth: mean (interval 0.2 m interval: 0.01 degrees)'
        results = self.cf.check_cell_methods(nc_obj)
        scored, out_of, messages = self.get_results(results)
        assert u'§7.3.3 Parenthetical content inside temperature:cell_methods is not well formed: interval 0.2 m interval: 0.01 degrees' in messages



    # --------------------------------------------------------------------------------
    # Utility Method Tests
    # --------------------------------------------------------------------------------

    def test_temporal_unit_conversion(self):
        '''
        Plain time units convert between each other, but not to a
        "<units> since <epoch>" reference time.
        '''
        hours_to_seconds = units_convertible('hours', 'seconds')
        self.assertTrue(hours_to_seconds)
        hours_to_reference = units_convertible('hours', 'hours since 2000-01-01')
        self.assertFalse(hours_to_reference)

    def test_units_temporal(self):
        '''
        units_temporal accepts "<units> since <date>" and rejects both bare
        units and an unparseable reference.
        '''
        self.assertTrue(units_temporal('hours since 2000-01-01'))
        for rejected in ('hours', 'days since the big bang'):
            self.assertFalse(units_temporal(rejected))

Exemple #5

0

Afficher le fichier

Fichier : test_cf.py Projet : bjlittle/compliance-checker

class TestCF(unittest.TestCase):
    # @see
    # http://www.saltycrane.com/blog/2012/07/how-prevent-nose-unittest-using-docstring-when-verbosity-2/
    def shortDescription(self):
        '''
        Always report that this test has no short description
        '''
        return None

    # override __str__ and __repr__ behavior to show a copy-pastable nosetest name for ion tests
    #  ion.module:TestClassName.test_function_name
    def __repr__(self):
        name = self.id()
        name = name.split('.')
        if name[0] not in ["ion", "pyon"]:
            return "%s (%s)" % (name[-1], '.'.join(name[:-1]))
        else:
            return "%s ( %s )" % (name[-1], '.'.join(name[:-2]) + ":" + '.'.join(name[-2:]))
    __str__ = __repr__
    
    def setUp(self):
        '''
        Create a fresh CFBaseCheck for each test and expose it as self.cf
        '''
        checker = CFBaseCheck()
        self.cf = checker

    #--------------------------------------------------------------------------------
    # Helper Methods
    #--------------------------------------------------------------------------------

    def new_nc_file(self):
        '''
        Create 'example.nc' in the temp directory, opened for writing

        Raises IOError if that file already exists. Otherwise the returned
        dataset is registered for cleanup: cleanups run in reverse order of
        registration, so it is closed before the file is removed.
        '''
        target = os.path.join(gettempdir(), 'example.nc')
        if not os.path.exists(target):
            handle = Dataset(target, 'w')
            self.addCleanup(os.remove, target)
            self.addCleanup(handle.close)
            return handle
        raise IOError('File Exists: %s' % target)

    def get_pair(self, nc_dataset):
        '''
        Return a pairwise object for the dataset

        :param nc_dataset: Either a string, which is opened read-only as a
                           Dataset and closed again at test cleanup, or any
                           other object, which is used as the dataset as-is.
        :return: A DSPair built from the dataset and a NetCDFDogma created
                 with this checker's beliefs.
        '''
        # ``basestring`` only exists on Python 2. This tuple matches the same
        # types there (str and unicode) and collapses to ``str`` on Python 3.
        string_types = (str, type(u''))
        if isinstance(nc_dataset, string_types):
            nc_dataset = Dataset(nc_dataset, 'r')
            self.addCleanup(nc_dataset.close)
        dogma = NetCDFDogma('nc', self.cf.beliefs(), nc_dataset)
        pair = DSPair(nc_dataset, dogma)
        return pair

    #--------------------------------------------------------------------------------
    # Compliance Tests
    #--------------------------------------------------------------------------------

    def test_check_data_types(self):
        """
        2.2 The netCDF data types char, byte, short, int, float or real, and double are all acceptable

        Expects a truthy result for the 'rutgers' file and the value (5, 6)
        for the 'bad_data_type' file.
        """
        good_pair = self.get_pair(static_files['rutgers'])
        self.assertTrue(self.cf.check_data_types(good_pair).value)

        bad_pair = self.get_pair(static_files['bad_data_type'])
        bad_outcome = self.cf.check_data_types(bad_pair)
        assert bad_outcome.value == (5, 6)


    def test_naming_conventions(self):
        '''
        Section 2.3 Naming Conventions

        Variable, dimension and attribute names should begin with a letter and be composed of letters, digits, and underscores.

        Expects one point per variable for the 'rutgers' file, and exactly one
        point fewer for the 'bad' file, whose first message must mention
        '_poor_dim'.
        '''
        dataset = self.get_pair(static_files['rutgers'])
        result = self.cf.check_naming_conventions(dataset)
        num_var = len(dataset.dataset.variables)

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(result.value, (num_var, num_var))

        dataset = self.get_pair(static_files['bad'])
        result = self.cf.check_naming_conventions(dataset)
        num_var = len(dataset.dataset.variables)
        self.assertEqual(result.value, (num_var - 1, num_var))
        # assertIn shows both operands on failure, unlike a bare assert
        self.assertIn('_poor_dim', result.msgs[0])

    def test_check_names_unique(self):
        """
        2.3 names should not be distinguished purely by case, i.e., if case is disregarded, no two names should be the same.

        Expects one point per variable for the 'rutgers' file.
        """
        dataset = self.get_pair(static_files['rutgers'])
        result = self.cf.check_names_unique(dataset)

        num_var = len(dataset.dataset.variables)

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(result.value, (num_var, num_var))

        #TODO: Add bad unique names to bad.nc

    def test_check_dimension_names(self):
        """
        2.4 A variable may have any number of dimensions, including zero, and the dimensions must all have different names.

        Expects the value (5, 6) for the 'bad_data_type' file.
        """
        bad_pair = self.get_pair(static_files['bad_data_type'])
        outcome = self.cf.check_dimension_names(bad_pair)
        assert outcome.value == (5, 6)

    def test_check_dimension_order(self):
        """
        2.4 If any or all of the dimensions of a variable have the interpretations of "date or time" (T), "height or depth" (Z),
        "latitude" (Y), or "longitude" (X) then we recommend, those dimensions to appear in the relative order T, then Z, then Y,
        then X in the CDL definition corresponding to the file. All other dimensions should, whenever possible, be placed to the
        left of the spatiotemporal dimensions.

        Expects the value (11, 12) for the 'bad_data_type' file.
        """
        bad_pair = self.get_pair(static_files['bad_data_type'])
        outcome = self.cf.check_dimension_order(bad_pair)
        assert outcome.value == (11, 12)

    def test_check_fill_value_outside_valid_range(self):
        """
        2.5.1 The _FillValue should be outside the range specified by valid_range (if used) for a variable.

        Expects the value (1, 2) for the 'bad_data_type' file.
        """
        bad_pair = self.get_pair(static_files['bad_data_type'])
        outcome = self.cf.check_fill_value_outside_valid_range(bad_pair)
        assert outcome.value == (1, 2)

    def test_check_conventions_are_cf_16(self):
        """
        2.6.1 the NUG defined global attribute Conventions to the string value "CF-1.6"

        Expects a truthy result for the 'rutgers' file.
        """
        good_pair = self.get_pair(static_files['rutgers'])
        self.assertTrue(self.cf.check_conventions_are_cf_16(good_pair).value)

        #TODO add fail case?

    def test_check_convention_globals(self):
        """
        2.6.2 title/history global attributes, must be strings. Do not need to exist.

        Every result is expected to be truthy for both files used here.
        """
        # Passing case
        good_pair = self.get_pair(static_files['rutgers'])
        for outcome in self.cf.check_convention_globals(good_pair):
            self.assertTrue(outcome.value)

        # A missing attribute is still expected to pass
        other_pair = self.get_pair(static_files['bad_data_type'])
        for outcome in self.cf.check_convention_globals(other_pair):
            self.assertTrue(outcome.value)

    def test_check_convention_possibly_var_attrs(self):
        """
        3.1 The units attribute is required for all variables that represent dimensional quantities
        (except for boundary variables defined in Section 7.1, "Cell Boundaries" and climatology variables
        defined in Section 7.4, "Climatological Statistics").

        Units are not required for dimensionless quantities. A variable with no units attribute is assumed
        to be dimensionless. However, a units attribute specifying a dimensionless unit may optionally be
        included.

        - units required
        - type must be recognized by udunits
        - if std name specified, must be consistent with standard name table, must also be consistent with a
          specified cell_methods attribute if present

        Every result is expected to be truthy for 'rutgers' and falsy for
        'bad_data_type'.
        """
        good_pair = self.get_pair(static_files['rutgers'])
        for outcome in self.cf.check_convention_possibly_var_attrs(good_pair):
            self.assertTrue(outcome.value)

        bad_pair = self.get_pair(static_files['bad_data_type'])
        for outcome in self.cf.check_convention_possibly_var_attrs(bad_pair):
            self.assertFalse(outcome.value)

    def test_check_standard_name(self):
        """
        3.3 A standard name is associated with a variable via the attribute standard_name which takes a
        string value comprised of a standard name optionally followed by one or more blanks and a
        standard name modifier

        Every result is expected to be truthy for '2dim' and falsy for
        'bad_data_type'.
        """
        good_pair = self.get_pair(static_files['2dim'])
        for outcome in self.cf.check_standard_name(good_pair):
            self.assertTrue(outcome.value)

        bad_pair = self.get_pair(static_files['bad_data_type'])
        for outcome in self.cf.check_standard_name(bad_pair):
            self.assertFalse(outcome.value)




    def test_check_units(self):
        '''
        Every check_units result is expected to be truthy for '2dim' and
        falsy for 'bad_data_type'
        '''
        good_pair = self.get_pair(static_files['2dim'])
        for outcome in self.cf.check_units(good_pair):
            self.assertTrue(outcome.value)

        bad_pair = self.get_pair(static_files['bad_data_type'])
        for outcome in self.cf.check_units(bad_pair):
            self.assertFalse(outcome.value)



    def test_coordinate_types(self):
        '''
        Section 4 Coordinate Types

        We strongly recommend that coordinate variables be used for all coordinate types whenever they are applicable.

        Every result for the 'bad_data_type' file is expected to be truthy.
        '''
        dataset = self.get_pair(static_files['bad_data_type'])
        result = self.cf.check_coordinate_vars_for_all_coordinate_types(dataset)
        for each in result:
            # Call form of print: same output on Python 2, and unlike the
            # print statement it also parses on Python 3
            print(each)
            self.assertTrue(each.value)

    def test_check_coordinate_axis_attr(self):
        '''
        Every result is expected to be truthy for the '2dim' file. For the
        'bad_data_type' file, 'time' and 'latitude' results must be truthy and
        'salinity' results must be falsy, except the
        'does_not_depend_on_mult_coord_vars' one, which is not checked.
        '''
        dataset = self.get_pair(static_files['2dim'])
        result = self.cf.check_coordinate_axis_attr(dataset)
        for each in result:
            self.assertTrue(each.value)

        dataset = self.get_pair(static_files['bad_data_type'])
        result = self.cf.check_coordinate_axis_attr(dataset)
        for each in result:
            # Call form of print: same output on Python 2, and unlike the
            # print statement it also parses on Python 3
            print(each)
            if each.name[1] in ('time', 'latitude'):
                self.assertTrue(each.value)
            # name[2] is only looked at for salinity results
            if (each.name[1] == 'salinity' and
                    each.name[2] != 'does_not_depend_on_mult_coord_vars'):
                self.assertFalse(each.value)


    def test_latitude(self):
        '''
        Section 4.1 Latitude Coordinate

        Every result for the 'example-grid' file must be fully scored. For the
        'bad' file the individual per-variable results are pinned below.
        '''
        # Check compliance
        dataset = self.get_pair(static_files['example-grid'])
        results = self.cf.check_latitude(dataset)
        for r in results:
            if isinstance(r.value, tuple):
                # Tuple values must have equal first and second elements
                self.assertEqual(r.value[0], r.value[1])
            else:
                self.assertTrue(r.value)

        # Verify non-compliance
        dataset = self.get_pair(static_files['bad'])
        results = self.cf.check_latitude(dataset)
        # Index the values by everything after the first element of the
        # result name, e.g. ('lat', 'has_units')
        rd = {r.name[1:]: r.value for r in results}

        # assertEqual replaces the deprecated assertEquals alias
        self.assertFalse(rd[('lat', 'has_units')])
        self.assertEqual(rd[('lat', 'correct_units')], (0, 3))
        self.assertTrue(rd[('lat_uv', 'has_units')])
        self.assertEqual(rd[('lat_uv', 'correct_units')], (2, 3))
        self.assertTrue(rd[('lat_like', 'has_units')])
        self.assertEqual(rd[('lat_like', 'correct_units')], (1, 3))
        

    def test_longitude(self):
        '''
        Section 4.2 Longitude Coordinate

        Every result for the 'example-grid' file must be fully scored. For the
        'bad' file the individual per-variable results are pinned below.
        '''
        # Check compliance
        dataset = self.get_pair(static_files['example-grid'])
        results = self.cf.check_longitude(dataset)
        for r in results:
            if isinstance(r.value, tuple):
                # Tuple values must have equal first and second elements
                self.assertEqual(r.value[0], r.value[1])
            else:
                self.assertTrue(r.value)

        # Verify non-compliance
        dataset = self.get_pair(static_files['bad'])
        results = self.cf.check_longitude(dataset)
        # Index the values by everything after the first element of the
        # result name, e.g. ('lon', 'has_units')
        rd = {r.name[1:]: r.value for r in results}

        # assertEqual replaces the deprecated assertEquals alias
        self.assertFalse(rd[('lon', 'has_units')])
        self.assertEqual(rd[('lon', 'correct_units')], (0, 3))
        self.assertTrue(rd[('lon_uv', 'has_units')])
        self.assertEqual(rd[('lon_uv', 'correct_units')], (2, 3))
        self.assertTrue(rd[('lon_like', 'has_units')])
        self.assertEqual(rd[('lon_like', 'correct_units')], (1, 3))

    def test_is_vertical_coordinate(self):
        '''
        Section 4.3 Qualifiers for Vertical Coordinate

        NOTE: The standard doesn't explicitly say that vertical coordinates must be a
        coordinate type.

        Each case sets one kind of attribute on a fresh MockVariable and
        expects is_vertical_coordinate to accept it under an unknown name.
        '''
        # standard_name alone
        by_standard_name = MockVariable()
        by_standard_name.standard_name = 'depth'
        self.assertTrue(is_vertical_coordinate('not_known', by_standard_name))

        # axis alone
        by_axis = MockVariable()
        by_axis.axis = 'Z'
        self.assertTrue(is_vertical_coordinate('not_known', by_axis))

        # units alone
        by_units = MockVariable()
        by_units.units = 'dbar'
        self.assertTrue(is_vertical_coordinate('not_known', by_units))

        # units together with positive
        by_positive = MockVariable()
        by_positive.units = 'm'
        by_positive.positive = 'up'
        self.assertTrue(is_vertical_coordinate('not_known', by_positive))

    def test_vertical_coordinate(self):
        '''
        Section 4.3 Vertical (Height or Depth) coordinate

        Every result for 'example-grid' must be truthy; for 'bad' the
        has_units / correct_units results of three variables are pinned.
        '''
        # Compliant file
        good_pair = self.get_pair(static_files['example-grid'])
        for outcome in self.cf.check_vertical_coordinate(good_pair):
            self.assertTrue(outcome.value)

        # Non-compliant file
        bad_pair = self.get_pair(static_files['bad'])
        bad_results = self.cf.check_vertical_coordinate(bad_pair)

        # Key each value by the result name minus its first element
        by_key = {}
        for outcome in bad_results:
            by_key[outcome.name[1:]] = outcome.value

        # height: no units, so the units cannot be correct either
        self.assertFalse(by_key[('height', 'has_units')])
        self.assertFalse(by_key[('height', 'correct_units')])
        # depth: has units, but they are not correct
        self.assertTrue(by_key[('depth', 'has_units')])
        self.assertFalse(by_key[('depth', 'correct_units')])
        # depth2: has units, but they are not correct
        self.assertTrue(by_key[('depth2', 'has_units')])
        self.assertFalse(by_key[('depth2', 'correct_units')])
        

    def test_vertical_dimension(self):
        '''
        Section 4.3.1 Dimensional Vertical Coordinate

        Every result is expected to be truthy for 'example-grid' and falsy
        for 'bad'.
        '''
        good_pair = self.get_pair(static_files['example-grid'])
        for outcome in self.cf.check_dimensional_vertical_coordinate(good_pair):
            self.assertTrue(outcome.value)

        bad_pair = self.get_pair(static_files['bad'])
        for outcome in self.cf.check_dimensional_vertical_coordinate(bad_pair):
            self.assertFalse(outcome.value)

    def test_appendix_d(self):
        '''
        CF 1.6
        Appendix D
        The definitions given here allow an application to compute dimensional
        coordinate values from the dimensionless ones and associated variables.
        The formulas are expressed for a gridpoint (n,k,j,i) where i and j are
        the horizontal indices, k is the vertical index and n is the time index.
        A coordinate variable is associated with its definition by the value of
        the standard_name attribute. The terms in the definition are associated
        with file variables by the formula_terms attribute. The formula_terms
        attribute takes a string value, the string being comprised of
        blank-separated elements of the form "term: variable", where term is a
        keyword that represents one of the terms in the definition, and variable
        is the name of the variable in a netCDF file that contains the values
        for that term. The order of elements is not significant.

        Each sample formula_terms string below must match the regex that
        dimless_vertical_coordinates holds for its standard name.
        '''
        patterns = dict(dimless_vertical_coordinates)

        # (standard_name, sample formula_terms) pairs, checked in this order
        samples = (
            ('atmosphere_ln_pressure_coordinate',
                "p0: var1 lev: var2"),
            ('atmosphere_sigma_coordinate',
                "sigma: var1 ps: var2 ptop: var3"),
            ('atmosphere_hybrid_sigma_pressure_coordinate',
                "a: var1 b: var2 ps: var3 p0: var4"),
            ('atmosphere_hybrid_height_coordinate',
                "a: var1 b: var2 orog: var3"),
            ('atmosphere_sleve_coordinate',
                "a: var1 b1: var2 b2: var3 ztop: var4 zsurf1: var5 zsurf2: var6"),
            ('ocean_sigma_coordinate',
                "sigma: var1 eta: var2 depth: var3"),
            ('ocean_s_coordinate',
                "s: var1 eta: var2 depth: var3 a: var4 b: var5 depth_c: var6"),
            ('ocean_sigma_z_coordinate',
                "sigma: var1 eta: var2 depth: var3 depth_c: var4 nsigma: var5 zlev: var6"),
            ('ocean_double_sigma_coordinate',
                "sigma: var1 depth: var2 z1: var3 z2: var4 a: var5 href: var6 k_c: var7"),
        )
        for std_name, formula_terms in samples:
            self.assertIsNotNone(re.match(patterns[std_name], formula_terms))

    def test_dimensionless_vertical(self):
        '''
        Section 4.3.2

        Every result must be truthy for 'dimensionless'; for 'bad' the
        formula_terms / terms_exist results of lev1 and lev2 are pinned.
        '''
        # Compliant file
        good_pair = self.get_pair(static_files['dimensionless'])
        for outcome in self.cf.check_dimensionless_vertical_coordinate(good_pair):
            self.assertTrue(outcome.value)

        # Non-compliant file
        bad_pair = self.get_pair(static_files['bad'])
        bad_results = self.cf.check_dimensionless_vertical_coordinate(bad_pair)
        by_key = {}
        for outcome in bad_results:
            by_key[outcome.name[1:]] = outcome.value

        # lev1: formula_terms result is falsy
        self.assertFalse(by_key[('lev1', 'formula_terms')])

        # lev2: formula_terms result is truthy, terms_exist result is falsy
        self.assertTrue(by_key[('lev2', 'formula_terms')])
        self.assertFalse(by_key[('lev2', 'terms_exist')])
            
    def test_is_time_variable(self):
        '''
        is_time_variable is expected to accept a variable by standard_name,
        by the name 'time', by axis or by units, and to reject one that has
        none of these
        '''
        # standard_name of 'time' under an unrelated name
        by_standard_name = MockVariable()
        by_standard_name.standard_name = 'time'
        self.assertTrue(is_time_variable('not_time', by_standard_name))

        # No attributes at all: only the name 'time' is accepted
        bare = MockVariable()
        self.assertTrue(is_time_variable('time', bare))
        self.assertFalse(is_time_variable('not_time', bare))

        # axis of 'T'
        by_axis = MockVariable()
        by_axis.axis = 'T'
        self.assertTrue(is_time_variable('maybe_time', by_axis))

        # units with a "since <date>" reference
        by_units = MockVariable()
        by_units.units = 'seconds since 1900-01-01'
        self.assertTrue(is_time_variable('maybe_time', by_units))

    def test_check_time_coordinate(self):
        '''
        Every result must be truthy for 'example-grid'; for 'bad' the unit
        results of bad_time_1 and bad_time_2 are pinned
        '''
        good_pair = self.get_pair(static_files['example-grid'])
        for outcome in self.cf.check_time_coordinate(good_pair):
            self.assertTrue(outcome.value)

        bad_pair = self.get_pair(static_files['bad'])
        bad_results = self.cf.check_time_coordinate(bad_pair)
        by_key = {}
        for outcome in bad_results:
            by_key[outcome.name[1:]] = outcome.value

        self.assertFalse(by_key[('bad_time_1', 'has_units')])
        self.assertTrue(by_key[('bad_time_2', 'has_units')])
        self.assertFalse(by_key[('bad_time_2', 'correct_units')])

    def test_check_calendar(self):
        '''
        Every result must be truthy for 'example-grid'; for 'bad' the
        calendar results of bad_time_1 and bad_time_2 are pinned
        '''
        good_pair = self.get_pair(static_files['example-grid'])
        for outcome in self.cf.check_calendar(good_pair):
            self.assertTrue(outcome.value)

        bad_pair = self.get_pair(static_files['bad'])
        bad_results = self.cf.check_calendar(bad_pair)
        by_key = {}
        for outcome in bad_results:
            by_key[outcome.name[1:]] = outcome.value

        self.assertFalse(by_key[('bad_time_1', 'has_calendar')])
        self.assertFalse(by_key[('bad_time_1', 'valid_calendar')])
        self.assertTrue(by_key[('bad_time_2', 'has_calendar')])
        self.assertFalse(by_key[('bad_time_2', 'valid_calendar')])

    def test_check_independent_axis_dimensions(self):
        '''
        Every result is expected to be truthy for 'example-grid' and falsy
        for 'bad'
        '''
        good_pair = self.get_pair(static_files['example-grid'])
        for outcome in self.cf.check_independent_axis_dimensions(good_pair):
            self.assertTrue(outcome.value)

        bad_pair = self.get_pair(static_files['bad'])
        for outcome in self.cf.check_independent_axis_dimensions(bad_pair):
            self.assertFalse(outcome.value)

    def test_check_two_dimensional(self):
        '''
        Every result must be truthy for '2dim'; for 'bad2dim' the first six
        results are pinned by position
        '''
        good_pair = self.get_pair(static_files['2dim'])
        for outcome in self.cf.check_two_dimensional(good_pair):
            self.assertTrue(outcome.value)

        # Need the bad testing
        bad_pair = self.get_pair(static_files['bad2dim'])
        bad_results = self.cf.check_two_dimensional(bad_pair)
        # Expected truthiness of results 0..5, checked in order
        expectations = (True, False, False, True, False, True)
        for position, should_pass in enumerate(expectations):
            if should_pass:
                self.assertTrue(bad_results[position].value)
            else:
                self.assertFalse(bad_results[position].value)


    def test_check_reduced_horizontal_grid(self):
        '''
        The 'PS' result must be truthy for 'rhgrid'. For 'bad-rhgrid' every
        result must be falsy, 'PSa' and 'PSb' must carry the expected
        messages, and no result may be reported for 'PSc'.
        '''
        dataset = self.get_pair(static_files['rhgrid'])
        results = self.cf.check_reduced_horizontal_grid(dataset)
        rd = {r.name[1]: r.value for r in results}
        self.assertTrue(rd['PS'])

        dataset = self.get_pair(static_files['bad-rhgrid'])
        results = self.cf.check_reduced_horizontal_grid(dataset)
        rd = {r.name[1]: (r.value, r.msgs) for r in results}

        # values() works on both Python 2 and 3 (iteritems is Python 2 only),
        # and neither the key nor the messages are needed in this loop
        for value, _msgs in rd.values():
            self.assertFalse(value)

        self.assertIn('Coordinate longitude is not a proper variable', rd['PSa'][1])
        self.assertIn("Coordinate latitude's dimension, latdim, is not a dimension of PSb", rd['PSb'][1])
        # assertNotIn reports the offending dict on failure
        self.assertNotIn('PSc', rd)


    def test_check_horz_crs_grid_mappings_projections(self):
        '''
        For the 'mapping' file, 'wgs84' must have the value (3, 3) and 'epsg'
        the value (7, 8)
        '''
        mapping_pair = self.get_pair(static_files['mapping'])
        outcomes = self.cf.check_horz_crs_grid_mappings_projections(mapping_pair)
        by_name = {}
        for outcome in outcomes:
            by_name[outcome.name[1]] = outcome.value
        assert by_name['wgs84'] == (3, 3)
        assert by_name['epsg'] == (7, 8)


    def test_check_scalar_coordinate_system(self):
        '''
        The first result for 'bad_data_type' must have the value (1, 2)
        '''
        bad_pair = self.get_pair(static_files['bad_data_type'])
        outcomes = self.cf.check_scalar_coordinate_system(bad_pair)
        first = outcomes[0]
        assert first.value == (1, 2)

    def test_check_geographic_region(self):
        '''
        For 'bad_region' the first result must be falsy and the second truthy
        '''
        region_pair = self.get_pair(static_files['bad_region'])
        outcomes = self.cf.check_geographic_region(region_pair)

        first_value = outcomes[0].value
        self.assertFalse(first_value)
        second_value = outcomes[1].value
        self.assertTrue(second_value)

    def test_check_alternative_coordinates(self):
        '''
        The first result for 'bad_data_type' must be truthy
        '''
        bad_pair = self.get_pair(static_files['bad_data_type'])
        outcomes = self.cf.check_alternative_coordinates(bad_pair)
        first = outcomes[0]
        self.assertTrue(first.value)


    #def test_check_cell_boundaries(self):
    #    dataset = self.get_pair(static_files['bad_data_type'])
    #    results = self.cf.check_cell_boundaries(dataset)
    #    print results
    #    self.assertTrue(results[0].value)


    def test_check_packed_data(self):
        '''
        For 'bad_data_type' the first result must be falsy and the second
        truthy
        '''
        bad_pair = self.get_pair(static_files['bad_data_type'])
        outcomes = self.cf.check_packed_data(bad_pair)
        first_value = outcomes[0].value
        self.assertFalse(first_value)
        second_value = outcomes[1].value
        self.assertTrue(second_value)


    def test_check_compression(self):
        '''
        For 'bad_data_type' the first result must have the value (2, 2) and
        the second (0, 2)
        '''
        bad_pair = self.get_pair(static_files['bad_data_type'])
        outcomes = self.cf.check_compression(bad_pair)
        first_value = outcomes[0].value
        assert first_value == (2,2)
        second_value = outcomes[1].value
        assert second_value == (0,2)

    def test_check_all_features_are_same_type(self):
        '''
        The check must return None for 'rutgers', a truthy result for
        'featureType' and a falsy result for 'bad_data_type'
        '''
        dataset = self.get_pair(static_files['rutgers'])
        results = self.cf.check_all_features_are_same_type(dataset)
        # Identity test against None; unlike `== None` it cannot be affected
        # by a custom __eq__, and it reports the actual value on failure
        self.assertIsNone(results)

        dataset = self.get_pair(static_files['featureType'])
        results = self.cf.check_all_features_are_same_type(dataset)
        self.assertTrue(results.value)

        dataset = self.get_pair(static_files['bad_data_type'])
        results = self.cf.check_all_features_are_same_type(dataset)
        self.assertFalse(results.value)

    def test_check_orthogonal_multidim_array(self):
        '''
        Every result for 'rutgers' must be truthy
        '''
        pair = self.get_pair(static_files['rutgers'])
        for outcome in self.cf.check_orthogonal_multidim_array(pair):
            self.assertTrue(outcome.value)

    def test_check_incomplete_multidim_array(self):
        '''
        Every result for 'bad_data_type' must be truthy
        '''
        pair = self.get_pair(static_files['bad_data_type'])
        for outcome in self.cf.check_incomplete_multidim_array(pair):
            self.assertTrue(outcome.value)

    def test_check_contiguous_ragged_array(self):
        '''
        Every result for 'cont_ragged' must be truthy
        '''
        pair = self.get_pair(static_files['cont_ragged'])
        for outcome in self.cf.check_contiguous_ragged_array(pair):
            self.assertTrue(outcome.value)

    def test_check_indexed_ragged_array(self):
        '''
        Every result for 'index_ragged' must be truthy
        '''
        pair = self.get_pair(static_files['index_ragged'])
        for outcome in self.cf.check_indexed_ragged_array(pair):
            self.assertTrue(outcome.value)


    def test_check_feature_type(self):
        '''
        The result must be truthy for 'index_ragged' and falsy for
        'bad_data_type'
        '''
        good_pair = self.get_pair(static_files['index_ragged'])
        self.assertTrue(self.cf.check_feature_type(good_pair).value)

        bad_pair = self.get_pair(static_files['bad_data_type'])
        self.assertFalse(self.cf.check_feature_type(bad_pair).value)



    def test_check_coordinates_and_metadata(self):
        '''
        For 'bad_data_type' the first three results must be falsy, truthy and
        falsy in that order; for 'index_ragged' the last result must be truthy
        '''
        bad_pair = self.get_pair(static_files['bad_data_type'])
        bad_results = self.cf.check_coordinates_and_metadata(bad_pair)
        first, second, third = (0, 1, 2)
        self.assertFalse(bad_results[first].value)
        self.assertTrue(bad_results[second].value)
        self.assertFalse(bad_results[third].value)

        ragged_pair = self.get_pair(static_files['index_ragged'])
        ragged_results = self.cf.check_coordinates_and_metadata(ragged_pair)
        last = ragged_results[-1]
        self.assertTrue(last.value)

    def test_check_missing_data(self):
        '''
        Every result is expected to be truthy for 'index_ragged' and falsy
        for 'bad_missing_data'
        '''
        good_pair = self.get_pair(static_files['index_ragged'])
        for outcome in self.cf.check_missing_data(good_pair):
            self.assertTrue(outcome.value)

        bad_pair = self.get_pair(static_files['bad_missing_data'])
        for outcome in self.cf.check_missing_data(bad_pair):
            self.assertFalse(outcome.value)

    #--------------------------------------------------------------------------------
    # Utility Method Tests
    #--------------------------------------------------------------------------------

    def test_temporal_unit_conversion(self):
        '''
        'hours' is expected to be convertible to 'seconds' but not to a unit
        string carrying a "since <date>" reference
        '''
        plain_to_plain = units_convertible('hours', 'seconds')
        self.assertTrue(plain_to_plain)
        plain_to_referenced = units_convertible('hours', 'hours since 2000-01-01')
        self.assertFalse(plain_to_referenced)

Exemple #6

0

Afficher le fichier

Fichier : test_cf.py Projet : castelao/compliance-checker

class TestCF(unittest.TestCase):
    # @see
    # http://www.saltycrane.com/blog/2012/07/how-prevent-nose-unittest-using-docstring-when-verbosity-2/

    def shortDescription(self):
        '''
        Always report no short description for a test
        '''
        description = None
        return description

    # override __str__ and __repr__ behavior to show a copy-pastable nosetest name for ion tests
    #  ion.module:TestClassName.test_function_name
    def __repr__(self):
        name = self.id()
        name = name.split('.')
        if name[0] not in ["ion", "pyon"]:
            return "%s (%s)" % (name[-1], '.'.join(name[:-1]))
        else:
            return "%s ( %s )" % (name[-1], '.'.join(name[:-2]) + ":" + '.'.join(name[-2:]))
    __str__ = __repr__

    def setUp(self):
        '''
        Create a fresh CFBaseCheck instance and store it as self.cf
        '''
        checker = CFBaseCheck()
        self.cf = checker

    # --------------------------------------------------------------------------------
    # Helper Methods
    # --------------------------------------------------------------------------------

    def new_nc_file(self):
        '''
        Create 'example.nc' in the temp directory, opened for writing

        Raises IOError if the file already exists. Cleanups are registered
        to close the dataset and remove the file.
        '''
        target = os.path.join(gettempdir(), 'example.nc')
        already_present = os.path.exists(target)
        if already_present:
            raise IOError('File Exists: %s' % target)

        handle = Dataset(target, 'w')
        # unittest runs cleanups last-in-first-out: close, then remove
        self.addCleanup(os.remove, target)
        self.addCleanup(handle.close)
        return handle

    def load_dataset(self, nc_dataset):
        '''
        Open the netCDF file at the given path read-only

        The dataset is closed by a registered cleanup. Raises ValueError
        when nc_dataset is not a string.
        '''
        if isinstance(nc_dataset, str):
            opened = Dataset(nc_dataset, 'r')
            self.addCleanup(opened.close)
            return opened

        raise ValueError("nc_dataset should be a string")

    def get_results(self, results):
        '''
        Tally a result set into (scored, out_of, messages)

        A tuple value counts as (scored, possible); any other value counts
        as one possible point and int(value) scored points.
        '''
        scored = out_of = 0
        for result in results:
            value = result.value
            if not isinstance(value, tuple):
                out_of += 1
                scored += int(value)
                continue
            out_of += value[1]
            scored += value[0]

        # Second pass: gather every message in result order
        messages = [msg for result in results for msg in result.msgs]

        return scored, out_of, messages

    # --------------------------------------------------------------------------------
    # Compliance Tests
    # --------------------------------------------------------------------------------

    def test_check_data_types(self):
        """
        2.2 Acceptable netCDF data types are char, byte, short, int, float or real, and double
        """
        good = self.load_dataset(STATIC_FILES['rutgers'])
        self.assertTrue(self.cf.check_data_types(good).value)

        bad = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcome = self.cf.check_data_types(bad)
        assert outcome.value == (5, 6)

    def test_naming_conventions(self):
        '''
        Section 2.3 Naming Conventions

        Names of variables, dimensions and attributes should start with a
        letter and contain only letters, digits and underscores.
        '''
        compliant = self.load_dataset(STATIC_FILES['rutgers'])
        outcome = self.cf.check_naming_conventions(compliant)
        total = len(compliant.variables)
        self.assertEqual(outcome.value, (total, total))

        flawed = self.load_dataset(STATIC_FILES['bad'])
        outcome = self.cf.check_naming_conventions(flawed)
        total = len(flawed.variables)
        # One point short of the number of variables is expected
        self.assertEqual(outcome.value, (total - 1, total))
        assert '_poor_dim' in outcome.msgs[0]

    def test_check_names_unique(self):
        """
        2.3 No two names should differ only by case; ignoring case, every name should be distinct.
        """
        dataset = self.load_dataset(STATIC_FILES['rutgers'])
        outcome = self.cf.check_names_unique(dataset)

        variable_count = len(dataset.variables)
        self.assertEqual(outcome.value, (variable_count, variable_count))

        # TODO: Add bad unique names to bad.nc

    def test_check_dimension_names(self):
        """
        2.4 A variable may have zero or more dimensions, and all of them must be named differently.
        """
        bad = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcome = self.cf.check_dimension_names(bad)
        assert outcome.value == (5, 6)

    def test_check_dimension_order(self):
        """
        2.4 Dimensions interpreted as "date or time" (T), "height or depth" (Z), "latitude" (Y) or
        "longitude" (X) are recommended to appear in the relative order T, Z, Y, X in the CDL
        definition. Any other dimensions should, where possible, be placed to the left of the
        spatiotemporal ones.
        """
        bad = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcome = self.cf.check_dimension_order(bad)
        assert outcome.value == (11, 12)

    def test_check_fill_value_outside_valid_range(self):
        """
        2.5.1 A variable's _FillValue should lie outside its valid_range, when one is given.
        """
        bad = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcomes = self.cf.check_fill_value_outside_valid_range(bad)
        total_value = sum(outcome.value for outcome in outcomes)
        assert total_value == 1
        assert len(outcomes) == 2

    def test_check_conventions_are_cf_16(self):
        """
        2.6.1 The NUG defined global attribute Conventions should be the string value "CF-1.6"
        """
        expectations = (
            ('rutgers', self.assertTrue),      # :Conventions = "CF-1.6"
            ('conv_multi', self.assertTrue),   # :Conventions = "CF-1.6 ,ACDD" ;
            ('conv_bad', self.assertFalse),    # :Conventions = "NoConvention"
        )
        for key, check in expectations:
            dataset = self.load_dataset(STATIC_FILES[key])
            outcome = self.cf.check_conventions_are_cf_16(dataset)
            check(outcome.value)

    def test_check_convention_globals(self):
        """
        2.6.2 The title/history global attributes must be strings but are not required to exist.
        """
        # 'rutgers' is the plain passing case; 'bad_data_type' covers the
        # case where the attributes do not exist, which must also pass.
        for key in ('rutgers', 'bad_data_type'):
            dataset = self.load_dataset(STATIC_FILES[key])
            for outcome in self.cf.check_convention_globals(dataset):
                self.assertTrue(outcome.value)

    def test_check_convention_possibly_var_attrs(self):
        """
        3.1 Every variable representing a dimensional quantity requires a units attribute
        (boundary variables from Section 7.1, "Cell Boundaries", and climatology variables
        from Section 7.4, "Climatological Statistics", are excepted).

        Dimensionless quantities need no units; a variable without a units attribute is
        assumed dimensionless, although a dimensionless unit may optionally be given.

        - units required
        - type must be recognized by udunits
        - if std name specified, must be consistent with standard name table, must also be
          consistent with a specified cell_methods attribute if present
        """
        cases = (
            ('rutgers', self.assertTrue),
            ('bad_data_type', self.assertFalse),
        )
        for key, check in cases:
            dataset = self.load_dataset(STATIC_FILES[key])
            for outcome in self.cf.check_convention_possibly_var_attrs(dataset):
                check(outcome.value)

    def test_check_standard_name(self):
        """
        3.3 The standard_name attribute associates a standard name with a variable. Its string
        value is a standard name, optionally followed by one or more blanks and a standard
        name modifier.
        """
        good = self.load_dataset(STATIC_FILES['2dim'])
        for outcome in self.cf.check_standard_name(good):
            self.assertTrue(outcome.value)

        bad = self.load_dataset(STATIC_FILES['bad_data_type'])
        for outcome in self.cf.check_standard_name(bad):
            self.assertFalse(outcome.value)

    def test_download_standard_name_table(self):
        """
        Test that a user can download a specific standard name table
        """
        version = '35'

        data_directory = create_cached_data_dir()
        location = os.path.join(data_directory, 'cf-standard-name-table-test-{0}.xml'.format(version))

        def remove_download():
            # Tolerate a download that never produced the file
            if os.path.isfile(location):
                os.remove(location)

        # Register the cleanup before downloading and asserting, so a failed
        # download or a failed assertion does not leave the file behind.
        self.addCleanup(remove_download)
        download_cf_standard_name_table(version, location)

        # Test that the file now exists in location and is the right version
        self.assertTrue(os.path.isfile(location))
        std_names = StandardNameTable(location)
        self.assertEqual(std_names._version, version)

    def test_check_flags(self):
        '''
        Verify the score and messages of check_flags on 'self_referencing'
        '''
        dataset = self.load_dataset(STATIC_FILES['self_referencing'])
        scored, out_of, messages = self.get_results(self.cf.check_flags(dataset))

        self.assertEqual(scored, 46)
        self.assertEqual(out_of, 59)
        self.assertEqual(messages.count('flag_values must be a list'), 6)

        # Count the messages reporting that the flag_values type differs
        # from the variable type
        mismatch_pattern = r"'flag_values' attribute for variable '\w+' does not have same type \(fv: [<>]?\w+, v: [<>]?\w+\)"
        mismatches = [msg for msg in messages if re.match(mismatch_pattern, msg)]
        self.assertEqual(len(mismatches), 7)

    def test_check_bad_units(self):
        '''
        check_units passes throughout for '2dim' and fails throughout for 'bad_data_type'
        '''
        good = self.load_dataset(STATIC_FILES['2dim'])
        for outcome in self.cf.check_units(good):
            self.assertTrue(outcome.value)

        bad = self.load_dataset(STATIC_FILES['bad_data_type'])
        for outcome in self.cf.check_units(bad):
            self.assertFalse(outcome.value)

    def test_coordinate_types(self):
        '''
        Section 4 Coordinate Types

        Using coordinate variables for all coordinate types, wherever they
        apply, is strongly recommended.
        '''
        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        outcomes = self.cf.check_coordinate_vars_for_all_coordinate_types(dataset)
        for outcome in outcomes:
            self.assertTrue(outcome.value)

    def test_check_coordinate_axis_attr(self):
        '''
        Verify check_coordinate_axis_attr on '2dim' and 'bad_data_type'
        '''
        good = self.load_dataset(STATIC_FILES['2dim'])
        for outcome in self.cf.check_coordinate_axis_attr(good):
            self.assertTrue(outcome.value)

        bad = self.load_dataset(STATIC_FILES['bad_data_type'])
        for outcome in self.cf.check_coordinate_axis_attr(bad):
            variable = outcome.name[1]
            if variable in ['time', 'latitude']:
                self.assertTrue(outcome.value)
                continue
            if variable not in ['salinity']:
                continue
            # salinity must fail every sub-check except this one
            if outcome.name[2] not in ['does_not_depend_on_mult_coord_vars']:
                self.assertFalse(outcome.value)

    def test_latitude(self):
        '''
        Section 4.1 Latitude Coordinate
        '''
        # Compliant file: tuple scores must be full, other values truthy
        compliant = self.load_dataset(STATIC_FILES['example-grid'])
        for outcome in self.cf.check_latitude(compliant):
            value = outcome.value
            if not isinstance(value, tuple):
                self.assertTrue(value)
                continue
            self.assertEqual(value[0], value[1])

        # Non-compliant file: check the messages and the overall score
        flawed = self.load_dataset(STATIC_FILES['bad'])
        scored, out_of, messages = self.get_results(self.cf.check_latitude(flawed))

        expected_messages = (
            'lat does not have units attribute',
            'lat_uv units are acceptable, but not recommended',
            'lat_like does not have units attribute',
        )
        for expected in expected_messages:
            assert expected in messages

        assert scored == 5
        assert out_of == 12

    def test_longitude(self):
        '''
        Section 4.2 Longitude Coordinate
        '''
        # Compliant file: tuple scores must be full, other values truthy
        compliant = self.load_dataset(STATIC_FILES['example-grid'])
        for outcome in self.cf.check_longitude(compliant):
            value = outcome.value
            if not isinstance(value, tuple):
                self.assertTrue(value)
                continue
            self.assertEqual(value[0], value[1])

        # Non-compliant file: check the messages and the overall score
        flawed = self.load_dataset(STATIC_FILES['bad'])
        scored, out_of, messages = self.get_results(self.cf.check_longitude(flawed))

        expected_messages = (
            'lon does not have units attribute',
            'lon_uv units are acceptable, but not recommended',
            'lon_like does not have units attribute',
        )
        for expected in expected_messages:
            assert expected in messages

        assert scored == 5
        assert out_of == 12

    def test_is_vertical_coordinate(self):
        '''
        Section 4.3 Qualifiers for Vertical Coordinate

        NOTE: The standard does not explicitly require a vertical coordinate
        to be a coordinate type.
        '''
        # Recognised through standard_name
        by_standard_name = MockVariable()
        by_standard_name.standard_name = 'depth'
        self.assertTrue(is_vertical_coordinate('not_known', by_standard_name))

        # Recognised through axis
        by_axis = MockVariable()
        by_axis.axis = 'Z'
        self.assertTrue(is_vertical_coordinate('not_known', by_axis))

        # Recognised through units alone
        by_units = MockVariable()
        by_units.units = 'dbar'
        self.assertTrue(is_vertical_coordinate('not_known', by_units))

        # Recognised through units combined with positive
        by_positive = MockVariable()
        by_positive.units = 'm'
        by_positive.positive = 'up'
        self.assertTrue(is_vertical_coordinate('not_known', by_positive))

    def test_vertical_coordinate(self):
        '''
        Section 4.3 Vertical (Height or Depth) coordinate
        '''
        # Check compliance: every result for the example grid must pass
        dataset = self.load_dataset(STATIC_FILES['example-grid'])
        results = self.cf.check_vertical_coordinate(dataset)
        for r in results:
            self.assertTrue(r.value)

        # Check non-compliance
        dataset = self.load_dataset(STATIC_FILES['bad'])
        results = self.cf.check_vertical_coordinate(dataset)

        _, _, messages = self.get_results(results)

        # Each expected message must actually be looked up in messages:
        # asserting a bare non-empty string is always true and checks nothing.
        assert 'height does not have units' in messages
        assert 'vertical variable depth needs to define positive attribute' in messages
        assert 'vertical variable depth2 needs to define positive attribute' in messages

    def test_vertical_dimension(self):
        '''
        Section 4.3.1 Dimensional Vertical Coordinate
        '''
        cases = (
            ('example-grid', self.assertTrue),   # compliant
            ('bad', self.assertFalse),           # non-compliant
        )
        for key, check in cases:
            dataset = self.load_dataset(STATIC_FILES[key])
            for outcome in self.cf.check_dimensional_vertical_coordinate(dataset):
                check(outcome.value)

    def test_appendix_d(self):
        '''
        CF 1.6
        Appendix D
        The appendix defines how dimensional coordinate values are computed
        from dimensionless ones and their associated variables. A coordinate
        variable is tied to its definition through its standard_name, and the
        terms of the definition are tied to file variables through the
        formula_terms attribute: a string of blank-separated
        "term: variable" elements whose order is not significant.

        Each sample formula_terms string below must match the regex
        registered for its standard name.
        '''
        patterns = dict(dimless_vertical_coordinates)

        samples = [
            ('atmosphere_ln_pressure_coordinate',
             "p0: var1 lev: var2"),
            ('atmosphere_sigma_coordinate',
             "sigma: var1 ps: var2 ptop: var3"),
            ('atmosphere_hybrid_sigma_pressure_coordinate',
             "a: var1 b: var2 ps: var3 p0: var4"),
            ('atmosphere_hybrid_height_coordinate',
             "a: var1 b: var2 orog: var3"),
            ('atmosphere_sleve_coordinate',
             "a: var1 b1: var2 b2: var3 ztop: var4 zsurf1: var5 zsurf2: var6"),
            ('ocean_sigma_coordinate',
             "sigma: var1 eta: var2 depth: var3"),
            ('ocean_s_coordinate',
             "s: var1 eta: var2 depth: var3 a: var4 b: var5 depth_c: var6"),
            ('ocean_sigma_z_coordinate',
             "sigma: var1 eta: var2 depth: var3 depth_c: var4 nsigma: var5 zlev: var6"),
            ('ocean_double_sigma_coordinate',
             "sigma: var1 depth: var2 z1: var3 z2: var4 a: var5 href: var6 k_c: var7"),
        ]

        for std_name, formula_terms in samples:
            matched = re.match(patterns[std_name], formula_terms)
            self.assertIsNotNone(matched)

    def test_dimensionless_vertical(self):
        '''
        Section 4.3.2
        '''
        # Affirmative compliance
        compliant = self.load_dataset(STATIC_FILES['dimensionless'])
        for outcome in self.cf.check_dimensionless_vertical_coordinate(compliant):
            self.assertTrue(outcome.value)

        # Negative compliance
        flawed = self.load_dataset(STATIC_FILES['bad'])
        scored, out_of, messages = self.get_results(
            self.cf.check_dimensionless_vertical_coordinate(flawed))

        expected_messages = (
            'formula_terms missing from dimensionless coordinate lev1',
            'formula_terms not defined for dimensionless coordinate lev1',
            'var1 missing for dimensionless coordinate lev2',
            'var2 missing for dimensionless coordinate lev2',
            'var3 missing for dimensionless coordinate lev2',
        )
        for expected in expected_messages:
            assert expected in messages
        assert scored == 1
        assert out_of == 4

    def test_is_time_variable(self):
        '''
        is_time_variable accepts a variable by standard_name, name, axis or units
        '''
        by_standard_name = MockVariable()
        by_standard_name.standard_name = 'time'
        self.assertTrue(is_time_variable('not_time', by_standard_name))

        # A variable with no attributes set: only the name 'time' qualifies
        bare = MockVariable()
        self.assertTrue(is_time_variable('time', bare))
        self.assertFalse(is_time_variable('not_time', bare))

        by_axis = MockVariable()
        by_axis.axis = 'T'
        self.assertTrue(is_time_variable('maybe_time', by_axis))

        by_units = MockVariable()
        by_units.units = 'seconds since 1900-01-01'
        self.assertTrue(is_time_variable('maybe_time', by_units))

    def test_check_time_coordinate(self):
        '''
        check_time_coordinate passes on 'example-grid' and scores 1 of 3 on 'bad'
        '''
        compliant = self.load_dataset(STATIC_FILES['example-grid'])
        for outcome in self.cf.check_time_coordinate(compliant):
            self.assertTrue(outcome.value)

        flawed = self.load_dataset(STATIC_FILES['bad'])
        scored, out_of, messages = self.get_results(
            self.cf.check_time_coordinate(flawed))

        for expected in ('bad_time_1 does not have units',
                         'bad_time_2 doesn not have correct time units'):
            assert expected in messages
        assert scored == 1
        assert out_of == 3

    def test_check_calendar(self):
        '''
        check_calendar passes on 'example-grid' and reports both bad time variables of 'bad'
        '''
        compliant = self.load_dataset(STATIC_FILES['example-grid'])
        for outcome in self.cf.check_calendar(compliant):
            self.assertTrue(outcome.value)

        flawed = self.load_dataset(STATIC_FILES['bad'])
        # Only the messages are inspected here, not the score
        messages = self.get_results(self.cf.check_calendar(flawed))[2]

        assert 'Variable bad_time_1 should have a calendar attribute' in messages
        assert "Variable bad_time_2 should have a valid calendar: 'nope' is not a valid calendar" in messages

    def test_self_referencing(self):
        '''
        Covers a coordinate that has circular references
        '''
        circular = self.load_dataset(STATIC_FILES['self_referencing'])
        scored, out_of, messages = self.get_results(
            self.cf.check_two_dimensional(circular))
        assert "Variable LATITUDE's coordinate references itself" in messages
        assert scored == 1
        assert out_of == 3

        valid = self.load_dataset(STATIC_FILES['valid_coordinates'])
        scored, out_of, messages = self.get_results(
            self.cf.check_two_dimensional(valid))
        assert out_of == 4
        assert scored == 4
        assert "Variable CD_310's coordinate references itself" not in messages

    def test_check_independent_axis_dimensions(self):
        '''
        check_independent_axis_dimensions passes on 'example-grid' and scores 6 of 11 on 'bad'
        '''
        compliant = self.load_dataset(STATIC_FILES['example-grid'])
        for outcome in self.cf.check_independent_axis_dimensions(compliant):
            self.assertTrue(outcome.value)

        flawed = self.load_dataset(STATIC_FILES['bad'])
        scored, out_of, messages = self.get_results(
            self.cf.check_independent_axis_dimensions(flawed))

        expected_messages = (
            'The lev dimension for the variable lev1 does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.',
            'The lev dimension for the variable lev2 does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.',
            'The time dimension for the variable bad_time_1 does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.',
            'The time dimension for the variable bad_time_2 does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.',
            'The time dimension for the variable column_temp does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.',
        )
        for expected in expected_messages:
            assert expected in messages
        assert scored == 6
        assert out_of == 11

    def test_check_two_dimensional(self):
        '''
        check_two_dimensional passes on '2dim' and reports the expected messages for 'bad2dim'
        '''
        good = self.load_dataset(STATIC_FILES['2dim'])
        for outcome in self.cf.check_two_dimensional(good):
            self.assertTrue(outcome.value)

        # Need the bad testing
        bad = self.load_dataset(STATIC_FILES['bad2dim'])
        messages = self.get_results(self.cf.check_two_dimensional(bad))[2]

        expected_messages = (
            "Variable T's coordinate, lat, is not a coordinate or auxiliary variable",
            "coordinate lat is not a correct lat/lon variable",
            "Variable C's coordinate, lat_p, does not share dimension x with the variable",
        )
        for expected in expected_messages:
            assert expected in messages

    def test_check_reduced_horizontal_grid(self):
        '''
        'rhgrid' passes for PS; every result for 'bad-rhgrid' fails
        '''
        good = self.load_dataset(STATIC_FILES['rhgrid'])
        values = dict((r.name[1], r.value)
                      for r in self.cf.check_reduced_horizontal_grid(good))
        self.assertTrue(values['PS'])

        bad = self.load_dataset(STATIC_FILES['bad-rhgrid'])
        details = dict((r.name[1], (r.value, r.msgs))
                       for r in self.cf.check_reduced_horizontal_grid(bad))

        for value, _ in details.values():
            self.assertFalse(value)

        self.assertIn('Coordinate longitude is not a proper variable', details['PSa'][1])
        self.assertIn("Coordinate latitude's dimension, latdim, is not a dimension of PSb", details['PSb'][1])
        assert 'PSc' not in details

    def test_check_horz_crs_grid_mappings_projections(self):
        '''
        The 'mapping' file scores (3, 3) for wgs84 and (7, 8) for epsg
        '''
        dataset = self.load_dataset(STATIC_FILES['mapping'])
        values_by_name = {}
        for outcome in self.cf.check_horz_crs_grid_mappings_projections(dataset):
            values_by_name[outcome.name[1]] = outcome.value
        assert values_by_name['wgs84'] == (3, 3)
        assert values_by_name['epsg'] == (7, 8)

    def test_check_scalar_coordinate_system(self):
        '''
        Exactly two results are expected: HEIGHT scoring (0, 1) and DEPTH scoring (2, 2)
        '''
        dataset = self.load_dataset(STATIC_FILES['scalar_coordinate_variable'])
        results = self.cf.check_scalar_coordinate_system(dataset)
        self.assertEqual(len(results), 2)

        expected_values = {
            'HEIGHT': (0, 1),
            'DEPTH': (2, 2),
        }
        for r in results:
            variable = r.name[1]
            if variable not in expected_values:
                # fail() states the intent directly instead of assertTrue(False, ...)
                self.fail('Unexpected variable in results of check_scalar_coordinate_system')
            self.assertEqual(r.value, expected_values[variable])

    def test_check_geographic_region(self):
        '''
        For 'bad_region' the first result fails and the second passes
        '''
        outcomes = self.cf.check_geographic_region(
            self.load_dataset(STATIC_FILES['bad_region']))

        for index, check in enumerate((self.assertFalse, self.assertTrue)):
            check(outcomes[index].value)

    # def test_check_cell_boundaries(self):
    #    dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
    #    results = self.cf.check_cell_boundaries(dataset)
    #    print results
    #    self.assertTrue(results[0].value)

    def test_check_packed_data(self):
        '''
        'bad_data_type' yields four results: fail, pass, pass, fail
        '''
        outcomes = self.cf.check_packed_data(
            self.load_dataset(STATIC_FILES['bad_data_type']))
        self.assertEqual(len(outcomes), 4)

        checks = (self.assertFalse, self.assertTrue, self.assertTrue, self.assertFalse)
        for index, check in enumerate(checks):
            check(outcomes[index].value)

    def test_check_compression(self):
        '''
        'bad_data_type' scores (2, 2) on the first result and (0, 2) on the second
        '''
        outcomes = self.cf.check_compression(
            self.load_dataset(STATIC_FILES['bad_data_type']))
        for index, expected in enumerate([(2, 2), (0, 2)]):
            assert outcomes[index].value == expected

    def test_check_all_features_are_same_type(self):
        '''
        'rutgers' yields no result, 'featureType' passes, 'bad_data_type' fails
        '''
        def run_check(key):
            dataset = self.load_dataset(STATIC_FILES[key])
            return self.cf.check_all_features_are_same_type(dataset)

        assert run_check('rutgers') is None
        self.assertTrue(run_check('featureType').value)
        self.assertFalse(run_check('bad_data_type').value)

    def test_check_orthogonal_multidim_array(self):
        '''
        Every result for 'rutgers' must pass
        '''
        source = self.load_dataset(STATIC_FILES['rutgers'])
        for outcome in self.cf.check_orthogonal_multidim_array(source):
            self.assertTrue(outcome.value)

    def test_check_incomplete_multidim_array(self):
        '''
        Every result for 'bad_data_type' must pass
        '''
        source = self.load_dataset(STATIC_FILES['bad_data_type'])
        for outcome in self.cf.check_incomplete_multidim_array(source):
            self.assertTrue(outcome.value)

    def test_check_contiguous_ragged_array(self):
        '''
        Every result for 'cont_ragged' must pass
        '''
        source = self.load_dataset(STATIC_FILES['cont_ragged'])
        for outcome in self.cf.check_contiguous_ragged_array(source):
            self.assertTrue(outcome.value)

    def test_check_indexed_ragged_array(self):
        '''
        Every result for 'index_ragged' must pass
        '''
        source = self.load_dataset(STATIC_FILES['index_ragged'])
        for outcome in self.cf.check_indexed_ragged_array(source):
            self.assertTrue(outcome.value)

    def test_check_feature_type(self):
        '''
        'index_ragged' passes the feature type check, 'bad_data_type' fails it
        '''
        good = self.load_dataset(STATIC_FILES['index_ragged'])
        self.assertTrue(self.cf.check_feature_type(good).value)

        bad = self.load_dataset(STATIC_FILES['bad_data_type'])
        self.assertFalse(self.cf.check_feature_type(bad).value)

    def test_check_coordinates_and_metadata(self):
        '''
        Verify check_coordinates_and_metadata against three static files
        '''
        # 'bad_data_type': the first three results are fail, pass, fail
        dataset = self.load_dataset(STATIC_FILES['bad_data_type'])
        results = self.cf.check_coordinates_and_metadata(dataset)
        self.assertFalse(results[0].value)
        self.assertTrue(results[1].value)
        self.assertFalse(results[2].value)

        # 'index_ragged': only the last result is inspected
        dataset = self.load_dataset(STATIC_FILES['index_ragged'])
        results = self.cf.check_coordinates_and_metadata(dataset)
        self.assertTrue(results[-1].value)

        # 'coordinates_and_metadata': exactly two results, both failing
        dataset = self.load_dataset(STATIC_FILES['coordinates_and_metadata'])
        results = self.cf.check_coordinates_and_metadata(dataset)
        # assertEqual reports the actual length when the check fails
        self.assertEqual(len(results), 2)
        self.assertFalse(results[0].value)
        self.assertFalse(results[1].value)

    def test_check_missing_data(self):
        '''
        All results pass for 'index_ragged' and all fail for 'bad_missing_data'
        '''
        cases = (
            ('index_ragged', self.assertTrue),
            ('bad_missing_data', self.assertFalse),
        )
        for key, check in cases:
            dataset = self.load_dataset(STATIC_FILES[key])
            for outcome in self.cf.check_missing_data(dataset):
                check(outcome.value)

    def test_check_units(self):
        '''
        Container variables must not be checked for units, geophysical variables must be
        '''
        dataset = self.load_dataset(STATIC_FILES['units_check'])
        tally = self.get_results(self.cf.check_units(dataset))

        # Variable names are not tracked for passing checks, so the assertion
        # is strict on the number of checks performed and on the absence of
        # any error message.
        scored, out_of, messages = tally
        assert scored == 4
        assert out_of == 4
        assert messages == []

    def test_64bit(self):
        '''
        Run the 'cf' checker through a CheckSuite on the 'ints64' file
        '''
        dataset = self.load_dataset(STATIC_FILES['ints64'])
        runner = CheckSuite()
        runner.checkers = dict(cf=CFBaseCheck)
        runner.run(dataset, 'cf')

    def test_time_units(self):
        '''
        check_units on 'time_units' scores 1 of 2 and reports the units mismatch
        '''
        dataset = self.load_dataset(STATIC_FILES['time_units'])
        scored, out_of, messages = self.get_results(self.cf.check_units(dataset))
        expected = 'units are days since 1970-01-01, standard_name units should be K'
        assert expected in messages
        assert scored == 1
        assert out_of == 2

    # --------------------------------------------------------------------------------
    # Utility Method Tests
    # --------------------------------------------------------------------------------

    def test_temporal_unit_conversion(self):
        '''
        'hours' converts to 'seconds' but not to a "since <date>" unit
        '''
        plain_to_plain = units_convertible('hours', 'seconds')
        self.assertTrue(plain_to_plain)
        plain_to_reference = units_convertible('hours', 'hours since 2000-01-01')
        self.assertFalse(plain_to_reference)

    def test_units_temporal(self):
        '''
        Only a unit with a valid "since <date>" reference counts as temporal
        '''
        cases = (
            ('hours since 2000-01-01', self.assertTrue),
            ('hours', self.assertFalse),
            ('days since the big bang', self.assertFalse),
        )
        for units, check in cases:
            check(units_temporal(units))

Exemple #7

0

Afficher le fichier

class TestCF(unittest.TestCase):
    # @see
    # http://www.saltycrane.com/blog/2012/07/how-prevent-nose-unittest-using-docstring-when-verbosity-2/
    def shortDescription(self):
        return None

    # override __str__ and __repr__ behavior to show a copy-pastable nosetest name for ion tests
    #  ion.module:TestClassName.test_function_name
    def __repr__(self):
        name = self.id()
        name = name.split('.')
        if name[0] not in ["ion", "pyon"]:
            return "%s (%s)" % (name[-1], '.'.join(name[:-1]))
        else:
            return "%s ( %s )" % (name[-1], '.'.join(name[:-2]) + ":" +
                                  '.'.join(name[-2:]))

    __str__ = __repr__

    def setUp(self):
        '''
        Initialize the dataset
        '''
        self.cf = CFBaseCheck()

    #--------------------------------------------------------------------------------
    # Helper Methods
    #--------------------------------------------------------------------------------

    def new_nc_file(self):
        '''
        Make a new temporary netCDF file for the scope of the test
        '''
        nc_file_path = os.path.join(gettempdir(), 'example.nc')
        if os.path.exists(nc_file_path):
            raise IOError('File Exists: %s' % nc_file_path)
        nc = Dataset(nc_file_path, 'w')
        self.addCleanup(os.remove, nc_file_path)
        self.addCleanup(nc.close)
        return nc

    def get_pair(self, nc_dataset):
        '''
        Return a pairwise object for the dataset
        '''
        if isinstance(nc_dataset, basestring):
            nc_dataset = Dataset(nc_dataset, 'r')
            self.addCleanup(nc_dataset.close)
        dogma = NetCDFDogma('nc', self.cf.beliefs(), nc_dataset)
        pair = DSPair(nc_dataset, dogma)
        return pair

    def get_results(self, results):
        '''
        Returns a tuple of the value scored, possible, and a list of messages
        in the result set.
        '''
        out_of = 0
        scored = 0
        for r in results:
            if isinstance(r.value, tuple):
                out_of += r.value[1]
                scored += r.value[0]
            else:
                out_of += 1
                scored += int(r.value)

        # Store the messages
        messages = []
        for r in results:
            messages.extend(r.msgs)

        return scored, out_of, messages

    #--------------------------------------------------------------------------------
    # Compliance Tests
    #--------------------------------------------------------------------------------

    def test_check_data_types(self):
        """
        2.2 The netCDF data types char, byte, short, int, float or real, and double are all acceptable
        """
        dataset = self.get_pair(static_files['rutgers'])
        result = self.cf.check_data_types(dataset)
        self.assertTrue(result.value)

        dpair = self.get_pair(static_files['bad_data_type'])
        result = self.cf.check_data_types(dpair)
        assert result.value == (5, 6)

    def test_naming_conventions(self):
        '''
        Section 2.3 Naming Conventions

        Variable, dimension and attribute names should begin with a letter and be composed of letters, digits, and underscores.
        '''
        dataset = self.get_pair(static_files['rutgers'])
        result = self.cf.check_naming_conventions(dataset)
        num_var = len(dataset.dataset.variables)

        expected = (num_var, ) * 2
        self.assertEquals(result.value, expected)

        dataset = self.get_pair(static_files['bad'])
        result = self.cf.check_naming_conventions(dataset)
        num_var = len(dataset.dataset.variables)
        expected = (num_var - 1, num_var)
        self.assertEquals(result.value, expected)
        assert '_poor_dim' in result.msgs[0]

    def test_check_names_unique(self):
        """
        2.3 names should not be distinguished purely by case, i.e., if case is disregarded, no two names should be the same.
        """
        dataset = self.get_pair(static_files['rutgers'])
        result = self.cf.check_names_unique(dataset)

        num_var = len(dataset.dataset.variables)
        expected = (num_var, ) * 2

        self.assertEquals(result.value, expected)

        #TODO: Add bad unique names to bad.nc

    def test_check_dimension_names(self):
        """
        2.4 A variable may have any number of dimensions, including zero, and the dimensions must all have different names.
        """

        dataset = self.get_pair(static_files['bad_data_type'])
        result = self.cf.check_dimension_names(dataset)
        assert result.value == (5, 6)

    def test_check_dimension_order(self):
        """
        2.4 If any or all of the dimensions of a variable have the interpretations of "date or time" (T), "height or depth" (Z),
        "latitude" (Y), or "longitude" (X) then we recommend, those dimensions to appear in the relative order T, then Z, then Y,
        then X in the CDL definition corresponding to the file. All other dimensions should, whenever possible, be placed to the
        left of the spatiotemporal dimensions.
        """
        dataset = self.get_pair(static_files['bad_data_type'])
        result = self.cf.check_dimension_order(dataset)
        assert result.value == (11, 12)

    def test_check_fill_value_outside_valid_range(self):
        """
        2.5.1 The _FillValue should be outside the range specified by valid_range (if used) for a variable.
        """

        dataset = self.get_pair(static_files['bad_data_type'])
        results = self.cf.check_fill_value_outside_valid_range(dataset)
        assert sum((result.value for result in results)) == 1
        assert len(results) == 2

    def test_check_conventions_are_cf_16(self):
        """
        2.6.1 the NUG defined global attribute Conventions to the string value "CF-1.6"
        """
        # :Conventions = "CF-1.6"
        dataset = self.get_pair(static_files['rutgers'])
        result = self.cf.check_conventions_are_cf_16(dataset)
        self.assertTrue(result.value)

        # :Conventions = "CF-1.6 ,ACDD" ;
        dataset = self.get_pair(static_files['conv_multi'])
        result = self.cf.check_conventions_are_cf_16(dataset)
        self.assertTrue(result.value)

        # :Conventions = "NoConvention"
        dataset = self.get_pair(static_files['conv_bad'])
        result = self.cf.check_conventions_are_cf_16(dataset)
        self.assertFalse(result.value)

    def test_check_convention_globals(self):
        """
        2.6.2 title/history global attributes, must be strings. Do not need to exist.
        """
        #check for pass
        dataset = self.get_pair(static_files['rutgers'])
        result = self.cf.check_convention_globals(dataset)
        for each in result:
            self.assertTrue(each.value)
        #check if it doesn't exist that we pass
        dataset = self.get_pair(static_files['bad_data_type'])
        result = self.cf.check_convention_globals(dataset)
        for each in result:
            self.assertTrue(each.value)

    def test_check_convention_possibly_var_attrs(self):
        """
        3.1 The units attribute is required for all variables that represent dimensional quantities
        (except for boundary variables defined in Section 7.1, "Cell Boundaries" and climatology variables
        defined in Section 7.4, "Climatological Statistics").

        Units are not required for dimensionless quantities. A variable with no units attribute is assumed
        to be dimensionless. However, a units attribute specifying a dimensionless unit may optionally be
        included.

        - units required
        - type must be recognized by udunits
        - if std name specified, must be consistent with standard name table, must also be consistent with a
          specified cell_methods attribute if present
        """
        dataset = self.get_pair(static_files['rutgers'])
        result = self.cf.check_convention_possibly_var_attrs(dataset)
        for each in result:
            self.assertTrue(each.value)

        dataset = self.get_pair(static_files['bad_data_type'])
        result = self.cf.check_convention_possibly_var_attrs(dataset)
        for each in result:
            self.assertFalse(each.value)

    def test_check_standard_name(self):
        """
        3.3 A standard name is associated with a variable via the attribute standard_name which takes a
        string value comprised of a standard name optionally followed by one or more blanks and a
        standard name modifier
        """
        dataset = self.get_pair(static_files['2dim'])
        result = self.cf.check_standard_name(dataset)
        for each in result:
            self.assertTrue(each.value)

        dataset = self.get_pair(static_files['bad_data_type'])
        result = self.cf.check_standard_name(dataset)
        for each in result:
            self.assertFalse(each.value)

    def test_check_flags(self):
        dataset = self.get_pair(static_files['self_referencing'])
        results = self.cf.check_flags(dataset)
        scored, out_of, messages = self.get_results(results)

        self.assertEqual(scored, 46)
        self.assertEqual(out_of, 59)
        self.assertEqual(messages.count(u'flag_values must be a list'), 6)
        self.assertEqual(
            messages.count(
                u'flag_values attr does not have same type as var (fv: int8, v: int16)'
            ), 6)
        self.assertEqual(
            messages.count(
                u'flag_values attr does not have same type as var (fv: <U1, v: int16)'
            ), 1)

    def test_check_units(self):

        dataset = self.get_pair(static_files['2dim'])
        result = self.cf.check_units(dataset)
        for each in result:
            self.assertTrue(each.value)

        dataset = self.get_pair(static_files['bad_data_type'])
        result = self.cf.check_units(dataset)
        for each in result:
            self.assertFalse(each.value)

    def test_coordinate_types(self):
        '''
        Section 4 Coordinate Types

        We strongly recommend that coordinate variables be used for all coordinate types whenever they are applicable.
        '''
        dataset = self.get_pair(static_files['bad_data_type'])
        result = self.cf.check_coordinate_vars_for_all_coordinate_types(
            dataset)
        for each in result:
            self.assertTrue(each.value)

    def test_check_coordinate_axis_attr(self):

        dataset = self.get_pair(static_files['2dim'])
        result = self.cf.check_coordinate_axis_attr(dataset)
        for each in result:
            self.assertTrue(each.value)

        dataset = self.get_pair(static_files['bad_data_type'])
        result = self.cf.check_coordinate_axis_attr(dataset)
        for each in result:
            if each.name[1] in ['time', 'latitude']:
                self.assertTrue(each.value)
            if each.name[1] in ['salinity']:
                if each.name[2] not in ['does_not_depend_on_mult_coord_vars']:
                    self.assertFalse(each.value)

    def test_latitude(self):
        '''
        Section 4.1 Latitude Coordinate
        '''
        # Check compliance
        dataset = self.get_pair(static_files['example-grid'])
        results = self.cf.check_latitude(dataset)
        for r in results:
            if isinstance(r.value, tuple):
                self.assertEquals(r.value[0], r.value[1])
            else:
                self.assertTrue(r.value)

        # Verify non-compliance
        dataset = self.get_pair(static_files['bad'])
        results = self.cf.check_latitude(dataset)

        scored, out_of, messages = self.get_results(results)

        assert 'lat does not have units attribute' in messages
        assert 'lat_uv units are acceptable, but not recommended' in messages
        assert 'lat_like does not have units attribute' in messages

        assert scored == 5
        assert out_of == 12

    def test_longitude(self):
        '''
        Section 4.2 Longitude Coordinate
        '''
        # Check compliance
        dataset = self.get_pair(static_files['example-grid'])
        results = self.cf.check_longitude(dataset)
        for r in results:
            if isinstance(r.value, tuple):
                self.assertEquals(r.value[0], r.value[1])
            else:
                self.assertTrue(r.value)

        # Verify non-compliance
        dataset = self.get_pair(static_files['bad'])
        results = self.cf.check_longitude(dataset)

        scored, out_of, messages = self.get_results(results)

        assert 'lon does not have units attribute' in messages
        assert 'lon_uv units are acceptable, but not recommended' in messages
        assert 'lon_like does not have units attribute' in messages

        assert scored == 5
        assert out_of == 12

    def test_is_vertical_coordinate(self):
        '''
        Section 4.3 Qualifiers for Vertical Coordinate

        NOTE: The standard doesn't explicitly say that vertical coordinates must be a 
        coordinate type.
        '''
        # Make something that I can attach attrs to
        mock_variable = MockVariable

        # Proper name/standard_name
        known_name = mock_variable()
        known_name.standard_name = 'depth'
        self.assertTrue(is_vertical_coordinate('not_known', known_name))

        # Proper Axis
        axis_set = mock_variable()
        axis_set.axis = 'Z'
        self.assertTrue(is_vertical_coordinate('not_known', axis_set))

        # Proper units
        units_set = mock_variable()
        units_set.units = 'dbar'
        self.assertTrue(is_vertical_coordinate('not_known', units_set))

        # Proper units/positive
        positive = mock_variable()
        positive.units = 'm'
        positive.positive = 'up'
        self.assertTrue(is_vertical_coordinate('not_known', positive))

    def test_vertical_coordinate(self):
        '''
        Section 4.3 Vertical (Height or Depth) coordinate
        '''
        # Check compliance

        dataset = self.get_pair(static_files['example-grid'])
        results = self.cf.check_vertical_coordinate(dataset)
        for r in results:
            self.assertTrue(r.value)

        # Check non-compliance
        dataset = self.get_pair(static_files['bad'])
        results = self.cf.check_vertical_coordinate(dataset)

        scored, out_of, messages = self.get_results(results)

        assert 'height does not have units' in messages
        assert 'vertical variable depth needs to define positive attribute'
        assert 'vertical variable depth2 needs to define positive attribute'

    def test_vertical_dimension(self):
        '''
        Section 4.3.1 Dimensional Vertical Coordinate
        '''
        # Check for compliance
        dataset = self.get_pair(static_files['example-grid'])
        results = self.cf.check_dimensional_vertical_coordinate(dataset)
        for r in results:
            self.assertTrue(r.value)

        # Check for non-compliance
        dataset = self.get_pair(static_files['bad'])
        results = self.cf.check_dimensional_vertical_coordinate(dataset)
        for r in results:
            self.assertFalse(r.value)

    def test_appendix_d(self):
        '''
        CF 1.6
        Appendix D
        The definitions given here allow an application to compute dimensional
        coordinate values from the dimensionless ones and associated variables.
        The formulas are expressed for a gridpoint (n,k,j,i) where i and j are
        the horizontal indices, k is the vertical index and n is the time index.
        A coordinate variable is associated with its definition by the value of
        the standard_name attribute. The terms in the definition are associated
        with file variables by the formula_terms attribute. The formula_terms
        attribute takes a string value, the string being comprised of
        blank-separated elements of the form "term: variable", where term is a
        keyword that represents one of the terms in the definition, and variable
        is the name of the variable in a netCDF file that contains the values
        for that term. The order of elements is not significant.
        '''

        dimless = dict(dimless_vertical_coordinates)

        def verify(std_name, test_str):
            regex_matches = re.match(dimless[std_name], test_str)
            self.assertIsNotNone(regex_matches)

        # For each of the listed dimensionless vertical coordinates,
        # verify that the formula_terms match the provided regex
        verify('atmosphere_ln_pressure_coordinate', "p0: var1 lev: var2")
        verify('atmosphere_sigma_coordinate',
               "sigma: var1 ps: var2 ptop: var3")
        verify('atmosphere_hybrid_sigma_pressure_coordinate',
               "a: var1 b: var2 ps: var3 p0: var4")
        verify('atmosphere_hybrid_height_coordinate',
               "a: var1 b: var2 orog: var3")
        verify(
            'atmosphere_sleve_coordinate',
            "a: var1 b1: var2 b2: var3 ztop: var4 zsurf1: var5 zsurf2: var6")
        verify('ocean_sigma_coordinate', "sigma: var1 eta: var2 depth: var3")
        verify('ocean_s_coordinate',
               "s: var1 eta: var2 depth: var3 a: var4 b: var5 depth_c: var6")
        verify(
            'ocean_sigma_z_coordinate',
            "sigma: var1 eta: var2 depth: var3 depth_c: var4 nsigma: var5 zlev: var6"
        )
        verify(
            'ocean_double_sigma_coordinate',
            "sigma: var1 depth: var2 z1: var3 z2: var4 a: var5 href: var6 k_c: var7"
        )

    def test_dimensionless_vertical(self):
        '''
        Section 4.3.2
        '''
        # Check affirmative compliance
        dataset = self.get_pair(static_files['dimensionless'])
        results = self.cf.check_dimensionless_vertical_coordinate(dataset)
        for r in results:
            self.assertTrue(r.value)

        # Check negative compliance
        dataset = self.get_pair(static_files['bad'])
        results = self.cf.check_dimensionless_vertical_coordinate(dataset)

        scored, out_of, messages = self.get_results(results)

        assert u'formula_terms missing from dimensionless coordinate lev1' in messages
        assert u'formula_terms not defined for dimensionless coordinate lev1' in messages
        assert u'var1 missing for dimensionless coordinate lev2' in messages
        assert u'var2 missing for dimensionless coordinate lev2' in messages
        assert u'var3 missing for dimensionless coordinate lev2' in messages
        assert scored == 1
        assert out_of == 4

    def test_is_time_variable(self):
        var1 = MockVariable()
        var1.standard_name = 'time'
        self.assertTrue(is_time_variable('not_time', var1))

        var2 = MockVariable()
        self.assertTrue(is_time_variable('time', var2))

        self.assertFalse(is_time_variable('not_time', var2))

        var3 = MockVariable()
        var3.axis = 'T'
        self.assertTrue(is_time_variable('maybe_time', var3))

        var4 = MockVariable()
        var4.units = 'seconds since 1900-01-01'
        self.assertTrue(is_time_variable('maybe_time', var4))

    def test_check_time_coordinate(self):
        dataset = self.get_pair(static_files['example-grid'])
        results = self.cf.check_time_coordinate(dataset)
        for r in results:
            self.assertTrue(r.value)

        dataset = self.get_pair(static_files['bad'])
        results = self.cf.check_time_coordinate(dataset)

        scored, out_of, messages = self.get_results(results)

        assert u'bad_time_1 does not have units' in messages
        assert u'bad_time_2 doesn not have correct time units' in messages
        assert scored == 1
        assert out_of == 3

    def test_check_calendar(self):
        dataset = self.get_pair(static_files['example-grid'])
        results = self.cf.check_calendar(dataset)
        for r in results:
            self.assertTrue(r.value)

        dataset = self.get_pair(static_files['bad'])
        results = self.cf.check_calendar(dataset)
        scored, out_of, messages = self.get_results(results)

        assert u'Variable bad_time_1 should have a calendar attribute' in messages
        assert u"Variable bad_time_2 should have a valid calendar: 'nope' is not a valid calendar" in messages

    def test_self_referencing(self):
        '''
        This test captures a check where a coordinate has circular references
        '''
        dataset = self.get_pair(static_files['self_referencing'])
        results = self.cf.check_two_dimensional(dataset)

        scored, out_of, messages = self.get_results(results)
        assert u"Variable TEMP_H's coordinate references itself" in messages
        assert scored == 0
        assert out_of == 44

    def test_check_independent_axis_dimensions(self):
        dataset = self.get_pair(static_files['example-grid'])
        results = self.cf.check_independent_axis_dimensions(dataset)
        for r in results:
            self.assertTrue(r.value)

        dataset = self.get_pair(static_files['bad'])
        results = self.cf.check_independent_axis_dimensions(dataset)

        scored, out_of, messages = self.get_results(results)
        assert u'The lev dimension for the variable lev1 does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.' \
                in messages
        assert u'The lev dimension for the variable lev2 does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.' \
                in messages
        assert u'The time dimension for the variable bad_time_1 does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.' \
                in messages
        assert u'The time dimension for the variable bad_time_2 does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.' \
                in messages
        assert u'The time dimension for the variable column_temp does not have an associated coordinate variable, but is a Lat/Lon/Time/Height dimension.' \
                in messages
        assert scored == 6
        assert out_of == 11

    def test_check_two_dimensional(self):
        dataset = self.get_pair(static_files['2dim'])
        results = self.cf.check_two_dimensional(dataset)
        for r in results:
            self.assertTrue(r.value)

        # Need the bad testing
        dataset = self.get_pair(static_files['bad2dim'])
        results = self.cf.check_two_dimensional(dataset)
        self.assertTrue(results[0].value)
        self.assertFalse(results[1].value)
        self.assertFalse(results[2].value)
        self.assertTrue(results[3].value)
        self.assertFalse(results[4].value)
        self.assertTrue(results[5].value)

        # Test the self referencing variables
        dataset = self.get_pair(static_files['self-referencing-var'])
        try:
            results = self.cf.check_two_dimensional(dataset)
            self.assertFalse(results[0].value)
        except:
            self.assertTrue(False)

    def test_check_reduced_horizontal_grid(self):
        dataset = self.get_pair(static_files['rhgrid'])
        results = self.cf.check_reduced_horizontal_grid(dataset)
        rd = {r.name[1]: r.value for r in results}
        self.assertTrue(rd['PS'])

        dataset = self.get_pair(static_files['bad-rhgrid'])
        results = self.cf.check_reduced_horizontal_grid(dataset)
        rd = {r.name[1]: (r.value, r.msgs) for r in results}

        for name, (value, msg) in rd.iteritems():
            self.assertFalse(value)

        self.assertIn('Coordinate longitude is not a proper variable',
                      rd['PSa'][1])
        self.assertIn(
            "Coordinate latitude's dimension, latdim, is not a dimension of PSb",
            rd['PSb'][1])
        assert 'PSc' not in rd.keys()

    def test_check_horz_crs_grid_mappings_projections(self):
        dataset = self.get_pair(static_files['mapping'])
        results = self.cf.check_horz_crs_grid_mappings_projections(dataset)
        rd = {r.name[1]: r.value for r in results}
        assert rd['wgs84'] == (3, 3)
        assert rd['epsg'] == (7, 8)

    def test_check_scalar_coordinate_system(self):
        dataset = self.get_pair(static_files['scalar_coordinate_variable'])
        results = self.cf.check_scalar_coordinate_system(dataset)
        self.assertEqual(len(results), 2)
        for r in results:
            if r.name[1] == 'HEIGHT':
                self.assertEqual(r.value, (0, 1))
            elif r.name[1] == 'DEPTH':
                self.assertEqual(r.value, (2, 2))
            else:
                self.assertTrue(
                    False,
                    'Unexpected variable in results of check_scalar_coordinate_system'
                )

    def test_check_geographic_region(self):
        dataset = self.get_pair(static_files['bad_region'])
        results = self.cf.check_geographic_region(dataset)

        self.assertFalse(results[0].value)
        self.assertTrue(results[1].value)

    def test_check_alternative_coordinates(self):
        dataset = self.get_pair(static_files['bad_data_type'])
        results = self.cf.check_alternative_coordinates(dataset)
        self.assertTrue(results[0].value)

    #def test_check_cell_boundaries(self):
    #    dataset = self.get_pair(static_files['bad_data_type'])
    #    results = self.cf.check_cell_boundaries(dataset)
    #    print results
    #    self.assertTrue(results[0].value)

    def test_check_packed_data(self):
        dataset = self.get_pair(static_files['bad_data_type'])
        results = self.cf.check_packed_data(dataset)
        self.assertEqual(len(results), 4)
        self.assertFalse(results[0].value)
        self.assertTrue(results[1].value)
        self.assertTrue(results[2].value)
        self.assertFalse(results[3].value)

    def test_check_compression(self):
        dataset = self.get_pair(static_files['bad_data_type'])
        results = self.cf.check_compression(dataset)
        assert results[0].value == (2, 2)
        assert results[1].value == (0, 2)

    def test_check_all_features_are_same_type(self):
        dataset = self.get_pair(static_files['rutgers'])
        results = self.cf.check_all_features_are_same_type(dataset)
        assert results == None

        dataset = self.get_pair(static_files['featureType'])
        results = self.cf.check_all_features_are_same_type(dataset)
        self.assertTrue(results.value)

        dataset = self.get_pair(static_files['bad_data_type'])
        results = self.cf.check_all_features_are_same_type(dataset)
        self.assertFalse(results.value)

    def test_check_orthogonal_multidim_array(self):
        dataset = self.get_pair(static_files['rutgers'])
        results = self.cf.check_orthogonal_multidim_array(dataset)
        for each in results:
            self.assertTrue(each.value)

    def test_check_incomplete_multidim_array(self):
        dataset = self.get_pair(static_files['bad_data_type'])
        results = self.cf.check_incomplete_multidim_array(dataset)
        for each in results:
            self.assertTrue(each.value)

    def test_check_contiguous_ragged_array(self):
        dataset = self.get_pair(static_files['cont_ragged'])
        results = self.cf.check_contiguous_ragged_array(dataset)
        for each in results:
            self.assertTrue(each.value)

    def test_check_indexed_ragged_array(self):
        dataset = self.get_pair(static_files['index_ragged'])
        results = self.cf.check_indexed_ragged_array(dataset)
        for each in results:
            self.assertTrue(each.value)

    def test_check_feature_type(self):
        dataset = self.get_pair(static_files['index_ragged'])
        results = self.cf.check_feature_type(dataset)
        self.assertTrue(results.value)

        dataset = self.get_pair(static_files['bad_data_type'])
        results = self.cf.check_feature_type(dataset)
        self.assertFalse(results.value)

    def test_check_coordinates_and_metadata(self):
        dataset = self.get_pair(static_files['bad_data_type'])
        results = self.cf.check_coordinates_and_metadata(dataset)
        self.assertFalse(results[0].value)
        self.assertTrue(results[1].value)
        self.assertFalse(results[2].value)

        dataset = self.get_pair(static_files['index_ragged'])
        results = self.cf.check_coordinates_and_metadata(dataset)
        self.assertTrue(results[-1].value)

        dataset = self.get_pair(static_files['coordinates_and_metadata'])
        results = self.cf.check_coordinates_and_metadata(dataset)
        self.assertTrue(len(results) == 2)
        self.assertFalse(results[0].value)
        self.assertFalse(results[1].value)

    def test_check_missing_data(self):
        dataset = self.get_pair(static_files['index_ragged'])
        results = self.cf.check_missing_data(dataset)
        for each in results:
            self.assertTrue(each.value)

        dataset = self.get_pair(static_files['bad_missing_data'])
        results = self.cf.check_missing_data(dataset)
        for each in results:
            self.assertFalse(each.value)

    def test_check_units(self):
        '''
        Ensure that container variables are not checked for units but geophysical variables are
        '''
        dataset = self.get_pair(static_files['units_check'])
        results = self.cf.check_units(dataset)

        # We don't keep track of the variables names for checks that passed, so
        # we can make a strict assertion about how many checks were performed
        # and if there were errors, which there shouldn't be.
        scored, out_of, messages = self.get_results(results)
        assert scored == 4
        assert out_of == 4
        assert messages == []

    def test_64bit(self):
        dataset = self.get_pair(static_files['ints64'])
        suite = CheckSuite()
        suite.checkers = {'cf': CFBaseCheck}
        suite.run(dataset, 'cf')

    def test_time_units(self):
        dataset = self.get_pair(static_files['time_units'])
        results = self.cf.check_units(dataset)
        scored, out_of, messages = self.get_results(results)
        assert u'units are days since 1970-01-01, standard_name units should be K' in messages
        assert scored == 1
        assert out_of == 2

    #--------------------------------------------------------------------------------
    # Utility Method Tests
    #--------------------------------------------------------------------------------

    def test_temporal_unit_conversion(self):
        self.assertTrue(units_convertible('hours', 'seconds'))
        self.assertFalse(units_convertible('hours', 'hours since 2000-01-01'))

    def test_units_temporal(self):
        self.assertTrue(units_temporal('hours since 2000-01-01'))
        self.assertFalse(units_temporal('hours'))
        self.assertFalse(units_temporal('days since the big bang'))

Exemple #8

0

Afficher le fichier

Fichier : test_cf.py Projet : petejan/compliance-checker

 def setUp(self):
     """
     Create the CFBaseCheck instance under test and store it on self.cf
     """
     self.cf = CFBaseCheck()