def test_64bit(self):
    """Smoke-test: the CF checker must run over a dataset holding 64-bit ints."""
    ds = self.load_dataset(STATIC_FILES['ints64'])
    check_suite = CheckSuite()
    check_suite.checkers = {'cf': CFBaseCheck}
    check_suite.run(ds, 'cf')
def test_suite(self):
    # BWA: what's the purpose of this test? Just to see if the suite
    # runs without errors?
    suite = CheckSuite()
    suite.load_all_available_checkers()
    dataset = suite.load_dataset(static_files['2dim'])
    suite.run(dataset, 'acdd')
def test_64bit(self):
    """Run the CF checker over a dataset that stores 64-bit integer data."""
    nc_file = self.load_dataset(static_files['ints64'])
    cf_suite = CheckSuite()
    cf_suite.checkers = {'cf': CFBaseCheck}
    cf_suite.run(nc_file, 'cf')
def test_64bit(self):
    """Verify the CF check suite can process a file containing 64-bit integers."""
    loaded = self.load_dataset(STATIC_FILES['ints64'])
    runner = CheckSuite()
    runner.checkers = {'cf': CFBaseCheck}
    runner.run(loaded, 'cf')
def test_thredds(self):
    '''
    Tests that a connection can be made to a remote THREDDS endpoint
    '''
    endpoint = 'http://data.ioos.us/thredds/dodsC/deployments/rutgers/ru24-20150105T1441/ru24-20150105T1441.nc3.nc'
    suite = CheckSuite()
    dataset = suite.load_dataset(endpoint)
    assert dataset is not None
def test_sos(self):
    """
    Tests that a connection can be made to an SOS endpoint
    """
    endpoint = "https://data.oceansmap.com/thredds/sos/caricoos_ag/VIA/VIA.ncml"
    suite = CheckSuite()
    dataset = suite.load_dataset(endpoint)
    assert dataset is not None
def test_hyrax(self):
    """
    Tests that a connection can be made to Hyrax
    """
    endpoint = "http://ingria.coas.oregonstate.edu/opendap/hyrax/aggregated/ocean_time_aggregation.ncml"
    suite = CheckSuite()
    dataset = suite.load_dataset(endpoint)
    assert dataset is not None
def setUp(self):
    """
    Initialize the dataset
    """
    suite = CheckSuite()
    suite.load_all_available_checkers()
    self.cs = suite
    # get current std names table version (it changes)
    self._std_names = util.StandardNameTable()
def test_erddap(self):
    """
    Tests that a connection can be made to ERDDAP's GridDAP
    """
    endpoint = "http://coastwatch.pfeg.noaa.gov/erddap/griddap/osuChlaAnom"
    suite = CheckSuite()
    dataset = suite.load_dataset(endpoint)
    assert dataset is not None
def run_checker(cls, ds_loc, checker_names, verbose, criteria, skip_checks=None,
                output_filename='-', output_format='text'):
    """
    Static check runner.

    @param ds_loc          Dataset location (url or file)
    @param checker_names   List of string names to run, should match keys of
                           checkers dict (empty list means run all)
    @param verbose         Verbosity of the output (0, 1, 2)
    @param criteria        Determines failure (lenient, normal, strict)
    @param output_filename Path to the file for output
    @param skip_checks     Names of checks to skip
    @param output_format   Format of the output

    @returns               If the tests failed (based on the criteria)
    """
    cs = CheckSuite()
    ds = cs.load_dataset(ds_loc)
    # A skip_checks of None means "skip nothing".
    score_groups = cs.run(ds, [] if skip_checks is None else skip_checks,
                          *checker_names)

    if not score_groups:
        raise ValueError("No checks found, please check the name of the checker(s) and that they are installed")

    # Translate the user-facing criteria name into a numeric score limit.
    if criteria == 'normal':
        limit = 2
    elif criteria == 'strict':
        limit = 1
    elif criteria == 'lenient':
        limit = 3
    else:
        # Previously an unknown criteria fell through and produced a
        # confusing UnboundLocalError on `limit`; fail fast instead.
        raise ValueError('Invalid criteria %s' % criteria)

    if output_format == 'text':
        if output_filename == '-':
            groups = cls.stdout_output(cs, score_groups, verbose, limit)
        # need to redirect output from stdout since print functions are
        # presently used to generate the standard report output
        else:
            with io.open(output_filename, 'w', encoding='utf-8') as f:
                with stdout_redirector(f):
                    groups = cls.stdout_output(cs, score_groups, verbose, limit)
    elif output_format == 'html':
        groups = cls.html_output(cs, score_groups, output_filename, ds_loc, limit)
    elif output_format == 'json':
        groups = cls.json_output(cs, score_groups, output_filename, ds_loc, limit)
    else:
        raise TypeError('Invalid format %s' % output_format)

    errors_occurred = cls.check_errors(score_groups, verbose)

    return cs.passtree(groups, limit), errors_occurred
def test_thredds(self):
    '''
    Tests that a connection can be made to a remote THREDDS endpoint
    '''
    endpoint = "http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg_ana/TP"
    suite = CheckSuite()
    dataset = suite.load_dataset(endpoint)
    assert dataset is not None
def test_skip_checks(self):
    """Tests that checks are properly skipped when specified"""
    suite = CheckSuite()
    suite.load_all_available_checkers()
    dataset = suite.load_dataset(static_files['2dim'])
    # exclude title from the check attributes
    results = suite.run(dataset, ['check_high'], 'acdd')
    skipped_names = {'Conventions', 'title', 'keywords', 'summary'}
    for score in results['acdd'][0]:
        assert score.name not in skipped_names
def test_netcdf_content_type(self):
    """
    Check that urls with Content-Type header of "application/x-netcdf" can
    successfully be read into memory for checks.
    """
    endpoint = "https://gliders.ioos.us/erddap/tabledap/amelia-20180501T0000.ncCF?&time%3E=max(time)-1%20hour"
    suite = CheckSuite()
    dataset = suite.load_dataset(endpoint)
    assert dataset is not None
def run_checker(cls, ds_loc, checker_names, verbose, criteria, skip_checks=None,
                output_filename='-', output_format='text'):
    """
    Static check runner.

    @param ds_loc          Dataset location (url or file)
    @param checker_names   List of string names to run, should match keys of
                           checkers dict (empty list means run all)
    @param verbose         Verbosity of the output (0, 1, 2)
    @param criteria        Determines failure (lenient, normal, strict)
    @param output_filename Path to the file for output
    @param skip_checks     Names of checks to skip
    @param output_format   Format of the output

    @returns               If the tests failed (based on the criteria)
    """
    cs = CheckSuite()
    ds = cs.load_dataset(ds_loc)
    # A skip_checks of None means "skip nothing".
    score_groups = cs.run(ds, [] if skip_checks is None else skip_checks,
                          *checker_names)

    # Guard against typo'd / uninstalled checker names; without this an
    # empty result dict produced an empty (useless) report downstream.
    if not score_groups:
        raise ValueError("No checks found, please check the name of the checker(s) and that they are installed")

    # Translate the user-facing criteria name into a numeric score limit.
    if criteria == 'normal':
        limit = 2
    elif criteria == 'strict':
        limit = 1
    elif criteria == 'lenient':
        limit = 3
    else:
        # Previously an unknown criteria fell through and produced a
        # confusing UnboundLocalError on `limit`; fail fast instead.
        raise ValueError('Invalid criteria %s' % criteria)

    if output_format == 'text':
        if output_filename == '-':
            groups = cls.stdout_output(cs, score_groups, verbose, limit)
        # need to redirect output from stdout since print functions are
        # presently used to generate the standard report output
        else:
            with io.open(output_filename, 'w', encoding='utf-8') as f:
                with stdout_redirector(f):
                    groups = cls.stdout_output(cs, score_groups, verbose, limit)
    elif output_format == 'html':
        groups = cls.html_output(cs, score_groups, output_filename, ds_loc, limit)
    elif output_format == 'json':
        groups = cls.json_output(cs, score_groups, output_filename, ds_loc, limit)
    else:
        raise TypeError('Invalid format %s' % output_format)

    errors_occurred = cls.check_errors(score_groups, verbose)

    return cs.passtree(groups, limit), errors_occurred
def run_checker(cls, ds_loc, checker_names, verbose, criteria,
                output_filename='stdout', output_format='stdout'):
    """
    Static check runner.

    @param ds_loc          Dataset location (url or file)
    @param checker_names   List of string names to run, should match keys of
                           checkers dict (empty list means run all)
    @param verbose         Verbosity of the output (0, 1, 2)
    @param criteria        Determines failure (lenient, normal, strict)
    @param output_filename Path to the file for output
    @param output_format   Format of the output

    @returns               If the tests failed (based on the criteria)
    """
    # NOTE(review): the 'stdout' defaults are never accepted by the format
    # dispatch below (only '-'/'text', 'html', 'json' are handled), so
    # calling with defaults raises TypeError; callers apparently always pass
    # explicit values -- confirm before changing the defaults.
    cs = CheckSuite()
    ds = cs.load_dataset(ds_loc)
    score_groups = cs.run(ds, *checker_names)

    # Translate the user-facing criteria name into a numeric score limit.
    if criteria == 'normal':
        limit = 2
    elif criteria == 'strict':
        limit = 1
    elif criteria == 'lenient':
        limit = 3
    else:
        # An unknown criteria previously fell through and produced an
        # UnboundLocalError on `limit`; fail fast with a clear message.
        raise ValueError('Invalid criteria %s' % criteria)

    if output_filename == '-' and output_format == 'text':
        groups = cls.stdout_output(cs, score_groups, verbose, limit)
    elif output_format == 'html':
        groups = cls.html_output(cs, score_groups, output_filename, ds_loc, limit)
    elif output_format == 'json':
        groups = cls.json_output(cs, score_groups, output_filename, ds_loc, limit)
    else:
        raise TypeError('Invalid format %s' % output_format)

    errors_occurred = cls.check_errors(score_groups, verbose)

    return cs.passtree(groups, limit), errors_occurred
def test_unicode_formatting(self):
    """Ensure report generation handles unicode checker output without raising."""
    cs = CheckSuite()
    cs.load_all_available_checkers()
    ds = cs.load_dataset(static_files['bad_region'])
    score_groups = cs.run(ds, 'cf')

    limit = 2
    # dict.iteritems() was removed in Python 3; items() behaves the same
    # here and is equally usable on Python 2.
    for checker, rpair in score_groups.items():
        groups, errors = rpair
        score_list, points, out_of = cs.standard_output(limit, checker, groups)
        # This asserts that print is able to generate all of the unicode output
        cs.non_verbose_output_generation(score_list, groups, limit, points, out_of)
def setUp(self):
    """Read the DescribeSensor mock response and register all checkers."""
    mock_path = os.path.join(os.path.dirname(__file__),
                             'data/http_mocks/ncsos_describesensor.xml')
    with open(mock_path) as f:
        self.resp = f.read()
    # need to monkey patch checkers prior to running tests, or no checker
    # classes will show up
    CheckSuite().load_all_available_checkers()
def test_score_grouping(self):
    # Testing the grouping of results for output, which can fail
    # if some assumptions are not met, e.g. if a Result object has
    # a value attribute of unexpected type
    suite = CheckSuite()
    raw_results = [
        Result(BaseCheck.MEDIUM, True, 'one'),
        Result(BaseCheck.MEDIUM, (1, 3), 'one'),
        Result(BaseCheck.MEDIUM, None, 'one'),
        Result(BaseCheck.MEDIUM, True, 'two'),
        Result(BaseCheck.MEDIUM, np.isnan(1), 'two'),  # value is type numpy.bool_
    ]
    grouped = suite.scores(raw_results)
    expected = [('one', (2, 4)), ('two', (1, 2))]
    for idx, (name, value) in enumerate(expected):
        self.assertEqual(grouped[idx].name, name)
        self.assertEqual(grouped[idx].value, value)
def test_score_grouping(self):
    # Testing the grouping of results for output, which can fail
    # if some assumptions are not met, e.g. if a Result object has
    # a value attribute of unexpected type
    check_suite = CheckSuite()
    results = []
    results.append(Result(BaseCheck.MEDIUM, True, 'one'))
    results.append(Result(BaseCheck.MEDIUM, (1, 3), 'one'))
    results.append(Result(BaseCheck.MEDIUM, None, 'one'))
    results.append(Result(BaseCheck.MEDIUM, True, 'two'))
    results.append(Result(BaseCheck.MEDIUM, np.isnan(1), 'two'))  # value is type numpy.bool_
    grouped_scores = check_suite.scores(results)
    self.assertEqual(grouped_scores[0].name, 'one')
    self.assertEqual(grouped_scores[0].value, (2, 4))
    self.assertEqual(grouped_scores[1].name, 'two')
    self.assertEqual(grouped_scores[1].value, (1, 2))
def setUp(self):
    '''
    Initialize the dataset
    '''
    check_suite = CheckSuite()
    check_suite.load_all_available_checkers()
    self.cs = check_suite
    # get current std names table version (it changes)
    self._std_names = util.StandardNameTable()
def test_suite(self):
    # BWA: what's the purpose of this test? Just to see if the suite
    # runs without errors?
    check_suite = CheckSuite()
    check_suite.load_all_available_checkers()
    loaded = check_suite.load_dataset(static_files['2dim'])
    check_suite.run(loaded, 'acdd')
def run_checker(self, checker, dataset_location):
    """Run a single checker over a dataset and store structured results/errors."""
    suite = CheckSuite()
    suite.load_all_available_checkers()
    dataset = suite.load_dataset(dataset_location)
    score_groups = suite.run(dataset, [], checker)
    checker_results, self.errors = score_groups[checker]
    self.results = suite.build_structure(checker, checker_results, dataset_location)
def test_group_func(self):
    # This is checking for issue #183, where group_func results in
    # IndexError: list index out of range
    cs = CheckSuite()
    cs.load_all_available_checkers()
    ds = cs.load_dataset(static_files['bad_data_type'])
    score_groups = cs.run(ds, 'cf')

    limit = 2
    # dict.iteritems() was removed in Python 3; items() behaves the same
    # here and is equally usable on Python 2.
    for checker, rpair in score_groups.items():
        groups, errors = rpair
        score_list, points, out_of = cs.standard_output(limit, checker, groups)
        # This asserts that print is able to generate all of the unicode output
        cs.non_verbose_output_generation(score_list, groups, limit, points, out_of)
def run_checker(cls, ds_loc, checker_names, verbose, criteria,
                output_filename='stdout', output_format='stdout'):
    """
    Static check runner.

    @param ds_loc          Dataset location (url or file)
    @param checker_names   List of string names to run, should match keys of
                           checkers dict (empty list means run all)
    @param verbose         Verbosity of the output (0, 1, 2)
    @param criteria        Determines failure (lenient, normal, strict)
    @param output_filename Path to the file for output
    @param output_format   Format of the output

    @returns               If the tests failed (based on the criteria)
    """
    # NOTE(review): the 'stdout' defaults are never accepted by the format
    # dispatch below (only '-'/'text', 'html', 'json' are handled), so
    # calling with defaults raises TypeError; callers apparently always pass
    # explicit values -- confirm before changing the defaults.
    cs = CheckSuite()
    ds = cs.load_dataset(ds_loc)
    score_groups = cs.run(ds, *checker_names)

    # Translate the user-facing criteria name into a numeric score limit.
    if criteria == 'normal':
        limit = 2
    elif criteria == 'strict':
        limit = 1
    elif criteria == 'lenient':
        limit = 3
    else:
        # An unknown criteria previously fell through and produced an
        # UnboundLocalError on `limit`; fail fast with a clear message.
        raise ValueError('Invalid criteria %s' % criteria)

    if output_filename == '-' and output_format == 'text':
        groups = cls.stdout_output(cs, score_groups, verbose, limit)
    elif output_format == 'html':
        groups = cls.html_output(cs, score_groups, output_filename, ds_loc, limit)
    elif output_format == 'json':
        groups = cls.json_output(cs, score_groups, output_filename, ds_loc, limit)
    else:
        raise TypeError('Invalid format %s' % output_format)

    errors_occurred = cls.check_errors(score_groups, verbose)

    return cs.passtree(groups, limit), errors_occurred
def run_checker(self, checker, dataset_location):
    """Execute one checker against the given dataset, caching results and errors."""
    cs = CheckSuite()
    cs.load_all_available_checkers()
    loaded = cs.load_dataset(dataset_location)
    groups = cs.run(loaded, [], checker)
    raw_results, self.errors = groups[checker]
    self.results = cs.build_structure(checker, raw_results, dataset_location)
def test_skip_checks(self):
    """Tests that checks are properly skipped when specified"""
    cs = CheckSuite()
    cs.load_all_available_checkers()
    ds = cs.load_dataset(static_files['2dim'])
    # exclude title from the check attributes
    score_groups = cs.run(ds, ['check_high'], 'acdd')
    excluded = {'Conventions', 'title', 'keywords', 'summary'}
    assert not any(sg.name in excluded for sg in score_groups['acdd'][0])
def test_standardise_cf(standardised):
    """Run the CF checker on a standardised dataset; every check must score full marks."""
    cs = CheckSuite()
    cs.load_all_available_checkers()
    dataset = cs.load_dataset(standardised)
    cf_results = cs.run(dataset, [], 'cf')
    failures = 0
    for result in cf_results['cf'][0]:
        scored, possible = result.value[0], result.value[1]
        if possible - scored > 0:
            # report each failing check on stderr before failing the test
            print(result, file=sys.stderr)
            failures += 1
    assert failures == 0
class TestCFIntegration(BaseTestCase):
    """Integration tests running the full CF check suite over static fixture
    files and asserting the exact checker messages produced."""

    def setUp(self):
        '''
        Initialize the dataset
        '''
        self.cs = CheckSuite()
        self.cs.load_all_available_checkers()
        # get current std names table version (it changes)
        self._std_names = util.StandardNameTable()

    # --------------------------------------------------------------------------------
    # Helper Methods
    # --------------------------------------------------------------------------------

    def new_nc_file(self):
        '''
        Make a new temporary netCDF file for the scope of the test
        '''
        nc_file_path = os.path.join(gettempdir(), 'example.nc')
        if os.path.exists(nc_file_path):
            raise IOError('File Exists: %s' % nc_file_path)
        nc = Dataset(nc_file_path, 'w')
        # both the file and the handle are cleaned up after the test
        self.addCleanup(os.remove, nc_file_path)
        self.addCleanup(nc.close)
        return nc

    def load_dataset(self, nc_dataset):
        '''
        Return a loaded NC Dataset for the given path
        '''
        if not isinstance(nc_dataset, str):
            raise ValueError("nc_dataset should be a string")
        nc_dataset = Dataset(nc_dataset, 'r')
        self.addCleanup(nc_dataset.close)
        return nc_dataset

    def get_results(self, check_results):
        '''
        Returns a tuple of the value scored, possible, and a list of messages
        in the result set.
        '''
        aggregation = self.cs.build_structure('cf', check_results['cf'][0], 'test', 1)
        out_of = 0
        scored = 0
        results = aggregation['all_priorities']
        for r in results:
            # Result values are either (scored, possible) tuples or bare
            # booleans worth a single point.
            if isinstance(r.value, tuple):
                out_of += r.value[1]
                scored += r.value[0]
            else:
                out_of += 1
                scored += int(r.value)
        # Store the messages
        messages = []
        for r in results:
            messages.extend(r.msgs)
        return scored, out_of, messages

    def test_sldmb_43093_agg(self):
        dataset = self.load_dataset(STATIC_FILES['sldmb_43093_agg'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        assert len(messages) == 7
        msgs = [
            u'attribute time:_CoordianteAxisType should begin with a letter and be composed of letters, digits, and underscores',
            u'attribute lat:_CoordianteAxisType should begin with a letter and be composed of letters, digits, and underscores',
            u'attribute lon:_CoordianteAxisType should begin with a letter and be composed of letters, digits, and underscores',
            u'§2.6.2 global attribute history should exist and be a non-empty string',
            u'standard_name temperature is not defined in Standard Name Table v49',
            u"temperature's auxiliary coordinate specified by the coordinates attribute, precise_lat, is not a variable in this dataset",
            u"temperature's auxiliary coordinate specified by the coordinates attribute, precise_lon, is not a variable in this dataset"
        ]
        assert all(m in messages for m in msgs)

    @pytest.mark.slowtest
    def test_ocos(self):
        dataset = self.load_dataset(STATIC_FILES['ocos'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert len(messages) == 63
        msgs = [
            u"zeta's dimensions are not in the recommended order T, Z, Y, X. They are ocean_time, eta_rho, xi_rho",
            u"ubar's dimensions are not in the recommended order T, Z, Y, X. They are ocean_time, eta_u, xi_u",
            u"vbar's dimensions are not in the recommended order T, Z, Y, X. They are ocean_time, eta_v, xi_v",
            u"u's dimensions are not in the recommended order T, Z, Y, X. They are ocean_time, s_rho, eta_u, xi_u",
            u"v's dimensions are not in the recommended order T, Z, Y, X. They are ocean_time, s_rho, eta_v, xi_v",
            u"w's dimensions are not in the recommended order T, Z, Y, X. They are ocean_time, s_w, eta_rho, xi_rho",
            u"temp's dimensions are not in the recommended order T, Z, Y, X. They are ocean_time, s_rho, eta_rho, xi_rho",
            u"salt's dimensions are not in the recommended order T, Z, Y, X. They are ocean_time, s_rho, eta_rho, xi_rho",
            u"AKv's dimensions are not in the recommended order T, Z, Y, X. They are ocean_time, s_w, eta_rho, xi_rho",
            u"AKt's dimensions are not in the recommended order T, Z, Y, X. They are ocean_time, s_w, eta_rho, xi_rho",
            u"AKs's dimensions are not in the recommended order T, Z, Y, X. They are ocean_time, s_w, eta_rho, xi_rho",
            u"tke's dimensions are not in the recommended order T, Z, Y, X. They are ocean_time, s_w, eta_rho, xi_rho",
            u"shflux's dimensions are not in the recommended order T, Z, Y, X. They are ocean_time, eta_rho, xi_rho",
            u"latent's dimensions are not in the recommended order T, Z, Y, X. They are ocean_time, eta_rho, xi_rho",
            u"sensible's dimensions are not in the recommended order T, Z, Y, X. They are ocean_time, eta_rho, xi_rho",
            u"lwrad's dimensions are not in the recommended order T, Z, Y, X. They are ocean_time, eta_rho, xi_rho",
            u"swrad's dimensions are not in the recommended order T, Z, Y, X. They are ocean_time, eta_rho, xi_rho",
            u'§2.6.1 Conventions global attribute does not contain "CF-1.6". The CF Checker only supports CF-1.6 at this time.',
            u"units (None) attribute of 's_w' must be a string compatible with UDUNITS",
            u"units (None) attribute of 's_rho' must be a string compatible with UDUNITS",
            u"units (None) attribute of 'Cs_w' must be a string compatible with UDUNITS",
            u"units (None) attribute of 'user' must be a string compatible with UDUNITS",
            u"units (None) attribute of 'Cs_r' must be a string compatible with UDUNITS",
            u"CF recommends latitude variable 'lat_rho' to use units degrees_north",
            u"CF recommends latitude variable 'lat_u' to use units degrees_north",
            u"CF recommends latitude variable 'lat_v' to use units degrees_north",
            u"CF recommends latitude variable 'lat_psi' to use units degrees_north",
            u"CF recommends longitude variable 'lon_rho' to use units degrees_east",
            u"CF recommends longitude variable 'lon_u' to use units degrees_east",
            u"CF recommends longitude variable 'lon_v' to use units degrees_east",
            u"CF recommends longitude variable 'lon_psi' to use units degrees_east",
            u'Unidentifiable feature for variable dt',
            u'Unidentifiable feature for variable dtfast',
            u'Unidentifiable feature for variable dstart',
            u'Unidentifiable feature for variable nl_tnu2',
            u'Unidentifiable feature for variable nl_visc2',
            u'Unidentifiable feature for variable Akt_bak',
            u'Unidentifiable feature for variable Akv_bak',
            u'Unidentifiable feature for variable Akk_bak',
            u'Unidentifiable feature for variable Akp_bak',
            u'Unidentifiable feature for variable rdrg',
            u'Unidentifiable feature for variable Zob',
            u'Unidentifiable feature for variable Zos',
            u'Unidentifiable feature for variable Znudg',
            u'Unidentifiable feature for variable M2nudg',
            u'Unidentifiable feature for variable M3nudg',
            u'Unidentifiable feature for variable Tnudg',
            u'Unidentifiable feature for variable FSobc_in',
            u'Unidentifiable feature for variable FSobc_out',
            u'Unidentifiable feature for variable M2obc_in',
            u'Unidentifiable feature for variable M2obc_out',
            u'Unidentifiable feature for variable Tobc_in',
            u'Unidentifiable feature for variable Tobc_out',
            u'Unidentifiable feature for variable M3obc_in',
            u'Unidentifiable feature for variable M3obc_out',
            u'Unidentifiable feature for variable rho0',
            u'Unidentifiable feature for variable xl',
            u'Unidentifiable feature for variable el',
            u'Unidentifiable feature for variable Tcline',
            u'Unidentifiable feature for variable hc',
            u'Unidentifiable feature for variable Cs_r',
            u'Unidentifiable feature for variable Cs_w',
            u'Unidentifiable feature for variable user'
        ]
        assert all([m in messages for m in msgs])

    def test_l01_met(self):
        dataset = self.load_dataset(STATIC_FILES['l01-met'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        assert len(messages) == 16
        # The variable is supposed to be a status flag but it's mislabled
        msgs = [
            u'units for variable air_temperature_qc must be convertible to K currently they are 1',
            u'units for variable wind_speed_qc must be convertible to m s-1 currently they are 1',
            u'standard_name visibility is not defined in Standard Name Table v49',
            u'standard_name modifier data_quality for variable visibility_qc is not a valid modifier according to appendix C',
            u'standard_name wind_direction is not defined in Standard Name Table v49',
            u'standard_name modifier data_quality for variable wind_direction_qc is not a valid modifier according to appendix C',
            u'standard_name wind_gust is not defined in Standard Name Table v49',
            u'standard_name modifier data_quality for variable wind_gust_qc is not a valid modifier according to appendix C',
            u'standard_name modifier data_quality for variable air_temperature_qc is not a valid modifier according to appendix C',
            u'standard_name use_wind is not defined in Standard Name Table v49',
            u'standard_name barometric_pressure is not defined in Standard Name Table v49',
            u'standard_name modifier data_quality for variable barometric_pressure_qc is not a valid modifier according to appendix C',
            u'standard_name modifier data_quality for variable wind_speed_qc is not a valid modifier according to appendix C',
            u'standard_name barometric_pressure is not defined in Standard Name Table v49',
            u"CF recommends latitude variable 'lat' to use units degrees_north",
            u"CF recommends longitude variable 'lon' to use units degrees_east"
        ]
        assert all(m in messages for m in msgs)

    def test_usgs_dem_saipan(self):
        dataset = self.load_dataset(STATIC_FILES['usgs_dem_saipan'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        assert len(messages) == 1
        msgs = [
            u'§2.6.1 Conventions global attribute does not contain "CF-1.6". The CF Checker only supports CF-1.6 at this time.'
        ]
        assert all(m in messages for m in msgs)

    def test_sp041(self):
        dataset = self.load_dataset(STATIC_FILES['sp041'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        assert len(messages) == 3
        assert (u"lat_qc is not a variable in this dataset") in messages
        # for/else: the else branch fires only if no message matched
        for i, msg in enumerate(messages):
            if msg.startswith("Different feature types"):
                break
        else:
            assert False, "'Different feature types discovered' was not found in the checker messages"

    def test_3mf07(self):
        """Load the 3mf07.nc file and run the CF check suite on it.

        There should be several variable/attribute combos which fail:
        - latitude:valid min
        - latitude:valid_max
        - longitude:valid_min
        - longitude:valid_max
        - references is an empty string
        - comment (global attr) is an empty string
        - z:dimensions are not a proper subset of dims for variable flag, haul
        - variable flag/haul has an unidentifiable feature"""
        dataset = self.load_dataset(STATIC_FILES['3mf07'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        msgs = [
            u'latitude:valid_min must be a numeric type not a string',
            u'latitude:valid_max must be a numeric type not a string',
            u'longitude:valid_min must be a numeric type not a string',
            u'longitude:valid_max must be a numeric type not a string',
            u'§2.6.2 references global attribute should be a non-empty string',
            u'§2.6.2 comment global attribute should be a non-empty string',
            u'dimensions for auxiliary coordinate variable z (z) are not a subset of dimensions for variable flag (profile)',
            u'dimensions for auxiliary coordinate variable z (z) are not a subset of dimensions for variable haul (profile)',
            u'Unidentifiable feature for variable flag',
            u'Unidentifiable feature for variable haul'
        ]
        assert scored < out_of
        assert all(m in messages for m in msgs)

    def test_ooi_glider(self):
        dataset = self.load_dataset(STATIC_FILES['ooi_glider'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        assert len(messages) == 5
        msgs = [
            u'§2.6.2 comment global attribute should be a non-empty string',
            u"units (None) attribute of 'deployment' must be a string compatible with UDUNITS",
            u'Attribute long_name or/and standard_name is highly recommended for variable deployment',
            u"latitude variable 'latitude' should define standard_name='latitude' or axis='Y'",
            u"longitude variable 'longitude' should define standard_name='longitude' or axis='X'"
        ]
        assert all(m in messages for m in msgs)

    def test_swan(self):
        dataset = self.load_dataset(STATIC_FILES['swan'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        assert len(messages) == 10
        msgs = [
            u'global attribute _CoordSysBuilder should begin with a letter and be composed of letters, digits, and underscores',
            u'§2.6.1 Conventions global attribute does not contain "CF-1.6". The CF Checker only supports CF-1.6 at this time.',
            u'units for variable time_offset must be convertible to s currently they are hours since 2013-02-18T00:00:00Z',
            u'units for variable time_run must be convertible to s currently they are hours since 2013-02-18 00:00:00.000 UTC',
            u"lon's axis attribute must be T, X, Y, or Z, currently x",
            "lat's axis attribute must be T, X, Y, or Z, currently y",
            u"z's axis attribute must be T, X, Y, or Z, currently z",
            u"z: vertical coordinates not defining pressure must include a positive attribute that is either 'up' or 'down'",
            u'GRID is not a valid CF featureType. It must be one of point, timeseries, trajectory, profile, timeseriesprofile, trajectoryprofile',
            u'Unidentifiable feature for variable time_offset'
        ]
        assert all(m in messages for m in msgs)

    def test_kibesillah(self):
        dataset = self.load_dataset(STATIC_FILES['kibesillah'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        assert len(messages) == 1
        # test for global attributes (CF 2.6.2)
        assert (u"§2.6.2 global attribute title should exist and be a non-empty string") in messages

    def test_pr_inundation(self):
        dataset = self.load_dataset(STATIC_FILES['pr_inundation'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        assert len(messages) == 21
        msgs = [
            u"waterlevel's dimensions are not in the recommended order T, Z, Y, X. They are time, m, n",
            u"velocity_x's dimensions are not in the recommended order T, Z, Y, X. They are time, Layer, m, n",
            u"velocity_y's dimensions are not in the recommended order T, Z, Y, X. They are time, Layer, m, n",
            u"tau_x's dimensions are not in the recommended order T, Z, Y, X. They are time, m, n",
            u"tau_y's dimensions are not in the recommended order T, Z, Y, X. They are time, m, n",
            u'§2.6.2 grid_depth:comment should be a non-empty string',
            u'§2.6.2 depth:comment should be a non-empty string',
            u'§2.6.2 institution global attribute should be a non-empty string',
            u'§2.6.2 comment global attribute should be a non-empty string',
            u"units (None) attribute of 'LayerInterf' must be a string compatible with UDUNITS",
            u"units (None) attribute of 'time_bounds' must be a string compatible with UDUNITS",
            u"units (None) attribute of 'Layer' must be a string compatible with UDUNITS",
            u'units for variable area must be convertible to m2 currently they are degrees2',
            u"k: vertical coordinates not defining pressure must include a positive attribute that is either 'up' or 'down'",
            u'grid_longitude is not associated with a coordinate defining true latitude and sharing a subset of dimensions',
            u'grid_longitude is not associated with a coordinate defining true longitude and sharing a subset of dimensions',
            u'grid_latitude is not associated with a coordinate defining true latitude and sharing a subset of dimensions',
            u'grid_latitude is not associated with a coordinate defining true longitude and sharing a subset of dimensions',
            u'time_bounds might be a cell boundary variable but there are no variables that define it as a boundary using the `bounds` attribute.',
            u'Unidentifiable feature for variable time_bounds',
            u'Unidentifiable feature for variable grid_depth'
        ]
        assert all(m in messages for m in msgs)

    def test_fvcom(self):
        dataset = self.load_dataset(STATIC_FILES['fvcom'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        assert len(messages) == 40
        for msg in messages:
            if msg.startswith("dimensions for auxiliary coordinate variable siglay"):
                break
        # it's not clear to me what this is supposed to be doing -- this else clause is outside of the if
        else:
            raise AssertionError(u"\"dimensions for auxiliary coordinate variable siglay (node, siglay) "
                                 "are not a subset of dimensions for variable u (siglay, nele, time)\""
                                 " not in messages")
        assert (u"Unidentifiable feature for variable x") in messages
        assert (u'§2.6.1 Conventions global attribute does not contain '
                '"CF-1.6". The CF Checker only supports CF-1.6 '
                'at this time.') in messages

    def test_ww3(self):
        dataset = self.load_dataset(STATIC_FILES['ww3'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        assert len(messages) == 8
        msgs = [
            u'§2.6.2 global attribute title should exist and be a non-empty string',
            u'§2.6.2 global attribute history should exist and be a non-empty string',
            u'§2.6.1 Conventions field is not present',
            u'Attribute long_name or/and standard_name is highly recommended for variable time',
            u'Attribute long_name or/and standard_name is highly recommended for variable lon',
            u'Attribute long_name or/and standard_name is highly recommended for variable lat',
            u"latitude variable 'lat' should define standard_name='latitude' or axis='Y'",
            u"longitude variable 'lon' should define standard_name='longitude' or axis='X'"
        ]
        assert all(m in messages for m in msgs)

    def test_glcfs(self):
        dataset = self.load_dataset(STATIC_FILES['glcfs'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        assert len(messages) == 14
        assert (u"units for variable time_offset must be convertible to s currently "
                "they are hours since 2016-01-01T12:00:00Z") in messages
        assert (u"standard_name cloud_cover is not defined in Standard Name Table v{}".format(self._std_names._version)) in messages
        assert (u"standard_name dew_point is not defined in Standard Name Table v{}".format(self._std_names._version)) in messages
        assert (u"GRID is not a valid CF featureType. It must be one of point, timeseries, "
                "trajectory, profile, timeseriesprofile, trajectoryprofile") in messages
        assert (u"global attribute _CoordSysBuilder should begin with a letter and "
                "be composed of letters, digits, and underscores") in messages
        # NOTE(review): the following assert is missing an `in messages`
        # membership check, so it asserts a non-empty string (always true)
        # and verifies nothing -- confirm intent and fix.
        assert (u"source should be defined")
        assert (u'units for cl, "fraction" are not recognized by UDUNITS') in messages

    def test_ncei_templates(self):
        """
        Tests some of the NCEI NetCDF templates, which usually should get a
        perfect score.
        """
        dataset = self.load_dataset(STATIC_FILES['NCEI_profile_template_v2_0'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of

    def test_bad_cf_roles(self):
        '''
        Tests the CF checker detects datasets with more than 2 defined cf_role variables.
        '''
        dataset = self.load_dataset(STATIC_FILES['bad_cf_role'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        msgs = [
            u'§2.6.2 global attribute title should exist and be a non-empty string',
            u'§2.6.2 global attribute history should exist and be a non-empty string',
            u'§2.6.1 Conventions field is not present',
            u'Unidentifiable feature for variable T',
            u'§9.5 The only acceptable values of cf_role for Discrete Geometry CF data sets are timeseries_id, profile_id, and trajectory_id'
        ]
        assert scored < out_of
        assert all(m in messages for m in msgs)
def run_checker(cls, ds_loc, checker_names, verbose, criteria,
                skip_checks=None, output_filename='-',
                output_format='text'):
    """
    Static check runner.

    @param ds_loc          Dataset location (url or file), or a list of
                           locations to check in order
    @param checker_names   List of string names to run, should match keys of
                           checkers dict (empty list means run all)
    @param verbose         Verbosity of the output (0, 1, 2)
    @param criteria        Determines failure (lenient, normal, strict)
    @param output_filename Path to the file for output
    @param skip_checks     Names of checks to skip
    @param output_format   Format of the output(s); a single format name or a
                           list of format names (default: 'text')
    @returns               If the tests failed (based on the criteria)
    """
    all_groups = []
    cs = CheckSuite()
    # using OrderedDict is important here to preserve the order
    # of multiple datasets which may be passed in
    score_dict = OrderedDict()
    if not isinstance(ds_loc, six.string_types):
        locs = ds_loc
    # if single dataset, put in list
    else:
        locs = [ds_loc]

    # Make sure output format is a list.  The default is the *string* 'text'
    # (normalized here) rather than a mutable list default, which would be a
    # single shared object across calls.
    if isinstance(output_format, six.string_types):
        output_format = [output_format]

    # loop through each dataset and run specified checks
    for loc in locs:
        ds = cs.load_dataset(loc)
        score_groups = cs.run(ds, skip_checks, *checker_names)
        for group in score_groups.values():
            all_groups.append(group[0])
        # TODO: consider wrapping in a proper context manager instead
        if hasattr(ds, 'close'):
            ds.close()

        if not score_groups:
            raise ValueError("No checks found, please check the name of the checker(s) and that they are installed")
        else:
            score_dict[loc] = score_groups

    # define a score limit to truncate the output to the strictness level
    # specified by the user
    if criteria == 'normal':
        limit = 2
    elif criteria == 'strict':
        limit = 1
    elif criteria == 'lenient':
        limit = 3
    else:
        # previously an unknown criteria fell through and raised an opaque
        # UnboundLocalError on `limit` below; fail fast with a clear message
        raise ValueError("Invalid criteria '{}'; expected one of "
                         "'lenient', 'normal', 'strict'".format(criteria))

    for out_fmt in output_format:
        if out_fmt == 'text':
            if output_filename == '-':
                cls.stdout_output(cs, score_dict, verbose, limit)
            # need to redirect output from stdout since print functions are
            # presently used to generate the standard report output
            else:
                # Update file name if needed
                if len(output_format) > 1:
                    output_filename = '{}.txt'.format(os.path.splitext(output_filename)[0])
                with io.open(output_filename, 'w', encoding='utf-8') as f:
                    with stdout_redirector(f):
                        cls.stdout_output(cs, score_dict, verbose, limit)
        elif out_fmt == 'html':
            # Update file name if needed
            if len(output_format) > 1 and output_filename != '-':
                output_filename = '{}.html'.format(os.path.splitext(output_filename)[0])
            cls.html_output(cs, score_dict, output_filename, ds_loc, limit)
        elif out_fmt in {'json', 'json_new'}:
            # Update file name if needed
            if len(output_format) > 1 and output_filename != '-':
                output_filename = '{}.json'.format(os.path.splitext(output_filename)[0])
            cls.json_output(cs, score_dict, output_filename, ds_loc, limit, out_fmt)
        else:
            raise TypeError('Invalid format %s' % out_fmt)

    # NOTE(review): only the *last* dataset's score_groups are inspected for
    # errors here; if per-dataset error reporting is intended, check_errors
    # should run inside the loop above.  Left as-is to preserve behavior.
    errors_occurred = cls.check_errors(score_groups, verbose)

    return (all(cs.passtree(groups, limit) for groups in all_groups),
            errors_occurred)
class TestSuite(unittest.TestCase):
    """Integration tests for CheckSuite: running checkers, skipping checks,
    output generation, and scoring/grouping behavior."""
    # @see
    # http://www.saltycrane.com/blog/2012/07/how-prevent-nose-unittest-using-docstring-when-verbosity-2/

    def setUp(self):
        # Fresh suite with every registered checker loaded, per test
        self.cs = CheckSuite()
        self.cs.load_all_available_checkers()

    def shortDescription(self):
        # Prevent nose from displaying the docstring instead of the test name
        return None

    # override __str__ and __repr__ behavior to show a copy-pastable nosetest name for ion tests
    # ion.module:TestClassName.test_function_name
    def __repr__(self):
        name = self.id()
        name = name.split('.')
        if name[0] not in ["ion", "pyon"]:
            return "%s (%s)" % (name[-1], '.'.join(name[:-1]))
        else:
            return "%s ( %s )" % (name[-1], '.'.join(name[:-2]) + ":" + '.'.join(name[-2:]))
    __str__ = __repr__

    def test_suite(self):
        # BWA: what's the purpose of this test? Just to see if the suite
        # runs without errors?
        ds = self.cs.load_dataset(static_files['2dim'])
        self.cs.run(ds, 'acdd')

    def test_unicode_formatting(self):
        # Verify that report generation can handle unicode in check messages
        ds = self.cs.load_dataset(static_files['bad_region'])
        score_groups = self.cs.run(ds, 'cf')

        limit = 2
        for checker, rpair in score_groups.items():
            groups, errors = rpair
            score_list, points, out_of = self.cs.standard_output(ds.filepath(),
                                                                 limit,
                                                                 checker,
                                                                 groups)
            # This asserts that print is able to generate all of the unicode output
            self.cs.standard_output_generation(groups, limit, points, out_of, checker)

    def test_skip_checks(self):
        """Tests that checks are properly skipped when specified"""
        ds = self.cs.load_dataset(static_files['2dim'])
        # exclude title from the check attributes
        score_groups = self.cs.run(ds, ['check_high'], 'acdd')
        assert all(sg.name not in {'Conventions', 'title', 'keywords',
                                   'summary'} for sg in score_groups['acdd'][0])

    def test_skip_check_level(self):
        """Checks level limited skip checks"""
        ds = self.cs.load_dataset(static_files['ru07'])
        score_groups = self.cs.run(ds, ['check_flags:A',
                                        'check_convention_possibly_var_attrs:M',
                                        'check_standard_name:L'], 'cf')

        name_set = {sg.name for sg in score_groups['cf'][0]}
        # flattened set of messages
        msg_set = {msg for sg in score_groups['cf'][0] for msg in sg.msgs}

        # NOTE(review): the '§3.5 lat is a valid flags variable' literal is
        # duplicated below; harmless inside a set literal but worth cleaning up
        expected_excluded_names = {u'§3.5 flag_meanings for lat',
                                   u'§3.5 flag_meanings for lon',
                                   u'§3.5 lat is a valid flags variable',
                                   u'§3.5 lat is a valid flags variable',
                                   u'§3.5 lon is a valid flags variable'}

        self.assertTrue(len(expected_excluded_names & name_set) == 0)

        # should skip references
        ref_msg = u'references global attribute should be a non-empty string'
        self.assertTrue(ref_msg not in msg_set)
        # check_standard_name is high priority, but we requested only low,
        # so the standard_name check should still exist
        standard_name_hdr = u'§3.3 Standard Name'
        self.assertTrue(standard_name_hdr in name_set)

    def test_group_func(self):
        # This is checking for issue #183, where group_func results in
        # IndexError: list index out of range
        ds = self.cs.load_dataset(static_files['bad_data_type'])
        score_groups = self.cs.run(ds, 'cf')

        limit = 2
        for checker, rpair in score_groups.items():
            groups, errors = rpair
            score_list, points, out_of = self.cs.standard_output(ds.filepath(),
                                                                 limit,
                                                                 checker,
                                                                 groups)
            # This asserts that print is able to generate all of the unicode output
            self.cs.standard_output_generation(groups, limit, points, out_of, checker)

    def test_score_grouping(self):
        # Testing the grouping of results for output, which can fail
        # if some assumptions are not met, e.g. if a Result object has
        # a value attribute of unexpected type
        res = [
            Result(BaseCheck.MEDIUM, True, 'one'),
            Result(BaseCheck.MEDIUM, (1, 3), 'one'),
            Result(BaseCheck.MEDIUM, None, 'one'),
            Result(BaseCheck.MEDIUM, True, 'two'),
            Result(BaseCheck.MEDIUM, np.isnan(1), 'two')  # value is type numpy.bool_
        ]
        score = self.cs.scores(res)
        self.assertEqual(score[0].name, 'one')
        self.assertEqual(score[0].value, (2, 4))
        self.assertEqual(score[1].name, 'two')
        self.assertEqual(score[1].value, (1, 2))

    def test_cdl_file(self):
        # Testing whether you can run compliance checker on a .cdl file
        # Load the cdl file
        ds = self.cs.load_dataset(static_files['test_cdl'])
        vals = self.cs.run(ds, 'cf')

        limit = 2
        for checker, rpair in vals.items():
            groups, errors = rpair
            score_list, cdl_points, cdl_out_of = self.cs.standard_output(ds.filepath(),
                                                                         limit,
                                                                         checker,
                                                                         groups)
            # This asserts that print is able to generate all of the unicode output
            self.cs.standard_output_generation(groups, limit, cdl_points, cdl_out_of, checker)
        ds.close()

        # Ok now load the nc file that it came from
        ds = self.cs.load_dataset(static_files['test_cdl_nc'])
        vals = self.cs.run(ds, 'cf')

        limit = 2
        for checker, rpair in vals.items():
            groups, errors = rpair
            score_list, nc_points, nc_out_of = self.cs.standard_output(ds.filepath(),
                                                                       limit,
                                                                       checker,
                                                                       groups)
            # This asserts that print is able to generate all of the unicode output
            self.cs.standard_output_generation(groups, limit, nc_points, nc_out_of, checker)
        ds.close()

        nc_file_path = static_files['test_cdl'].replace('.cdl', '.nc')
        self.addCleanup(os.remove, nc_file_path)

        # Ok the scores should be equal!
        self.assertEqual(nc_points, cdl_points)
        self.assertEqual(nc_out_of, cdl_out_of)

    def test_load_local_dataset_GenericFile(self):
        resp = self.cs.load_local_dataset(static_files['empty'])
        # NOTE(review): `isinstance(...) == True` is redundant; a bare
        # `assert isinstance(resp, GenericFile)` is the idiomatic form
        assert isinstance(resp, GenericFile) == True

    def test_standard_output_score_header(self):
        """
        Check that the output score header only checks the number of
        potential issues, rather than the weighted score
        """
        ds = self.cs.load_dataset(static_files['bad_region'])
        score_groups = self.cs.run(ds, [], 'cf')
        limit = 2
        groups, errors = score_groups['cf']
        score_list, all_passed, out_of = self.cs.standard_output(
            ds.filepath(), limit, 'cf', groups)
        assert all_passed < out_of
def setUp(self): self.cs = CheckSuite() self.cs.load_all_available_checkers()
def glider_deployment_check(data_type=None, completed=True, force=False, deployment_dir=None, username=None): """ """ cs = CheckSuite() cs.load_all_available_checkers() with app.app_context(): if data_type is not None: is_delayed_mode = data_type == 'delayed' if is_delayed_mode: q_dict = {"delayed_mode": True, "completed": completed} else: q_dict = { "$or": [{ "delayed_mode": False }, { "delayed_mode": { "$exists": False } }], "completed": completed } if not force: q_dict["compliance_check_passed"] = {"$ne": True} # TODO: combine username/deployment cases? if username: q_dict = {"username": username} # a particular deployment has been specified elif deployment_dir: q_dict = {"deployment_dir": deployment_dir} else: q_dict = {} agg_pipeline = [{ "$match": q_dict }, { "$group": { "_id": "$user_id", "deployments": { "$push": { "_id": "$_id", "name": "$name", "deployment_dir": "$deployment_dir" } } } }] # if force is enabled, re-check the datasets no matter what # is this syntax still used? if the first fn call fails, use the # second set of results try: agg_result_set = db.deployments.aggregate(agg_pipeline)['result'] except: agg_result_set = db.deployments.aggregate(agg_pipeline, cursor={}) for res in agg_result_set: user = db.users.find_one(res["_id"]) all_messages = [] failing_deployments = [] for dep in res['deployments']: root_logger.info("Running compliance check on glider " "deployment: {}".format(dep)) try: dep_passed, dep_messages = process_deployment(dep) all_messages.append(dep_messages) if not dep_passed: failing_deployments.append(dep) except Exception as e: root_logger.exception( "Exception occurred while processing deployment {}". format(dep['name'])) text_body = '' send_deployment_cchecker_email(user, failing_deployments, "\n".join(all_messages))
def test_cdl_file(self): # Testing whether you can run compliance checker on a .cdl file cs = CheckSuite() cs.load_all_available_checkers() # Load the cdl file ds = cs.load_dataset(static_files['test_cdl']) vals = cs.run(ds, 'cf') limit = 2 for checker, rpair in vals.items(): groups, errors = rpair score_list, cdl_points, cdl_out_of = cs.standard_output( limit, checker, groups) # This asserts that print is able to generate all of the unicode output cs.non_verbose_output_generation(score_list, groups, limit, cdl_points, cdl_out_of) ds.close() # Ok now load the nc file that it came from ds = cs.load_dataset(static_files['test_cdl_nc']) vals = cs.run(ds, 'cf') limit = 2 for checker, rpair in vals.items(): groups, errors = rpair score_list, nc_points, nc_out_of = cs.standard_output( limit, checker, groups) # This asserts that print is able to generate all of the unicode output cs.non_verbose_output_generation(score_list, groups, limit, nc_points, nc_out_of) ds.close() nc_file_path = static_files['test_cdl'].replace('.cdl', '.nc') self.addCleanup(os.remove, nc_file_path) # Ok the scores should be equal! self.assertEqual(nc_points, cdl_points) self.assertEqual(nc_out_of, cdl_out_of)
def test_suite(self): cs = CheckSuite() cs.load_all_available_checkers() ds = cs.load_dataset(static_files['2dim']) vals = cs.run(ds, 'acdd')
def test_64bit(self): dataset = self.get_pair(static_files['ints64']) suite = CheckSuite() suite.checkers = {'cf': CFBaseCheck} suite.run(dataset, 'cf')
class TestCFIntegration(BaseTestCase):
    """End-to-end CF checker tests: each test runs the full 'cf' suite over a
    fixture dataset and asserts on the exact messages produced."""

    def setUp(self):
        """
        Initialize the dataset
        """
        self.cs = CheckSuite()
        self.cs.load_all_available_checkers()
        # get current std names table version (it changes)
        self._std_names = util.StandardNameTable()

    # --------------------------------------------------------------------------------
    # Helper Methods
    # --------------------------------------------------------------------------------

    def new_nc_file(self):
        """
        Make a new temporary netCDF file for the scope of the test
        """
        nc_file_path = os.path.join(gettempdir(), "example.nc")
        if os.path.exists(nc_file_path):
            raise IOError("File Exists: %s" % nc_file_path)
        nc = Dataset(nc_file_path, "w")
        self.addCleanup(os.remove, nc_file_path)
        self.addCleanup(nc.close)
        return nc

    def load_dataset(self, nc_dataset):
        """
        Return a loaded NC Dataset for the given path
        """
        if not isinstance(nc_dataset, str):
            raise ValueError("nc_dataset should be a string")
        nc_dataset = Dataset(nc_dataset, "r")
        self.addCleanup(nc_dataset.close)
        return nc_dataset

    def get_results(self, check_results):
        """
        Returns a tuple of the value scored, possible, and a list of messages
        in the result set.
        """
        aggregation = self.cs.build_structure("cf", check_results["cf"][0],
                                              "test", 1)
        out_of = 0
        scored = 0
        results = aggregation["all_priorities"]
        for r in results:
            if isinstance(r.value, tuple):
                out_of += r.value[1]
                scored += r.value[0]
            else:
                out_of += 1
                scored += int(r.value)

        # Store the messages
        messages = []
        for r in results:
            messages.extend(r.msgs)

        return scored, out_of, messages

    def test_sldmb_43093_agg(self):
        dataset = self.load_dataset(STATIC_FILES["sldmb_43093_agg"])
        check_results = self.cs.run(dataset, [], "cf")
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        expected_messages = [
            u"attribute time:_CoordianteAxisType should begin with a letter and be composed of letters, digits, and underscores",
            u"attribute lat:_CoordianteAxisType should begin with a letter and be composed of letters, digits, and underscores",
            u"attribute lon:_CoordianteAxisType should begin with a letter and be composed of letters, digits, and underscores",
            u"§2.6.2 global attribute history should exist and be a non-empty string",
            u"standard_name temperature is not defined in Standard Name Table v{}"
            .format(self._std_names._version),
            u"temperature's auxiliary coordinate specified by the coordinates attribute, precise_lat, is not a variable in this dataset",
            u"temperature's auxiliary coordinate specified by the coordinates attribute, precise_lon, is not a variable in this dataset",
        ]
        assert all(m in messages for m in expected_messages)

    @pytest.mark.slowtest
    def test_ocos(self):
        dataset = self.load_dataset(STATIC_FILES["ocos"])
        check_results = self.cs.run(dataset, [], "cf")
        scored, out_of, messages = self.get_results(check_results)
        expected_messages = [
            "AKs's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are ocean_time (T), s_w (Z), eta_rho (A), xi_rho (A) (with U: other/unknown; L: unlimited).",
            "AKt's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are ocean_time (T), s_w (Z), eta_rho (A), xi_rho (A) (with U: other/unknown; L: unlimited).",
            "AKv's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are ocean_time (T), s_w (Z), eta_rho (A), xi_rho (A) (with U: other/unknown; L: unlimited).",
            "latent's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are ocean_time (T), eta_rho (A), xi_rho (A) (with U: other/unknown; L: unlimited).",
            "lwrad's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are ocean_time (T), eta_rho (A), xi_rho (A) (with U: other/unknown; L: unlimited).",
            "salt's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are ocean_time (T), s_rho (Z), eta_rho (A), xi_rho (A) (with U: other/unknown; L: unlimited).",
            "sensible's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are ocean_time (T), eta_rho (A), xi_rho (A) (with U: other/unknown; L: unlimited).",
            "shflux's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are ocean_time (T), eta_rho (A), xi_rho (A) (with U: other/unknown; L: unlimited).",
            "swrad's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are ocean_time (T), eta_rho (A), xi_rho (A) (with U: other/unknown; L: unlimited).",
            "temp's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are ocean_time (T), s_rho (Z), eta_rho (A), xi_rho (A) (with U: other/unknown; L: unlimited).",
            "tke's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are ocean_time (T), s_w (Z), eta_rho (A), xi_rho (A) (with U: other/unknown; L: unlimited).",
            "u's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are ocean_time (T), s_rho (Z), eta_u (A), xi_u (A) (with U: other/unknown; L: unlimited).",
            "ubar's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are ocean_time (T), eta_u (A), xi_u (A) (with U: other/unknown; L: unlimited).",
            "v's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are ocean_time (T), s_rho (Z), eta_v (A), xi_v (A) (with U: other/unknown; L: unlimited).",
            "vbar's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are ocean_time (T), eta_v (A), xi_v (A) (with U: other/unknown; L: unlimited).",
            "w's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are ocean_time (T), s_w (Z), eta_rho (A), xi_rho (A) (with U: other/unknown; L: unlimited).",
            "zeta's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are ocean_time (T), eta_rho (A), xi_rho (A) (with U: other/unknown; L: unlimited).",
            '§2.6.1 Conventions global attribute does not contain "CF-1.7"',
            "units (None) attribute of 's_w' must be a string compatible with UDUNITS",
            "units (None) attribute of 's_rho' must be a string compatible with UDUNITS",
            "units (None) attribute of 'Cs_w' must be a string compatible with UDUNITS",
            "units (None) attribute of 'user' must be a string compatible with UDUNITS",
            "units (None) attribute of 'Cs_r' must be a string compatible with UDUNITS",
            "CF recommends latitude variable 'lat_rho' to use units degrees_north",
            "CF recommends latitude variable 'lat_u' to use units degrees_north",
            "CF recommends latitude variable 'lat_v' to use units degrees_north",
            "CF recommends latitude variable 'lat_psi' to use units degrees_north",
            "CF recommends longitude variable 'lon_rho' to use units degrees_east",
            "CF recommends longitude variable 'lon_u' to use units degrees_east",
            "CF recommends longitude variable 'lon_v' to use units degrees_east",
            "CF recommends longitude variable 'lon_psi' to use units degrees_east",
            "Unidentifiable feature for variable nl_tnu2",
            "Unidentifiable feature for variable Akt_bak",
            "Unidentifiable feature for variable Tnudg",
            "Unidentifiable feature for variable FSobc_in",
            "Unidentifiable feature for variable FSobc_out",
            "Unidentifiable feature for variable M2obc_in",
            "Unidentifiable feature for variable M2obc_out",
            "Unidentifiable feature for variable Tobc_in",
            "Unidentifiable feature for variable Tobc_out",
            "Unidentifiable feature for variable M3obc_in",
            "Unidentifiable feature for variable M3obc_out",
            "Unidentifiable feature for variable Cs_r",
            "Unidentifiable feature for variable Cs_w",
            "Unidentifiable feature for variable user",
            "§4.3.3 The standard_name of `s_rho` must map to the correct computed_standard_name, `['altitude', 'height_above_geopotential_datum', 'height_above_mean_sea_level', 'height_above_reference_ellipsoid']`",
            "§4.3.3 The standard_name of `s_w` must map to the correct computed_standard_name, `['altitude', 'height_above_geopotential_datum', 'height_above_mean_sea_level', 'height_above_reference_ellipsoid']`"
        ]
        # every reported message must be one of the expected ones
        assert set(messages).issubset(set(expected_messages))

    def test_l01_met(self):
        dataset = self.load_dataset(STATIC_FILES["l01-met"])
        check_results = self.cs.run(dataset, [], "cf")
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        # The variable is supposed to be a status flag but it's mislabled
        # NOTE(review): the barometric_pressure "not defined" message appears
        # twice in this list; `all(m in messages ...)` makes it redundant
        expected_messages = [
            "units for variable air_temperature_qc must be convertible to K currently they are 1",
            "units for variable wind_speed_qc must be convertible to m s-1 currently they are 1",
            "standard_name visibility is not defined in Standard Name Table v{}"
            .format(self._std_names._version),
            "standard_name modifier data_quality for variable visibility_qc is not a valid modifier according to appendix C",
            "standard_name wind_direction is not defined in Standard Name Table v{}"
            .format(self._std_names._version),
            "standard_name modifier data_quality for variable wind_direction_qc is not a valid modifier according to appendix C",
            "standard_name wind_gust is not defined in Standard Name Table v{}"
            .format(self._std_names._version),
            "standard_name modifier data_quality for variable wind_gust_qc is not a valid modifier according to appendix C",
            "standard_name modifier data_quality for variable air_temperature_qc is not a valid modifier according to appendix C",
            "standard_name use_wind is not defined in Standard Name Table v{}".
            format(self._std_names._version),
            "standard_name barometric_pressure is not defined in Standard Name Table v{}"
            .format(self._std_names._version),
            "standard_name modifier data_quality for variable barometric_pressure_qc is not a valid modifier according to appendix C",
            "standard_name modifier data_quality for variable wind_speed_qc is not a valid modifier according to appendix C",
            "standard_name barometric_pressure is not defined in Standard Name Table v{}"
            .format(self._std_names._version),
            "CF recommends latitude variable 'lat' to use units degrees_north",
            "CF recommends longitude variable 'lon' to use units degrees_east",
        ]
        assert all(m in messages for m in expected_messages)

    def test_usgs_dem_saipan(self):
        dataset = self.load_dataset(STATIC_FILES["usgs_dem_saipan"])
        check_results = self.cs.run(dataset, [], "cf")
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        expected_messages = [
            '§2.6.1 Conventions global attribute does not contain "CF-1.7"'
        ]
        assert all(m in messages for m in expected_messages)

    def test_sp041(self):
        dataset = self.load_dataset(STATIC_FILES["sp041"])
        check_results = self.cs.run(dataset, [], "cf")
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        assert (u"lat_qc is not a variable in this dataset") in messages
        # for/else: the else runs only if no message matched (no break taken)
        for i, msg in enumerate(messages):
            if msg.startswith("Different feature types"):
                break
        else:
            assert (
                False
            ), "'Different feature types discovered' was not found in the checker messages"

    def test_3mf07(self):
        """Load the 3mf07.nc file and run the CF check suite on it. There
        should be several variable/attribute combos which fail:
        - latitude:valid min
        - latitude:valid_max
        - longitude:valid_min
        - longitude:valid_max
        - references is an empty string
        - comment (global attr) is an empty string
        - z:dimensions are not a proper subset of dims for variable flag, haul
        - variable flag/haul has an unidentifiable feature"""
        dataset = self.load_dataset(STATIC_FILES["3mf07"])
        check_results = self.cs.run(dataset, [], "cf")
        scored, out_of, messages = self.get_results(check_results)
        expected_messages = [
            u"latitude:valid_min must be a numeric type not a string",
            u"latitude:valid_max must be a numeric type not a string",
            u"longitude:valid_min must be a numeric type not a string",
            u"longitude:valid_max must be a numeric type not a string",
            u"§2.6.2 references global attribute should be a non-empty string",
            u"§2.6.2 comment global attribute should be a non-empty string",
            u"dimensions for auxiliary coordinate variable z (z) are not a subset of dimensions for variable flag (profile)",
            u"dimensions for auxiliary coordinate variable z (z) are not a subset of dimensions for variable haul (profile)",
            u"Unidentifiable feature for variable flag",
            u"Unidentifiable feature for variable haul",
        ]
        assert scored < out_of
        assert all(m in messages for m in expected_messages)

    def test_ooi_glider(self):
        dataset = self.load_dataset(STATIC_FILES["ooi_glider"])
        check_results = self.cs.run(dataset, [], "cf")
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        expected_messages = [
            u"§2.6.2 comment global attribute should be a non-empty string",
            u"units (None) attribute of 'deployment' must be a string compatible with UDUNITS",
            u"Attribute long_name or/and standard_name is highly recommended for variable deployment",
            u"latitude variable 'latitude' should define standard_name='latitude' or axis='Y'",
            u"longitude variable 'longitude' should define standard_name='longitude' or axis='X'",
        ]
        assert all(m in messages for m in expected_messages)

    def test_swan(self):
        dataset = self.load_dataset(STATIC_FILES["swan"])
        check_results = self.cs.run(dataset, [], "cf")
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        expected_messages = [
            "global attribute _CoordSysBuilder should begin with a letter and be composed of letters, digits, and underscores",
            '§2.6.1 Conventions global attribute does not contain "CF-1.7"',
            "units for variable time_offset must be convertible to s currently they are hours since 2013-02-18T00:00:00Z",
            "units for variable time_run must be convertible to s currently they are hours since 2013-02-18 00:00:00.000 UTC",
            "lon's axis attribute must be T, X, Y, or Z, currently x",
            "lat's axis attribute must be T, X, Y, or Z, currently y",
            "z's axis attribute must be T, X, Y, or Z, currently z",
            "z: vertical coordinates not defining pressure must include a positive attribute that is either 'up' or 'down'",
            "GRID is not a valid CF featureType. It must be one of point, timeseries, trajectory, profile, timeseriesprofile, trajectoryprofile",
        ]
        assert all(m in messages for m in expected_messages)

    def test_kibesillah(self):
        dataset = self.load_dataset(STATIC_FILES["kibesillah"])
        check_results = self.cs.run(dataset, [], "cf")
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        # test for global attributes (CF 2.6.2)
        assert (
            u"§2.6.2 global attribute title should exist and be a non-empty string"
        ) in messages

    def test_pr_inundation(self):
        dataset = self.load_dataset(STATIC_FILES["pr_inundation"])
        check_results = self.cs.run(dataset, [], "cf")
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        expected_messages = [
            "waterlevel's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are time (T), m (A), n (A) (with U: other/unknown; L: unlimited).",
            "velocity_x's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are time (T), Layer (Z), m (A), n (A) (with U: other/unknown; L: unlimited).",
            "velocity_y's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are time (T), Layer (Z), m (A), n (A) (with U: other/unknown; L: unlimited).",
            "tau_x's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are time (T), m (A), n (A) (with U: other/unknown; L: unlimited).",
            "tau_y's spatio-temporal dimensions are not in the recommended order T, Z, Y, X and/or further dimensions are not located left of T, Z, Y, X. The dimensions (and their guessed types) are time (T), m (A), n (A) (with U: other/unknown; L: unlimited).",
            "§2.6.2 grid_depth:comment should be a non-empty string",
            "§2.6.2 depth:comment should be a non-empty string",
            "§2.6.2 institution global attribute should be a non-empty string",
            "§2.6.2 comment global attribute should be a non-empty string",
            "units (None) attribute of 'LayerInterf' must be a string compatible with UDUNITS",
            "units (None) attribute of 'time_bounds' must be a string compatible with UDUNITS",
            "units (None) attribute of 'Layer' must be a string compatible with UDUNITS",
            "units for variable area must be convertible to m2 currently they are degrees2",
            "k: vertical coordinates not defining pressure must include a positive attribute that is either 'up' or 'down'",
            "grid_longitude has no coordinate associated with a variable identified as true latitude/longitude; its coordinate variable should also share a subset of grid_longitude's dimensions",
            "grid_latitude has no coordinate associated with a variable identified as true latitude/longitude; its coordinate variable should also share a subset of grid_latitude's dimensions",
            "time_bounds might be a cell boundary variable but there are no variables that define it as a boundary using the `bounds` attribute.",
            "Unidentifiable feature for variable time_bounds",
            "Unidentifiable feature for variable grid_depth",
        ]
        assert set(expected_messages).issubset(messages)

    def test_fvcom(self):
        dataset = self.load_dataset(STATIC_FILES["fvcom"])
        check_results = self.cs.run(dataset, [], "cf")
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of

        for msg in messages:
            if msg.startswith(
                    "dimensions for auxiliary coordinate variable siglay"):
                break
        # it's not clear to me what this is supposed to be doing -- this else clause is outside of the if
        else:
            raise AssertionError(
                u'"dimensions for auxiliary coordinate variable siglay (node, siglay) '
                'are not a subset of dimensions for variable u (siglay, nele, time)"'
                " not in messages")
        assert (u"Unidentifiable feature for variable x") in messages
        assert ('§2.6.1 Conventions global attribute does not contain "CF-1.7"'
                ) in messages

    def test_ww3(self):
        dataset = self.load_dataset(STATIC_FILES["ww3"])
        check_results = self.cs.run(dataset, [], "cf")
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        expected_messages = [
            u"§2.6.2 global attribute title should exist and be a non-empty string",
            u"§2.6.2 global attribute history should exist and be a non-empty string",
            u"§2.6.1 Conventions field is not present",
            u"Attribute long_name or/and standard_name is highly recommended for variable time",
            u"Attribute long_name or/and standard_name is highly recommended for variable lon",
            u"Attribute long_name or/and standard_name is highly recommended for variable lat",
            u"latitude variable 'lat' should define standard_name='latitude' or axis='Y'",
            u"longitude variable 'lon' should define standard_name='longitude' or axis='X'",
        ]
        assert all(m in messages for m in expected_messages)

    def test_glcfs(self):
        dataset = self.load_dataset(STATIC_FILES["glcfs"])
        check_results = self.cs.run(dataset, [], "cf")
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of
        # TODO: referenced/relative time is treated like time units
        assert (
            "units for variable time_offset must be convertible to s currently "
            "they are hours since 2016-01-01T12:00:00Z") in messages
        assert (
            "standard_name cloud_cover is not defined in Standard Name Table v{}"
            .format(self._std_names._version)) in messages
        assert (
            u"standard_name dew_point is not defined in Standard Name Table v{}"
            .format(self._std_names._version)) in messages
        assert (
            u"GRID is not a valid CF featureType. It must be one of point, timeseries, "
            "trajectory, profile, timeseriesprofile, trajectoryprofile"
        ) in messages
        assert (
            u"global attribute _CoordSysBuilder should begin with a letter and "
            "be composed of letters, digits, and underscores") in messages
        # NOTE(review): the next assert is a no-op -- a non-empty string
        # literal is always truthy.  Likely missing `in messages`; confirm
        # the expected message before fixing.
        assert u"source should be defined"
        assert (u'units for cl, "fraction" are not recognized by UDUNITS'
                ) in messages

    def test_ncei_templates(self):
        """
        Tests some of the NCEI NetCDF templates, which usually should get a
        perfect score.
        """
        # NOTE(review): docstring says "perfect score" but the assertion
        # below requires scored < out_of -- one of the two is stale; verify.
        dataset = self.load_dataset(STATIC_FILES["NCEI_profile_template_v2_0"])
        check_results = self.cs.run(dataset, [], "cf")
        scored, out_of, messages = self.get_results(check_results)
        assert scored < out_of

    def test_bad_cf_roles(self):
        """
        Tests the CF checker detects datasets with more than 2 defined
        cf_role variables.
        """
        dataset = self.load_dataset(STATIC_FILES["bad_cf_role"])
        check_results = self.cs.run(dataset, [], "cf")
        scored, out_of, messages = self.get_results(check_results)
        expected_messages = [
            u"§2.6.2 global attribute title should exist and be a non-empty string",
            u"§2.6.2 global attribute history should exist and be a non-empty string",
            u"§2.6.1 Conventions field is not present",
            u"Unidentifiable feature for variable T",
            u"§9.5 The only acceptable values of cf_role for Discrete Geometry CF data sets are timeseries_id, profile_id, and trajectory_id",
        ]
        assert scored < out_of
        assert all(m in messages for m in expected_messages)
def setUp(self): ''' Initialize the dataset ''' self.cs = CheckSuite() self.cs.load_all_available_checkers()
class TestCFIntegration(BaseTestCase):
    """Integration tests running the full CF checker against static sample
    datasets and pinning exact scores and message text."""

    def setUp(self):
        '''
        Initialize the dataset
        '''
        self.cs = CheckSuite()
        self.cs.load_all_available_checkers()

    # --------------------------------------------------------------------------------
    # Helper Methods
    # --------------------------------------------------------------------------------

    def new_nc_file(self):
        '''
        Make a new temporary netCDF file for the scope of the test
        '''
        nc_file_path = os.path.join(gettempdir(), 'example.nc')
        if os.path.exists(nc_file_path):
            raise IOError('File Exists: %s' % nc_file_path)
        nc = Dataset(nc_file_path, 'w')
        # cleanups run LIFO: close the dataset before removing the file
        self.addCleanup(os.remove, nc_file_path)
        self.addCleanup(nc.close)
        return nc

    def load_dataset(self, nc_dataset):
        '''
        Return a loaded NC Dataset for the given path
        '''
        if not isinstance(nc_dataset, str):
            raise ValueError("nc_dataset should be a string")
        nc_dataset = Dataset(nc_dataset, 'r')
        self.addCleanup(nc_dataset.close)
        return nc_dataset

    def get_results(self, check_results):
        '''
        Returns a tuple of the value scored, possible, and a list of messages
        in the result set.
        '''
        aggregation = self.cs.build_structure('cf', check_results['cf'][0], 'test', 1)
        out_of = 0
        scored = 0
        results = aggregation['all_priorities']
        for r in results:
            # a tuple value is (points earned, points possible); any other
            # value counts as a single pass/fail point
            if isinstance(r.value, tuple):
                out_of += r.value[1]
                scored += r.value[0]
            else:
                out_of += 1
                scored += int(r.value)
        # Store the messages
        messages = []
        for r in results:
            messages.extend(r.msgs)
        return scored, out_of, messages

    def test_sldmb_43093_agg(self):
        dataset = self.load_dataset(STATIC_FILES['sldmb_43093_agg'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert (scored, out_of) == (139, 147)
        assert len(messages) == 8
        assert u'standard_name temperature is not defined in Standard Name Table v36' in messages
        assert (
            u'auxiliary coordinate specified by the coordinates attribute, precise_lat, '
            'is not a variable in this dataset') in messages
        assert (
            u'auxiliary coordinate specified by the coordinates attribute, precise_lon, '
            'is not a variable in this dataset') in messages
        assert (
            u'attribute time:_CoordianteAxisType should begin with a letter and be composed '
            'of letters, digits, and underscores') in messages

    @pytest.mark.slowtest
    def test_ocos(self):
        dataset = self.load_dataset(STATIC_FILES['ocos'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert (scored, out_of) == (1840, 1841)
        assert len(messages) == 41
        assert (u'Unidentifiable feature for variable Akt_bak') in messages
        assert (u'Conventions global attribute does not contain '
                '"CF-1.6". The CF Checker only supports CF-1.6 '
                'at this time.') in messages
        assert (
            u"CF recommends latitude variable 'lat_psi' to use units degrees_north"
        ) in messages

    def test_l01_met(self):
        dataset = self.load_dataset(STATIC_FILES['l01-met'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert (scored, out_of) == (590, 604)
        assert len(messages) == 16
        # The variable is supposed to be a status flag but it's mislabled
        assert (
            u'units for variable air_temperature_qc must be convertible to K currently they are 1'
        ) in messages
        assert (
            u'standard_name barometric_pressure is not defined in Standard Name Table v36'
        ) in messages
        assert (
            u'standard_name use_wind is not defined in Standard Name Table v36'
        ) in messages
        assert (
            u'standard_name modifier data_quality is not a valid modifier according to appendix C'
        ) in messages
        assert (
            u"CF recommends latitude variable 'lat' to use units degrees_north"
        ) in messages

    def test_usgs_dem_saipan(self):
        dataset = self.load_dataset(STATIC_FILES['usgs_dem_saipan'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert (scored, out_of) == (109, 110)
        assert len(messages) == 1
        assert (u'Conventions global attribute does not contain '
                '"CF-1.6". The CF Checker only supports CF-1.6 '
                'at this time.') in messages

    def test_sp041(self):
        dataset = self.load_dataset(STATIC_FILES['sp041'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert (scored, out_of) == (1189, 1194)
        assert len(messages) == 5
        assert (u"lat_qc is not a variable in this dataset") in messages
        assert (
            u"TrajectoryProfile is not a valid CF featureType. It must be one of point, "
            "timeSeries, trajectory, profile, timeSeriesProfile, trajectoryProfile"
        ) in messages
        # for/else: fail only if no message starts with the expected prefix
        for i, msg in enumerate(messages):
            if msg.startswith("Different feature types"):
                break
        else:
            assert False, "'Different feature types discovered' was not found in the checker messages"

    def test_3mf07(self):
        dataset = self.load_dataset(STATIC_FILES['3mf07'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert (scored, out_of) == (420, 428)
        assert len(messages) == 10
        assert (
            u"dimensions for auxiliary coordinate variable z (z) are not a subset of dimensions for "
            "variable flag (profile)") in messages
        assert (u"Unidentifiable feature for variable flag") in messages
        assert (u"references global attribute should be a non-empty string"
                ) in messages
        assert (u"comment global attribute should be a non-empty string"
                ) in messages
        assert (u"latitude:valid_min must be a numeric type not a string"
                ) in messages
        assert (u"latitude:valid_max must be a numeric type not a string"
                ) in messages
        assert (u"longitude:valid_min must be a numeric type not a string"
                ) in messages
        assert (u"longitude:valid_max must be a numeric type not a string"
                ) in messages

    def test_ooi_glider(self):
        dataset = self.load_dataset(STATIC_FILES['ooi_glider'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert (scored, out_of) == (595, 599)
        assert len(messages) == 4
        assert (
            u"variable deployment's attribute standard_name must be a non-empty string or "
            "it should define a long_name attribute.") in messages
        assert (u"comment global attribute should be a non-empty string"
                ) in messages
        assert (
            u"latitude variable 'latitude' should define standard_name='latitude' or axis='Y'"
        ) in messages
        assert (
            u"longitude variable 'longitude' should define standard_name='longitude' or axis='X'"
        ) in messages

    def test_swan(self):
        dataset = self.load_dataset(STATIC_FILES['swan'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert (scored, out_of) == (363, 372)
        assert len(messages) == 10
        assert (
            u"units for variable time_offset must be convertible to s currently they are hours "
            "since 2013-02-18T00:00:00Z") in messages
        assert (
            u"axis attribute must be T, X, Y, or Z, currently y") in messages
        assert (
            u"vertical coordinates not defining pressure must include a positive attribute that "
            "is either 'up' or 'down'") in messages
        assert (
            u"GRID is not a valid CF featureType. It must be one of point, timeSeries, "
            "trajectory, profile, timeSeriesProfile, trajectoryProfile"
        ) in messages
        assert (u'Conventions global attribute does not contain '
                '"CF-1.6". The CF Checker only supports CF-1.6 '
                'at this time.') in messages

    def test_kibesillah(self):
        dataset = self.load_dataset(STATIC_FILES['kibesillah'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert (scored, out_of) == (199, 202)
        assert len(messages) == 3
        assert (u"source should be defined") in messages
        assert (u"references should be defined") in messages
        assert (
            u"global attribute title should exist and be a non-empty string"
        ) in messages

    def test_pr_inundation(self):
        dataset = self.load_dataset(STATIC_FILES['pr_inundation'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert (scored, out_of) == (549, 557)
        assert len(messages) == 11
        assert (u"units for variable area must be convertible to m2 currently "
                "they are degrees2") in messages
        assert (
            u"vertical coordinates not defining pressure must include a positive "
            "attribute that is either 'up' or 'down'") in messages
        assert (u"Unidentifiable feature for variable time_bounds") in messages
        assert (
            u"grid_longitude's dimensions are not in the recommended order T, "
            "Z, Y, X. They are m, n, bounds4") in messages
        assert (u"depth:comment should be a non-empty string") in messages
        assert (u"institution global attribute should be a non-empty string"
                ) in messages
        assert (
            u"time_bounds might be a cell boundary variable but there are no variables that "
            "define it as a boundary using the `bounds` attribute."
        ) in messages

    def test_fvcom(self):
        dataset = self.load_dataset(STATIC_FILES['fvcom'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert (scored, out_of) == (646, 653)
        assert len(messages) == 26
        # for/else: raise only if no message matched the expected prefix
        for msg in messages:
            if msg.startswith(
                    "dimensions for auxiliary coordinate variable siglay"):
                break
        else:
            raise AssertionError(
                u"\"dimensions for auxiliary coordinate variable siglay (node, siglay) "
                "are not a subset of dimensions for variable u (siglay, nele, time)\""
                " not in messages")
        assert (u"Unidentifiable feature for variable x") in messages
        assert (u'Conventions global attribute does not contain '
                '"CF-1.6". The CF Checker only supports CF-1.6 '
                'at this time.') in messages
        assert (u"siglay shares the same name as one of its dimensions"
                ) in messages

    def test_ww3(self):
        dataset = self.load_dataset(STATIC_FILES['ww3'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert (scored, out_of) == (111, 121)
        assert len(messages) == 10
        assert (
            u"variable lat's attribute standard_name must be a non-empty string or it "
            "should define a long_name attribute.") in messages
        assert (u"Conventions field is not present") in messages
        assert (
            u"latitude variable 'lat' should define standard_name='latitude' or axis='Y'"
        ) in messages

    def test_glcfs(self):
        dataset = self.load_dataset(STATIC_FILES['glcfs'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert (scored, out_of) == (330, 339)
        assert len(messages) == 10
        assert (
            u"units for variable time_offset must be convertible to s currently "
            "they are hours since 2016-01-01T12:00:00Z") in messages
        assert (
            u"standard_name cloud_cover is not defined in Standard Name Table v36"
        ) in messages
        assert (
            u"standard_name dew_point is not defined in Standard Name Table v36"
        ) in messages
        assert (u"variable eta referenced by formula_terms does not exist"
                ) in messages
        assert (
            u"GRID is not a valid CF featureType. It must be one of point, timeSeries, "
            "trajectory, profile, timeSeriesProfile, trajectoryProfile"
        ) in messages
        assert (
            u"global attribute _CoordSysBuilder should begin with a letter and "
            "be composed of letters, digits, and underscores") in messages
        # NOTE(review): the next assert is missing "in messages" -- it asserts
        # a non-empty string literal and therefore always passes.
        assert (u"source should be defined")
        assert (u'units for cl, "fraction" are not recognized by udunits'
                ) in messages

    def test_ncei_templates(self):
        """
        Tests some of the NCEI NetCDF templates, which usually should get
        a perfect score.
        """
        dataset = self.load_dataset(STATIC_FILES['NCEI_profile_template_v2_0'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert (scored, out_of) == (344, 348)

    def test_bad_cf_roles(self):
        '''
        Tests the CF checker detects datasets with more than 2 defined
        cf_role variables
        '''
        dataset = self.load_dataset(STATIC_FILES['bad_cf_role'])
        check_results = self.cs.run(dataset, [], 'cf')
        scored, out_of, messages = self.get_results(check_results)
        assert (scored, out_of) == (92, 100)
        assert ('§9.5 states that datasets should not contain more than two '
                'variables defining a cf_role attribute.') in messages
def run_checker(cls, ds_loc, checker_names, verbose, criteria, skip_checks=None, output_filename='-', output_format=['text']): """ Static check runner. @param ds_loc Dataset location (url or file) @param checker_names List of string names to run, should match keys of checkers dict (empty list means run all) @param verbose Verbosity of the output (0, 1, 2) @param criteria Determines failure (lenient, normal, strict) @param output_filename Path to the file for output @param skip_checks Names of checks to skip @param output_format Format of the output(s) @returns If the tests failed (based on the criteria) """ cs = CheckSuite() # using OrderedDict is important here to preserve the order # of multiple datasets which may be passed in score_dict = OrderedDict() if not isinstance(ds_loc, six.string_types): locs = ds_loc # if single dataset, put in list else: locs = [ds_loc] # Make sure output format is a list if isinstance(output_format, six.string_types): output_format = [output_format] for loc in locs: ds = cs.load_dataset(loc) score_groups = cs.run(ds, [] if skip_checks is None else skip_checks, *checker_names) if not score_groups: raise ValueError("No checks found, please check the name of the checker(s) and that they are installed") else: score_dict[loc] = score_groups if criteria == 'normal': limit = 2 elif criteria == 'strict': limit = 1 elif criteria == 'lenient': limit = 3 for out_fmt in output_format: if out_fmt == 'text': if output_filename == '-': groups = cls.stdout_output(cs, score_dict, verbose, limit) # need to redirect output from stdout since print functions are # presently used to generate the standard report output else: if len(output_format) > 1: # Update file name if needed output_filename = '{}.txt'.format(os.path.splitext(output_filename)[0]) with io.open(output_filename, 'w', encoding='utf-8') as f: with stdout_redirector(f): groups = cls.stdout_output(cs, score_dict, verbose, limit) elif out_fmt == 'html': # Update file name if needed if 
len(output_format) > 1 and output_filename != '-': output_filename = '{}.html'.format(os.path.splitext(output_filename)[0]) groups = cls.html_output(cs, score_dict, output_filename, ds_loc, limit) elif out_fmt == 'json' or 'json_new': # Update file name if needed if len(output_format) > 1 and output_filename != '-': output_filename = '{}.json'.format(os.path.splitext(output_filename)[0]) groups = cls.json_output(cs, score_dict, output_filename, ds_loc, limit, out_fmt) else: raise TypeError('Invalid format %s' % out_fmt) errors_occurred = cls.check_errors(score_groups, verbose) return cs.passtree(groups, limit), errors_occurred
class TestSuite(unittest.TestCase):
    """Unit tests for CheckSuite: running checkers, output generation,
    result grouping, and dataset loading."""

    # @see
    # http://www.saltycrane.com/blog/2012/07/how-prevent-nose-unittest-using-docstring-when-verbosity-2/

    def setUp(self):
        self.cs = CheckSuite()
        self.cs.load_all_available_checkers()

    def shortDescription(self):
        # return None so nose shows the test name instead of the docstring
        return None

    # override __str__ and __repr__ behavior to show a copy-pastable nosetest name for ion tests
    # ion.module:TestClassName.test_function_name
    def __repr__(self):
        name = self.id()
        name = name.split('.')
        if name[0] not in ["ion", "pyon"]:
            return "%s (%s)" % (name[-1], '.'.join(name[:-1]))
        else:
            return "%s ( %s )" % (name[-1], '.'.join(name[:-2]) + ":" + '.'.join(name[-2:]))
    __str__ = __repr__

    def test_suite(self):
        # BWA: what's the purpose of this test? Just to see if the suite
        # runs without errors?
        ds = self.cs.load_dataset(static_files['2dim'])
        self.cs.run(ds, 'acdd')

    def test_unicode_formatting(self):
        ds = self.cs.load_dataset(static_files['bad_region'])
        score_groups = self.cs.run(ds, 'cf')
        limit = 2
        for checker, rpair in score_groups.items():
            groups, errors = rpair
            score_list, points, out_of = self.cs.standard_output(ds.filepath(), limit, checker, groups)
            # This asserts that print is able to generate all of the unicode output
            self.cs.standard_output_generation(groups, limit, points, out_of, checker)

    def test_skip_checks(self):
        """Tests that checks are properly skipped when specified"""
        ds = self.cs.load_dataset(static_files['2dim'])
        # exclude title from the check attributes
        score_groups = self.cs.run(ds, ['check_high'], 'acdd')
        assert all(sg.name not in {'Conventions', 'title', 'keywords', 'summary'}
                   for sg in score_groups['acdd'][0])

    def test_group_func(self):
        # This is checking for issue #183, where group_func results in
        # IndexError: list index out of range
        ds = self.cs.load_dataset(static_files['bad_data_type'])
        score_groups = self.cs.run(ds, 'cf')
        limit = 2
        for checker, rpair in score_groups.items():
            groups, errors = rpair
            score_list, points, out_of = self.cs.standard_output(ds.filepath(), limit,
                                                                 checker, groups)
            # This asserts that print is able to generate all of the unicode output
            self.cs.standard_output_generation(groups, limit, points, out_of, checker)

    def test_score_grouping(self):
        # Testing the grouping of results for output, which can fail
        # if some assumptions are not met, e.g. if a Result object has
        # a value attribute of unexpected type
        res = [
            Result(BaseCheck.MEDIUM, True, 'one'),
            Result(BaseCheck.MEDIUM, (1, 3), 'one'),
            Result(BaseCheck.MEDIUM, None, 'one'),
            Result(BaseCheck.MEDIUM, True, 'two'),
            Result(BaseCheck.MEDIUM, np.isnan(1), 'two')  # value is type numpy.bool_
        ]
        score = self.cs.scores(res)
        self.assertEqual(score[0].name, 'one')
        self.assertEqual(score[0].value, (2, 4))
        self.assertEqual(score[1].name, 'two')
        self.assertEqual(score[1].value, (1, 2))

    def test_cdl_file(self):
        # Testing whether you can run compliance checker on a .cdl file
        # Load the cdl file
        ds = self.cs.load_dataset(static_files['test_cdl'])
        vals = self.cs.run(ds, 'cf')
        limit = 2
        for checker, rpair in vals.items():
            groups, errors = rpair
            score_list, cdl_points, cdl_out_of = self.cs.standard_output(ds.filepath(), limit,
                                                                         checker, groups)
            # This asserts that print is able to generate all of the unicode output
            self.cs.standard_output_generation(groups, limit, cdl_points, cdl_out_of, checker)
        ds.close()
        # Ok now load the nc file that it came from
        ds = self.cs.load_dataset(static_files['test_cdl_nc'])
        vals = self.cs.run(ds, 'cf')
        limit = 2
        for checker, rpair in vals.items():
            groups, errors = rpair
            score_list, nc_points, nc_out_of = self.cs.standard_output(ds.filepath(), limit,
                                                                       checker, groups)
            # This asserts that print is able to generate all of the unicode output
            self.cs.standard_output_generation(groups, limit, nc_points, nc_out_of, checker)
        ds.close()
        # loading the .cdl generates a sibling .nc file; remove it afterwards
        nc_file_path = static_files['test_cdl'].replace('.cdl', '.nc')
        self.addCleanup(os.remove, nc_file_path)
        # Ok the scores should be equal!
        self.assertEqual(nc_points, cdl_points)
        self.assertEqual(nc_out_of, cdl_out_of)

    def test_load_local_dataset_GenericFile(self):
        resp = self.cs.load_local_dataset(static_files['empty'])
        assert isinstance(resp, GenericFile) == True

    def test_standard_output_score_header(self):
        """
        Check that the output score header only checks the number of
        of potential issues, rather than the weighted score
        """
        ds = self.cs.load_dataset(static_files['bad_region'])
        score_groups = self.cs.run(ds, [], 'cf')
        limit = 2
        groups, errors = score_groups['cf']
        score_list, all_passed, out_of = self.cs.standard_output(
            ds.filepath(), limit, 'cf', groups)
        self.assertEqual((all_passed, out_of), (30, 47))
def setUp(self): self.cs = CheckSuite() self.cs.load_all_available_checkers()
class TestSuite(unittest.TestCase):
    """Unit tests for CheckSuite: running checkers, skip-check filtering,
    output generation, result grouping, and netCDF4/cdl handling."""

    # @see
    # http://www.saltycrane.com/blog/2012/07/how-prevent-nose-unittest-using-docstring-when-verbosity-2/

    def setUp(self):
        self.cs = CheckSuite()
        self.cs.load_all_available_checkers()

    def shortDescription(self):
        # return None so nose shows the test name instead of the docstring
        return None

    # override __str__ and __repr__ behavior to show a copy-pastable nosetest name for ion tests
    # ion.module:TestClassName.test_function_name
    def __repr__(self):
        name = self.id()
        name = name.split(".")
        if name[0] not in ["ion", "pyon"]:
            return "%s (%s)" % (name[-1], ".".join(name[:-1]))
        else:
            return "%s ( %s )" % (
                name[-1],
                ".".join(name[:-2]) + ":" + ".".join(name[-2:]),
            )

    __str__ = __repr__

    def test_suite(self):
        # BWA: what's the purpose of this test? Just to see if the suite
        # runs without errors?
        ds = self.cs.load_dataset(static_files["2dim"])
        self.cs.run(ds, "acdd")

    def test_unicode_formatting(self):
        ds = self.cs.load_dataset(static_files["bad_region"])
        score_groups = self.cs.run(ds, "cf")
        limit = 2
        for checker, rpair in score_groups.items():
            groups, errors = rpair
            score_list, points, out_of = self.cs.standard_output(
                ds.filepath(), limit, checker, groups)
            # This asserts that print is able to generate all of the unicode output
            self.cs.standard_output_generation(groups, limit, points, out_of, checker)

    def test_generate_dataset_netCDF4(self):
        """
        Tests that suite.generate_dataset works with cdl file with
        netCDF4 features.
        """
        # create netCDF4 file
        ds_name = self.cs.generate_dataset(static_files["netCDF4"])
        # check if correct name is return
        assert ds_name == static_files["netCDF4"].replace(".cdl", ".nc")
        # check if netCDF4 file was created
        assert os.path.isfile(static_files["netCDF4"].replace(".cdl", ".nc"))

    def test_skip_checks(self):
        """Tests that checks are properly skipped when specified"""
        ds = self.cs.load_dataset(static_files["2dim"])
        # exclude title from the check attributes
        score_groups = self.cs.run(ds, ["check_high"], "acdd")
        assert all(
            sg.name not in {"Conventions", "title", "keywords", "summary"}
            for sg in score_groups["acdd"][0])

    def test_skip_check_level(self):
        """Checks level limited skip checks"""
        ds = self.cs.load_dataset(static_files["ru07"])
        # skip suffixes: :A = all, :M = medium and lower, :L = low only
        score_groups = self.cs.run(
            ds,
            [
                "check_flags:A",
                "check_convention_possibly_var_attrs:M",
                "check_standard_name:L",
            ],
            "cf",
        )
        name_set = {sg.name for sg in score_groups["cf"][0]}
        # flattened set of messages
        msg_set = {msg for sg in score_groups["cf"][0] for msg in sg.msgs}
        # NOTE(review): "lat is a valid flags variable" appears twice in this
        # set literal -- the duplicate is redundant.
        expected_excluded_names = {
            u"§3.5 flag_meanings for lat",
            u"§3.5 flag_meanings for lon",
            u"§3.5 lat is a valid flags variable",
            u"§3.5 lat is a valid flags variable",
            u"§3.5 lon is a valid flags variable",
        }
        self.assertTrue(len(expected_excluded_names & name_set) == 0)
        # should skip references
        ref_msg = u"references global attribute should be a non-empty string"
        self.assertTrue(ref_msg not in msg_set)
        # check_standard_name is high priority, but we requested only low,
        # so the standard_name check should still exist
        standard_name_hdr = u"§3.3 Standard Name"
        self.assertTrue(standard_name_hdr in name_set)

    def test_group_func(self):
        # This is checking for issue #183, where group_func results in
        # IndexError: list index out of range
        ds = self.cs.load_dataset(static_files["bad_data_type"])
        score_groups = self.cs.run(ds, "cf")
        limit = 2
        for checker, rpair in score_groups.items():
            groups, errors = rpair
            score_list, points, out_of = self.cs.standard_output(
                ds.filepath(), limit, checker, groups)
            # This asserts that print is able to generate all of the unicode output
            self.cs.standard_output_generation(groups, limit, points, out_of, checker)

    def test_score_grouping(self):
        # Testing the grouping of results for output, which can fail
        # if some assumptions are not met, e.g. if a Result object has
        # a value attribute of unexpected type
        res = [
            Result(BaseCheck.MEDIUM, True, "one"),
            Result(BaseCheck.MEDIUM, (1, 3), "one"),
            Result(BaseCheck.MEDIUM, None, "one"),
            Result(BaseCheck.MEDIUM, True, "two"),
            Result(BaseCheck.MEDIUM, np.isnan(1), "two"),  # value is type numpy.bool_
        ]
        score = self.cs.scores(res)
        self.assertEqual(score[0].name, "one")
        self.assertEqual(score[0].value, (2, 4))
        self.assertEqual(score[1].name, "two")
        self.assertEqual(score[1].value, (1, 2))

    def test_cdl_file(self):
        # Testing whether you can run compliance checker on a .cdl file
        # Load the cdl file
        ds = self.cs.load_dataset(static_files["test_cdl"])
        vals = self.cs.run(ds, "cf")
        limit = 2
        for checker, rpair in vals.items():
            groups, errors = rpair
            score_list, cdl_points, cdl_out_of = self.cs.standard_output(
                ds.filepath(), limit, checker, groups)
            # This asserts that print is able to generate all of the unicode output
            self.cs.standard_output_generation(groups, limit, cdl_points, cdl_out_of, checker)
        ds.close()
        # Ok now load the nc file that it came from
        ds = self.cs.load_dataset(static_files["test_cdl_nc"])
        vals = self.cs.run(ds, "cf")
        limit = 2
        for checker, rpair in vals.items():
            groups, errors = rpair
            score_list, nc_points, nc_out_of = self.cs.standard_output(
                ds.filepath(), limit, checker, groups)
            # This asserts that print is able to generate all of the unicode output
            self.cs.standard_output_generation(groups, limit, nc_points, nc_out_of, checker)
        ds.close()
        # loading the .cdl generates a sibling .nc file; remove it afterwards
        nc_file_path = static_files["test_cdl"].replace(".cdl", ".nc")
        self.addCleanup(os.remove, nc_file_path)
        # Ok the scores should be equal!
        self.assertEqual(nc_points, cdl_points)
        self.assertEqual(nc_out_of, cdl_out_of)

    def test_load_local_dataset_GenericFile(self):
        resp = self.cs.load_local_dataset(static_files["empty"])
        assert isinstance(resp, GenericFile) == True

    def test_standard_output_score_header(self):
        """
        Check that the output score header only checks the number of
        of potential issues, rather than the weighted score
        """
        ds = self.cs.load_dataset(static_files["bad_region"])
        score_groups = self.cs.run(ds, [], "cf")
        limit = 2
        groups, errors = score_groups["cf"]
        score_list, all_passed, out_of = self.cs.standard_output(
            ds.filepath(), limit, "cf", groups)
        assert all_passed < out_of

    def test_netCDF4_features(self):
        """
        Check if a proper netCDF4 file with netCDF4-datatypes is created.
        """
        # create and open dataset
        ds = self.cs.load_dataset(static_files["netCDF4"])
        # check if netCDF type of global attributes is correct
        assert isinstance(ds.global_att_of_type_int, np.int32)
        # check if netCDF4 type of global attributes is correct
        assert isinstance(ds.global_att_of_type_int64, np.int64)
        # check if netCDF type of variable is correct
        assert ds["tas"].dtype is np.dtype("float32")
        # check if netCDF4 type of variable is correct
        assert ds["mask"].dtype is np.dtype("int64")
def test_cdl_file(self): # Testing whether you can run compliance checker on a .cdl file cs = CheckSuite() cs.load_all_available_checkers() # Load the cdl file ds = cs.load_dataset(static_files['test_cdl']) vals = cs.run(ds, 'cf') limit = 2 for checker, rpair in vals.items(): groups, errors = rpair score_list, cdl_points, cdl_out_of = cs.standard_output(limit, checker, groups) # This asserts that print is able to generate all of the unicode output cs.non_verbose_output_generation(score_list, groups, limit, cdl_points, cdl_out_of) # Ok now load the nc file that it came from ds = cs.load_dataset(static_files['test_cdl_nc']) vals = cs.run(ds, 'cf') limit = 2 for checker, rpair in vals.items(): groups, errors = rpair score_list, nc_points, nc_out_of = cs.standard_output(limit, checker, groups) # This asserts that print is able to generate all of the unicode output cs.non_verbose_output_generation(score_list, groups, limit, nc_points, nc_out_of) nc_file_path = static_files['test_cdl'].replace('.cdl', '.nc') self.addCleanup(os.remove, nc_file_path) # Ok the scores should be equal! self.assertEqual(nc_points, cdl_points) self.assertEqual(nc_out_of, cdl_out_of)
def test_load_local_dataset_GenericFile(self): cs = CheckSuite() resp = cs.load_local_dataset(static_files['empty']) assert isinstance(resp, GenericFile) == True