def setUp(self):
    """Prepare the fixture: bind a newly built WoudcClient to ``self.client``."""
    woudc_client = WoudcClient()
    self.client = woudc_client
def metadata_validator(self):
    """
    Robust validator to check metadata against values in WOUDC database.

    Checks run in a fixed order (table structure, content category and
    level, agency, platform, instrument, payload commas) and stop at the
    first error; warnings gathered up to that point are still returned.

    :returns: Dictionary of the form:
              {'status': True if validation passed, False otherwise.
               'warnings': a list of warnings.
               'errors': a list of errors.
              }
    """

    error_dict = {'status': False, 'warnings': [], 'errors': []}
    client = WoudcClient()

    def fail(message):
        """Record `message` as an error and hand back the result dict."""
        error_dict['errors'].append(message)
        return error_dict

    # Attempt basic validation
    LOGGER.debug('Attempting basic table validation.')
    error, violations = global_validate(self.sections)
    if error:
        return fail('The following violations were found: \n %s'
                    % '\n'.join(violations))
    LOGGER.info('Basic table validation passed.')

    # Check that Content Category and Level are valid
    LOGGER.debug('Checking Content Category and Level.')
    category = self.sections['CONTENT']['Category']
    level = self.sections['CONTENT']['Level']
    is_level_1 = level in ('1.0', '1')
    is_level_2 = level in ('2.0', '2')
    if category != 'UmkehrN14':
        if not is_level_1:
            return fail('Level for category %s must be 1.0' % category)
    else:
        if 'N14_VALUES' in self.sections and not is_level_1:
            return fail('Level for category UmkehrN14 '
                        'with table N14_VALUES must be 1.0')
        if 'C_PROFILE' in self.sections and not is_level_2:
            # This branch used to repeat the N14_VALUES message verbatim.
            return fail('Level for category UmkehrN14 '
                        'with table C_PROFILE must be 2.0')
    LOGGER.info('Content Category and Level are valid.')

    # Attempt further basic validation given category and level
    LOGGER.debug('Attempting specific table validation for category %s '
                 'and level %s.', category, level)
    # Both accepted spellings of level 1 ('1.0' and '1') must map to '1';
    # comparing against '1.0' alone validated level '1' files as level 2.
    error, violations = global_validate(
        self.sections, category, '1' if is_level_1 else '2', '1')
    if error:
        return fail('The following violations were found: \n %s'
                    % '\n'.join(violations))
    LOGGER.info('Further table validation passed.')

    # Check agency and platform information
    LOGGER.debug('Resolving Agency and Platform information.')
    f_type = self.sections['PLATFORM']['Type']
    f_ID = self.sections['PLATFORM']['ID']
    f_name = self.sections['PLATFORM']['Name'].encode('utf-8')
    f_country = self.sections['PLATFORM']['Country']
    f_gaw_id = None
    try:
        f_gaw_id = self.sections['PLATFORM']['GAW_ID']
    except KeyError:
        # A missing key is treated as a misspelled field name: warn only.
        error_dict['warnings'].append('GAW_ID field is spelled incorrectly.')
    f_agency = self.sections['DATA_GENERATION']['Agency']
    f_lat = float(self.sections['LOCATION']['Latitude'])
    f_lon = float(self.sections['LOCATION']['Longitude'])

    agency_params = {'property_name': 'acronym',
                     'property_value': f_agency}
    platform_id_params = {'property_name': 'platform_id',
                          'property_value': f_ID}
    platform_name_params = {'property_name': 'platform_name',
                            'property_value': f_name}

    if client.get_data('stations', **agency_params) is None:
        # The acronym is unknown; retry treating the value as a full name.
        agency_params['property_name'] = 'contributor_name'
        data = client.get_data('stations', **agency_params)
        if data is not None:
            LOGGER.info('Agency name used instead of acronym.')
            acronym = data['features'][0]['properties']['acronym']
            return fail('Please use the Agency acronym of %s.' % acronym)

        # Still unknown: suggest agencies registered for this platform.
        candidate_acronyms = set()
        for description, params in (('platform ID', platform_id_params),
                                    ('platform name', platform_name_params)):
            LOGGER.debug('Resolving Agency through %s.', description)
            data = client.get_data('stations', **params)
            if data is not None:
                for row in data['features']:
                    candidate_acronyms.add(row['properties']['acronym'])
        if candidate_acronyms:
            LOGGER.info('Possible Agency matches found.')
            return fail('The following agencies match the given platform '
                        'name and/or ID: %s'
                        % ','.join(sorted(candidate_acronyms)))
        LOGGER.info('No Agency matches found.')
        return fail('Agency acronym of %s not found in the woudc database. '
                    'If this is a new agency, please notify WOUDC'
                    % f_agency)
    LOGGER.info('Successfully validated Agency.')

    def station_error(row, id_label, id_given, id_key):
        """Return the first file/database mismatch for `row`, else None."""
        properties = row['properties']
        if properties['platform_type'] != f_type:
            return ('Platform type of %s does not match database. '
                    'Please change it to %s'
                    % (f_type, properties['platform_type']))
        if properties['country_code'] != f_country:
            return ('Platform country of %s does not match database. '
                    'Please change it to %s'
                    % (f_country, properties['country_code']))
        id_in_db = properties[id_key]
        if id_key == 'platform_name':
            # The file's name was utf-8 encoded above; compare like with like.
            id_in_db = id_in_db.encode('utf-8')
        if id_in_db != id_given:
            return ('Platform %s of %s does not match database. '
                    'Please change it to %s'
                    % (id_label, id_given, id_in_db))
        # Coordinates are read as [longitude, latitude]; a difference of
        # less than 1 in either value is tolerated.
        db_lon = row['geometry']['coordinates'][0]
        if abs(float(db_lon) - f_lon) >= 1:
            return ('Location Longitude of %s does not match database. '
                    'Please change it to %s.' % (f_lon, db_lon))
        db_lat = row['geometry']['coordinates'][1]
        if abs(float(db_lat) - f_lat) >= 1:
            return ('Location Latitude of %s does not match database. '
                    'Please change it to %s.' % (f_lat, db_lat))
        return None

    LOGGER.debug('Resolving platform information.')
    # Look the station up by ID first (cross-checking its name); only if no
    # row belongs to this agency, look it up by name (cross-checking its ID).
    lookups = (
        (platform_id_params, 'name', f_name, 'platform_name'),
        (platform_name_params, 'ID', f_ID, 'platform_id'),
    )
    platform_found = False
    platform_agencies = set()
    for params, id_label, id_given, id_key in lookups:
        data = client.get_data('stations', **params)
        if data is not None:
            for row in data['features']:
                properties = row['properties']
                platform_agencies.add(properties['acronym'])
                if properties['acronym'] != f_agency:
                    continue
                message = station_error(row, id_label, id_given, id_key)
                if message is not None:
                    return fail(message)
                if properties['gaw_id'] != f_gaw_id:
                    error_dict['warnings'].append(
                        'Platform GAW_ID of %s does not match database. '
                        'Please change it to %s'
                        % (f_gaw_id, properties['gaw_id']))
                LOGGER.info('Successfully validated platform.')
                platform_found = True
        if platform_found:
            break

    if not platform_found:
        LOGGER.info('Failed to validate platform.')
        if platform_agencies:
            return fail('Agency and Platform information do not match. '
                        'These agencies are valid for this platform: %s'
                        % ','.join(sorted(platform_agencies)))
        return fail('Could not find a record for either the platform name '
                    'or ID. If this is a new station, please notify WOUDC.')

    # Check existence of instrument Name and model
    LOGGER.debug('Resolving Instrument information.')
    inst_name = self.sections['INSTRUMENT']['Name'].lower()
    inst_model = self.sections['INSTRUMENT']['Model']
    inst_name_params = {'property_name': 'instrument_name',
                        'property_value': inst_name}
    inst_model_params = {'property_name': 'instrument_model',
                         'property_value': inst_model}

    data = client.get_data('instruments', **inst_name_params)
    if data is None:
        LOGGER.info('Failed to located Instrument name.')
        return fail('Instrument Name is not in database. Please verify '
                    'that it is correct.\nNote: If the instrument name is '
                    'valid, this file will be rejected and then manually '
                    'processed into the database.')

    # Check if a new uri needs to be generated
    already_registered = any(
        row['properties']['contributor_id'] == f_agency and
        row['properties']['data_category'] == category and
        row['properties']['data_level'] == float(level)
        for row in data['features'])
    if not already_registered:
        error_dict['warnings'].append(
            'This is a new instrument class/data_category/data_level for '
            'this agency.\nThe file will be rejected and then manually '
            'processed into the database.')

    # Try the model as written, then upper-cased, then title-cased.
    for candidate in (inst_model, inst_model.upper(), inst_model.title()):
        inst_model_params['property_value'] = candidate
        if client.get_data('instruments', **inst_model_params) is not None:
            break
    else:
        LOGGER.info('Failed to located Instrument model.')
        return fail('Instrument Model is not in database. Please verify '
                    'that it is correct.\nNote: If the instrument model is '
                    'valid, this file will be rejected and then manually '
                    'processed into the database.')
    LOGGER.info('Instrument verification passed.')

    # Check for trailing commas in payload
    LOGGER.debug('Checking payload for trailing commas.')
    # Sections holding a single key are treated as payload tables; their
    # text is read from the '_raw' entry.
    payload_sections = [section for section in self.sections.values()
                        if len(section) == 1]
    # Initialised once so the flag exists even when there is no payload
    # table, and so a single file-level warning covers every table.
    fewer_commas = False
    for section in payload_sections:
        payload_lines = section['_raw'].split('\n')
        # Comma count of the header line, ignoring leading/trailing commas.
        header_len = len(payload_lines[0].strip().strip(',').split(',')) - 1
        for line in payload_lines:
            comma_count = line.count(',')
            if comma_count > header_len:
                LOGGER.info('Found trailing commas.')
                return fail('This file has extra trailing commas. Please '
                            'remove them before submitting.\nFirst line in '
                            'file with trailing commas:\n%s' % line)
            if comma_count < header_len and line.strip() != '':
                fewer_commas = True
    if fewer_commas:
        error_dict['warnings'].append(
            'Some lines in this file have fewer commas than there are '
            'headers.\nPlease consider adding in extra commas for '
            'readability.')
    LOGGER.info('No trailing commas found.')

    LOGGER.info('This file passed validation.')
    error_dict['status'] = True
    return error_dict
# NOTE(review): this file defines a second class that is also named
# WoudcClientTest (it expects http:// URLs). The later definition rebinds
# the name, so confirm which of the two is meant to be collected.
class WoudcClientTest(unittest.TestCase):
    """Test suite for package pywoudc.WoudcClient"""

    def setUp(self):
        """Create the WoudcClient instance shared by the assertions."""
        self.client = WoudcClient()

    def tearDown(self):
        """Nothing to release after a test."""
        pass

    def test_smoke_test(self):
        """Check the default attribute values of a new client."""
        self.assertEqual(self.client.url, 'https://geo.woudc.org/ows',
                         'Expected specific URL')
        self.assertEqual(self.client.about,
                         'https://woudc.org/about/data-access.php',
                         'Expected specific about URL')
        self.assertEqual(self.client.outputformat,
                         'application/json; subtype=geojson',
                         'Expected specific default outputformat')
        self.assertEqual(self.client.maxfeatures, 25000,
                         'Expected specific default maxfeatures')
        self.assertEqual(self.client.timeout, 30,
                         'Expected specific default timeout')
        self.assertIsInstance(self.client.server, WebFeatureService_1_1_0,
                              'Expected specific instance')

    def test_get_metadata(self):
        """Check parsed and raw metadata responses for each typename."""
        for typename in ['stations', 'contributors']:
            # Parsed form: a dict shaped like a GeoJSON FeatureCollection.
            data = self.client._get_metadata(typename)
            self.assertIsInstance(data, dict, 'Expected specific instance')
            self.assertIn('type', data, 'Expected GeoJSON header')
            self.assertEqual(data['type'], 'FeatureCollection',
                             'Expected GeoJSON header')
            self.assertIn('features', data, 'Expected GeoJSON header')
            self.assertGreater(len(data['features']), 0,
                               'Expected non-empty %s list' % typename)

            # Raw form: the same response as an undecoded string.
            raw_data = self.client._get_metadata(typename, raw=True)
            self.assertIsInstance(raw_data, str,
                                  'Expected specific instance')
            self.assertIn('"type": "FeatureCollection"', raw_data,
                          'Expected raw GeoJSON response')

    def test_get_data(self):
        """Check that invalid get_data arguments raise ValueError."""
        dataset = 'totalozone'
        bad_bbox = [42, -52, 84]

        self.assertRaises(ValueError, self.client.get_data, dataset,
                          bbox=bad_bbox)
        self.assertRaises(ValueError, self.client.get_data, dataset,
                          bbox='-142,42,-53,84')
        self.assertRaises(ValueError, self.client.get_data, dataset,
                          temporal='2000-11-11/2001-10-30')
        self.assertRaises(ValueError, self.client.get_data, dataset,
                          temporal=['2000-11-11'])
        self.assertRaises(ValueError, self.client.get_data, dataset,
                          sort_order='bad')
        self.assertRaises(ValueError, self.client.get_data, dataset,
                          variables='foo')

    def test_date2string(self):
        """Check date2string output for string, date and datetime input."""
        # Date-only strings are padded according to the 'begin'/'end' flag.
        self.assertEqual(date2string('2000-10-10', 'begin'),
                         '2000-10-10 00:00:00',
                         'Expected specific date string from date string')
        self.assertEqual(date2string('2001-11-11', 'end'),
                         '2001-11-11 23:59:59',
                         'Expected specific date string from date string')

        # Full datetime strings come back unchanged.
        self.assertEqual(date2string('2000-10-10 02:22:28'),
                         '2000-10-10 02:22:28',
                         'Expected specific date string from datetime string')
        self.assertEqual(date2string('2001-11-11 11:33:24'),
                         '2001-11-11 11:33:24',
                         'Expected specific date string from datetime string')

        # date objects follow the same 'begin'/'end' padding.
        self.assertEqual(date2string(datetime.date(2000, 11, 30), 'begin'),
                         '2000-11-30 00:00:00',
                         'Expected specific date string from date object')
        self.assertEqual(date2string(datetime.date(2011, 11, 30), 'end'),
                         '2011-11-30 23:59:59',
                         'Expected specific date string from date object')

        # datetime objects are rendered with their own time of day.
        self.assertEqual(
            date2string(datetime.datetime(2002, 10, 30, 11, 11, 11)),
            '2002-10-30 11:11:11',
            'Expected specific date string from datetime object')
        self.assertEqual(
            date2string(datetime.datetime(2011, 11, 30, 12, 12, 12)),
            '2011-11-30 12:12:12',
            'Expected specific date string from datetime object')
# NOTE(review): an earlier class in this file is also named WoudcClientTest
# (it expects https:// URLs). This later definition rebinds the name, so
# confirm which set of expected URLs matches the client.
class WoudcClientTest(unittest.TestCase):
    """Test suite for package pywoudc.WoudcClient"""

    def setUp(self):
        """Build the WoudcClient used by each test method."""
        self.client = WoudcClient()

    def tearDown(self):
        """No cleanup is performed."""
        pass

    def test_smoke_test(self):
        """Verify the defaults exposed by a freshly constructed client."""
        client = self.client
        self.assertEqual(client.url, "http://geo.woudc.org/ows", "Expected specific URL")
        self.assertEqual(
            client.about, "http://woudc.org/about/data-access.php", "Expected specific about URL"
        )
        self.assertEqual(
            client.outputformat,
            "application/json; subtype=geojson",
            "Expected specific default outputformat",
        )
        self.assertEqual(client.maxfeatures, 25000, "Expected specific default maxfeatures")
        self.assertEqual(client.timeout, 30, "Expected specific default timeout")
        self.assertIsInstance(client.server, WebFeatureService_1_1_0, "Expected specific instance")

    def test_get_metadata(self):
        """Verify the dict and raw-string forms of metadata responses."""
        for typename in ["stations", "contributors"]:
            # Default call: expect a dict with GeoJSON FeatureCollection keys.
            data = self.client._get_metadata(typename)
            self.assertIsInstance(data, dict, "Expected specific instance")
            self.assertIn("type", data, "Expected GeoJSON header")
            self.assertEqual(data["type"], "FeatureCollection", "Expected GeoJSON header")
            self.assertIn("features", data, "Expected GeoJSON header")
            self.assertGreater(len(data["features"]), 0, "Expected non-empty %s list" % typename)

            # raw=True: expect the response as a string.
            raw_data = self.client._get_metadata(typename, raw=True)
            self.assertIsInstance(raw_data, str, "Expected specific instance")
            self.assertIn('"type": "FeatureCollection"', raw_data, "Expected raw GeoJSON response")

    def test_get_data(self):
        """Verify that malformed get_data arguments raise ValueError."""
        dataset = "totalozone"
        get_data = self.client.get_data

        self.assertRaises(ValueError, get_data, dataset, bbox=[42, -52, 84])
        self.assertRaises(ValueError, get_data, dataset, bbox="-142,42,-53,84")
        self.assertRaises(ValueError, get_data, dataset, temporal="2000-11-11/2001-10-30")
        self.assertRaises(ValueError, get_data, dataset, temporal=["2000-11-11"])
        self.assertRaises(ValueError, get_data, dataset, sort_order="bad")
        self.assertRaises(ValueError, get_data, dataset, variables="foo")

    def test_date2string(self):
        """Verify date2string for date/datetime strings and objects."""
        # Date-only strings: time of day depends on the 'begin'/'end' flag.
        self.assertEqual(
            date2string("2000-10-10", "begin"),
            "2000-10-10 00:00:00",
            "Expected specific date string from date string",
        )
        self.assertEqual(
            date2string("2001-11-11", "end"),
            "2001-11-11 23:59:59",
            "Expected specific date string from date string",
        )

        # Datetime strings are expected back unchanged.
        self.assertEqual(
            date2string("2000-10-10 02:22:28"),
            "2000-10-10 02:22:28",
            "Expected specific date string from datetime string",
        )
        self.assertEqual(
            date2string("2001-11-11 11:33:24"),
            "2001-11-11 11:33:24",
            "Expected specific date string from datetime string",
        )

        # date objects: same 'begin'/'end' handling as date-only strings.
        self.assertEqual(
            date2string(datetime.date(2000, 11, 30), "begin"),
            "2000-11-30 00:00:00",
            "Expected specific date string from date object",
        )
        self.assertEqual(
            date2string(datetime.date(2011, 11, 30), "end"),
            "2011-11-30 23:59:59",
            "Expected specific date string from date object",
        )

        # datetime objects keep their own time component.
        self.assertEqual(
            date2string(datetime.datetime(2002, 10, 30, 11, 11, 11)),
            "2002-10-30 11:11:11",
            "Expected specific date string from datetime object",
        )
        self.assertEqual(
            date2string(datetime.datetime(2011, 11, 30, 12, 12, 12)),
            "2011-11-30 12:12:12",
            "Expected specific date string from datetime object",
        )