Example #1
0
    def setUp(self):
        """Bootstrap: attach a new WoudcClient to the fixture as self.client."""
        woudc_client = WoudcClient()
        self.client = woudc_client
Example #2
0
    def metadata_validator(self):
        """
        Robust validator to check metadata against values
        in WOUDC database. Returns a dictionary of
        errors or warnings depending on severity of violation.

        The checks run in a fixed order and the method returns as soon
        as one of them records an error:

        1. basic table validation (global_validate)
        2. CONTENT Category/Level consistency
        3. table validation specific to the category and level
        4. agency lookup (acronym, then contributor name, then
           suggestions derived from the platform ID and name)
        5. platform lookup by ID, then by name, compared against the
           PLATFORM and LOCATION tables of the file
        6. instrument name and model lookup
        7. comma count of every payload table

        :returns: Dictionary of the form:
                  {'status' : True if validation passed,
                              False otherwise.
                   'warnings' : a list of warnings.
                   'errors': a list of errors.
                  }
        """
        error_dict = {'status': False, 'warnings': [], 'errors': []}
        level_one = ('1.0', '1')
        level_two = ('2.0', '2')

        def fail(message):
            """Record message as an error and return the result dict."""
            error_dict['errors'].append(message)
            return error_dict

        client = WoudcClient()

        # Attempt basic validation
        LOGGER.debug('Attempting basic table validation.')
        error, violations = global_validate(self.sections)
        if error:
            return fail('The following violations were found: \n %s'
                        % '\n'.join(violations))

        LOGGER.info('Basic table validation passed.')

        # Check that Content Category and Level are Valid
        LOGGER.debug('Checking Content Category and Level.')
        category = self.sections['CONTENT']['Category']
        level = self.sections['CONTENT']['Level']
        if category != 'UmkehrN14':
            if level not in level_one:
                return fail('Level for category %s must be 1.0' % category)
        else:
            if 'N14_VALUES' in self.sections and level not in level_one:
                return fail('Level for category UmkehrN14 '
                            'with table N14_VALUES must be 1.0')
            if 'C_PROFILE' in self.sections and level not in level_two:
                # This branch tests C_PROFILE against level 2, so the
                # message names that table and level.
                return fail('Level for category UmkehrN14 '
                            'with table C_PROFILE must be 2.0')

        LOGGER.info('Content Category and Level are valid.')

        # Attempt further basic validation given category and level
        LOGGER.debug('Attempting specific table validation for '
                     'category %s and level %s.' % (category, level))
        # Both spellings accepted above ('1.0' and '1') select level '1'.
        table_level = '1' if level in level_one else '2'
        error, violations = global_validate(self.sections, category,
                                            table_level, '1')
        if error:
            return fail('The following violations were found: \n %s'
                        % '\n'.join(violations))

        LOGGER.info('Further table validation passed.')

        # Check agency and platform information
        LOGGER.debug('Resolving Agency and Platform information.')
        platform = self.sections['PLATFORM']
        f_type = platform['Type']
        f_ID = platform['ID']
        # NOTE(review): .encode() yields bytes on Python 3, which changes
        # how the name is rendered in messages -- confirm target runtime.
        f_name = platform['Name'].encode('utf-8')
        f_country = platform['Country']
        f_gaw_id = None
        try:
            f_gaw_id = platform['GAW_ID']
        except KeyError:
            # A missing key is only a warning; validation carries on
            # with f_gaw_id left as None.
            error_dict['warnings'].append('GAW_ID field is '
                                          'spelled incorrectly.')
        f_agency = self.sections['DATA_GENERATION']['Agency']
        f_lat = float(self.sections['LOCATION']['Latitude'])
        f_lon = float(self.sections['LOCATION']['Longitude'])

        agency_params = {'property_name': 'acronym',
                         'property_value': f_agency}
        platform_id_params = {'property_name': 'platform_id',
                              'property_value': f_ID}
        platform_name_params = {'property_name': 'platform_name',
                                'property_value': f_name}

        def acronyms_for(params):
            """Return the acronyms of every station matching params."""
            matches = client.get_data('stations', **params)
            if matches is None:
                return set()
            return set(row['properties']['acronym']
                       for row in matches['features'])

        if client.get_data('stations', **agency_params) is None:
            # The acronym is unknown; maybe the full name was supplied.
            agency_params['property_name'] = 'contributor_name'
            data = client.get_data('stations', **agency_params)
            if data is not None:
                LOGGER.info('Agency name used instead of acronym.')
                acronym = data['features'][0]['properties']['acronym']
                return fail('Please use the Agency acronym of %s.'
                            % acronym)

            # Otherwise suggest the agencies tied to the platform.
            LOGGER.debug('Resolving Agency through platform ID.')
            candidates = acronyms_for(platform_id_params)
            LOGGER.debug('Resolving Agency through platform name.')
            candidates |= acronyms_for(platform_name_params)

            if candidates:
                LOGGER.info('Possible Agency matches found.')
                # sorted() keeps the listing stable between runs.
                return fail('The following agencies match the given '
                            'platform name and/or ID: %s'
                            % ','.join(sorted(candidates)))

            LOGGER.info('No Agency matches found.')
            return fail('Agency acronym of %s not found in the woudc '
                        'database. If this is a new agency, '
                        'please notify WOUDC' % f_agency)

        LOGGER.info('Successfully validated Agency.')
        LOGGER.debug('Resolving platform information.')
        a_set = set()
        mismatch = ('%s of %s does not match database. '
                    'Please change it to %s')

        def check_platform(data, fetched_by_id):
            """
            Compare the file with every row owned by the file's agency.

            Each row's acronym is added to a_set. A GAW_ID difference is
            appended to the warnings and does not stop the comparison.

            :param data: result of client.get_data('stations', ...),
                         or None.
            :param fetched_by_id: True when data was looked up by
                                  platform ID (the name is compared);
                                  False when it was looked up by name
                                  (the ID is compared).
            :returns: tuple (matched, message); message is the first
                      mismatch found or None, matched tells whether a
                      row of the agency passed every check.
            """
            matched = False
            if data is None:
                return matched, None
            for row in data['features']:
                properties = row['properties']
                a_set.add(properties['acronym'])
                if properties['acronym'] != f_agency:
                    continue
                if properties['platform_type'] != f_type:
                    return matched, mismatch % (
                        'Platform type', f_type,
                        properties['platform_type'])
                if properties['country_code'] != f_country:
                    return matched, mismatch % (
                        'Platform country', f_country,
                        properties['country_code'])
                if fetched_by_id:
                    db_name = properties['platform_name'].encode('utf-8')
                    if db_name != f_name:
                        return matched, mismatch % (
                            'Platform name', f_name, db_name)
                elif properties['platform_id'] != f_ID:
                    return matched, mismatch % (
                        'Platform ID', f_ID, properties['platform_id'])
                # Coordinates are indexed as [0] = longitude and
                # [1] = latitude; a difference below 1 is tolerated.
                coordinates = row['geometry']['coordinates']
                if abs(float(coordinates[0]) - f_lon) >= 1:
                    return matched, mismatch % (
                        'Location Longitude', f_lon,
                        coordinates[0]) + '.'
                if abs(float(coordinates[1]) - f_lat) >= 1:
                    return matched, mismatch % (
                        'Location Latitude', f_lat,
                        coordinates[1]) + '.'
                if properties['gaw_id'] != f_gaw_id:
                    error_dict['warnings'].append(mismatch % (
                        'Platform GAW_ID', f_gaw_id,
                        properties['gaw_id']))
                LOGGER.info('Successfully validated platform.')
                matched = True
            return matched, None

        flag, message = check_platform(
            client.get_data('stations', **platform_id_params), True)
        if message is not None:
            return fail(message)
        if not flag:
            # Nothing matched by ID; retry using the platform name.
            flag, message = check_platform(
                client.get_data('stations', **platform_name_params), False)
            if message is not None:
                return fail(message)
        if not flag:
            LOGGER.info('Failed to validate platform.')
            if a_set:
                return fail('Agency and Platform information do not '
                            'match. These agencies are valid for this '
                            'platform: %s' % ','.join(sorted(a_set)))
            return fail('Could not find a record for either the platform '
                        'name or ID. If this is a new station, '
                        'please notify WOUDC.')

        # Check existence of instrument Name and model
        LOGGER.debug('Resolving Instrument information.')
        inst_name = self.sections['INSTRUMENT']['Name'].lower()
        inst_model = self.sections['INSTRUMENT']['Model']
        inst_model_upper = inst_model.upper()
        inst_name_params = {'property_name': 'instrument_name',
                            'property_value': inst_name}
        inst_model_params = {'property_name': 'instrument_model',
                             'property_value': inst_model}
        data = client.get_data('instruments', **inst_name_params)
        if data is None:
            LOGGER.info('Failed to located Instrument name.')
            return fail('Instrument Name is not in database. Please '
                        'verify that it is correct.\nNote: If the '
                        'instrument name is valid, this file will be '
                        'rejected and then manually processed into the '
                        'database.')

        # Check if a new uri needs to be generated
        found = any(
            row['properties']['contributor_id'] == f_agency and
            row['properties']['data_category'] == category and
            row['properties']['data_level'] == float(level)
            for row in data['features'])
        if not found:
            error_dict['warnings'].append(
                'This is a new instrument class/data_category/data_level '
                'for this agency.\nThe file will be rejected and then '
                'manually processed into the database.')

        # Try the model as written, then upper-cased, then title-cased.
        for candidate in (inst_model, inst_model_upper, inst_model.title()):
            inst_model_params['property_value'] = candidate
            if client.get_data('instruments', **inst_model_params) is not None:  # noqa
                break
        else:
            LOGGER.info('Failed to located Instrument model.')
            return fail('Instrument Model is not in database. Please '
                        'verify that it is correct.\nNote: If the '
                        'instrument model is valid, this file will be '
                        'rejected and then manually processed into the '
                        'database.')

        LOGGER.info('Instrument verification passed.')
        # Check for trailing commas in payload
        LOGGER.debug('Checking payload for trailing commas.')
        # Tables holding a single key are treated as payload tables.
        payload_tables = [table for table in self.sections
                          if len(self.sections[table]) == 1]

        # Initialised before the loop so it exists when there is no
        # payload table and accumulates across all of them.
        fewer_commas = False
        for payload_table in payload_tables:
            payload_lines = self.sections[payload_table]['_raw'].split('\n')
            # Number of commas in the header line, ignoring any that
            # lead or trail it.
            header_len = len(
                payload_lines[0].strip().strip(',').split(',')) - 1
            for line in payload_lines:
                commas = line.count(',')
                if commas > header_len:
                    LOGGER.info('Found trailing commas.')
                    return fail('This file has extra trailing commas. '
                                'Please remove them before submitting.\n'
                                'First line in file with trailing '
                                'commas:\n%s' % line)
                if commas < header_len and line.strip() != '':
                    fewer_commas = True

        if fewer_commas:
            error_dict['warnings'].append(
                'Some lines in this file have fewer commas than there are '
                'headers.\nPlease consider adding in extra commas for '
                'readability.')
        LOGGER.info('No trailing commas found.')
        LOGGER.info('This file passed validation.')
        error_dict['status'] = True
        return error_dict
Example #3
0
class WoudcClientTest(unittest.TestCase):
    """Test suite for package pywoudc.WoudcClient"""

    def setUp(self):
        """bootstrap: create the client used by the test methods"""

        self.client = WoudcClient()

    def tearDown(self):
        """destroy: nothing to release"""
        pass

    def test_smoke_test(self):
        """test basic properties"""

        self.assertEqual(self.client.url, 'https://geo.woudc.org/ows',
                         'Expected specific URL')

        self.assertEqual(self.client.about,
                         'https://woudc.org/about/data-access.php',
                         'Expected specific about URL')
        self.assertEqual(self.client.outputformat,
                         'application/json; subtype=geojson',
                         'Expected specific default outputformat')

        self.assertEqual(self.client.maxfeatures, 25000,
                         'Expected specific default maxfeatures')

        self.assertEqual(self.client.timeout, 30,
                         'Expected specific default timeout')

        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(self.client.server, WebFeatureService_1_1_0,
                              'Expected specific instance')

    def test_get_metadata(self):
        """test get various requests for metadata"""

        for typename in ['stations', 'contributors']:
            data = self.client._get_metadata(typename)

            self.assertIsInstance(data, dict,
                                  'Expected specific instance')

            self.assertIn('type', data,
                          'Expected GeoJSON header')

            self.assertEqual(data['type'], 'FeatureCollection',
                             'Expected GeoJSON header')

            self.assertIn('features', data,
                          'Expected GeoJSON header')

            self.assertGreater(len(data['features']), 0,
                               'Expected non-empty %s list' % typename)

            raw_data = self.client._get_metadata(typename, raw=True)

            self.assertIsInstance(raw_data, str,
                                  'Expected specific instance')

            self.assertIn('"type": "FeatureCollection"', raw_data,
                          'Expected raw GeoJSON response')

    def test_get_data(self):
        """test get data handling"""

        dataset = 'totalozone'

        # Every keyword set below is expected to raise ValueError.
        rejected = [
            {'bbox': [42, -52, 84]},
            {'bbox': '-142,42,-53,84'},
            {'temporal': '2000-11-11/2001-10-30'},
            {'temporal': ['2000-11-11']},
            {'sort_order': 'bad'},
            {'variables': 'foo'},
        ]

        for kwargs in rejected:
            self.assertRaises(ValueError, self.client.get_data,
                              dataset, **kwargs)

    def test_date2string(self):
        """test date handling"""

        from_date_string = 'Expected specific date string from date string'
        from_datetime_string = \
            'Expected specific date string from datetime string'
        from_date_object = 'Expected specific date string from date object'
        from_datetime_object = \
            'Expected specific date string from datetime object'

        # (date2string arguments, expected result, failure message)
        cases = [
            (('2000-10-10', 'begin'),
             '2000-10-10 00:00:00', from_date_string),
            (('2001-11-11', 'end'),
             '2001-11-11 23:59:59', from_date_string),
            (('2000-10-10 02:22:28',),
             '2000-10-10 02:22:28', from_datetime_string),
            (('2001-11-11 11:33:24',),
             '2001-11-11 11:33:24', from_datetime_string),
            ((datetime.date(2000, 11, 30), 'begin'),
             '2000-11-30 00:00:00', from_date_object),
            ((datetime.date(2011, 11, 30), 'end'),
             '2011-11-30 23:59:59', from_date_object),
            ((datetime.datetime(2002, 10, 30, 11, 11, 11),),
             '2002-10-30 11:11:11', from_datetime_object),
            ((datetime.datetime(2011, 11, 30, 12, 12, 12),),
             '2011-11-30 12:12:12', from_datetime_object),
        ]

        for args, expected, message in cases:
            self.assertEqual(date2string(*args), expected, message)
Example #4
0
class WoudcClientTest(unittest.TestCase):
    """Test suite for package pywoudc.WoudcClient"""

    def setUp(self):
        """bootstrap: create the client used by the test methods"""

        self.client = WoudcClient()

    def tearDown(self):
        """destroy: nothing to release"""
        pass

    def test_smoke_test(self):
        """test basic properties"""

        self.assertEqual(self.client.url, "http://geo.woudc.org/ows", "Expected specific URL")

        self.assertEqual(self.client.about, "http://woudc.org/about/data-access.php", "Expected specific about URL")
        self.assertEqual(
            self.client.outputformat, "application/json; subtype=geojson", "Expected specific default outputformat"
        )

        self.assertEqual(self.client.maxfeatures, 25000, "Expected specific default maxfeatures")

        self.assertEqual(self.client.timeout, 30, "Expected specific default timeout")

        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(self.client.server, WebFeatureService_1_1_0, "Expected specific instance")

    def test_get_metadata(self):
        """test get various requests for metadata"""

        for typename in ["stations", "contributors"]:
            data = self.client._get_metadata(typename)

            self.assertIsInstance(data, dict, "Expected specific instance")

            self.assertIn("type", data, "Expected GeoJSON header")

            self.assertEqual(data["type"], "FeatureCollection", "Expected GeoJSON header")

            self.assertIn("features", data, "Expected GeoJSON header")

            self.assertGreater(len(data["features"]), 0, "Expected non-empty %s list" % typename)

            raw_data = self.client._get_metadata(typename, raw=True)

            self.assertIsInstance(raw_data, str, "Expected specific instance")

            self.assertIn('"type": "FeatureCollection"', raw_data, "Expected raw GeoJSON response")

    def test_get_data(self):
        """test get data handling"""

        dataset = "totalozone"

        # Every keyword set below is expected to raise ValueError.
        rejected = [
            {"bbox": [42, -52, 84]},
            {"bbox": "-142,42,-53,84"},
            {"temporal": "2000-11-11/2001-10-30"},
            {"temporal": ["2000-11-11"]},
            {"sort_order": "bad"},
            {"variables": "foo"},
        ]

        for kwargs in rejected:
            self.assertRaises(ValueError, self.client.get_data, dataset, **kwargs)

    def test_date2string(self):
        """test date handling"""

        from_date_string = "Expected specific date string from date string"
        from_datetime_string = "Expected specific date string from datetime string"
        from_date_object = "Expected specific date string from date object"
        from_datetime_object = "Expected specific date string from datetime object"

        # (date2string arguments, expected result, failure message)
        cases = [
            (("2000-10-10", "begin"), "2000-10-10 00:00:00", from_date_string),
            (("2001-11-11", "end"), "2001-11-11 23:59:59", from_date_string),
            (("2000-10-10 02:22:28",), "2000-10-10 02:22:28", from_datetime_string),
            (("2001-11-11 11:33:24",), "2001-11-11 11:33:24", from_datetime_string),
            ((datetime.date(2000, 11, 30), "begin"), "2000-11-30 00:00:00", from_date_object),
            ((datetime.date(2011, 11, 30), "end"), "2011-11-30 23:59:59", from_date_object),
            ((datetime.datetime(2002, 10, 30, 11, 11, 11),), "2002-10-30 11:11:11", from_datetime_object),
            ((datetime.datetime(2011, 11, 30, 12, 12, 12),), "2011-11-30 12:12:12", from_datetime_object),
        ]

        for args, expected, message in cases:
            self.assertEqual(date2string(*args), expected, message)