Ejemplo n.º 1
0
    def test_annotate_getcapabilities_url(self):
        """Annotate service URLs that already carry 'request=GetCapabilities'.

        For each such URL the annotated resource must keep the URL unchanged,
        carry the format matching the service (WFS or WMS), be classified as
        an API description and be flagged as the main resource. The WFS URL
        is checked with two spellings of 'getcapabilities' because case
        should not matter.
        """
        cases = [
            (
                'https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_boden_wfs1_2015?request=getcapabilities&service=wfs&version=2.0.0',
                FORMAT_WFS,
            ),
            (
                'https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_boden_wfs1_2015?request=GetCapabilities&service=wfs&version=2.0.0',
                FORMAT_WFS,
            ),
            (
                'https://fbinter.stadt-berlin.de/fb/wms/senstadt/wmsk_02_14_04gwtemp_60m?request=getcapabilities&service=wms&version=1.3.0',
                FORMAT_WMS,
            ),
        ]

        for capabilities_url, expected_format in cases:
            # A fresh annotator per case keeps the cases independent.
            annotated = FISBrokerResourceAnnotator().annotate_resource(
                {'url': capabilities_url})
            _assert_equal(annotated['url'], capabilities_url)
            _assert_equal(annotated['format'], expected_format)
            _assert_equal(annotated['internal_function'], FUNCTION_API_DESCRIPTION)
            assert annotated['main']
Ejemplo n.º 2
0
    def test_sort_resources_by_weight(self):
        """sort_resources() should return the resource dicts in ascending
        order of their 'weight' member."""

        # (name, weight) pairs, deliberately out of order.
        fixture = [
            ('foo', 20),
            ('bar', 5),
            ('daz', 10),
            ('dingo', 15),
            ('baz', 25),
        ]
        unsorted_resources = [
            {'name': name, 'weight': weight} for name, weight in fixture
        ]

        ordered = FISBrokerResourceAnnotator().sort_resources(unsorted_resources)
        actual_weights = [entry['weight'] for entry in ordered]
        _assert_equal([5, 10, 15, 20, 25], actual_weights)
Ejemplo n.º 3
0
    def test_only_wms_and_wfs_allowed(self):
        '''Some methods only allow 'wms' or 'wfs' as the service parameter.
           Other values should raise an exception.

           Checked here: service_version() and getcapabilities_query() called
           with 'atom', and annotate_service_resource() called with an Atom
           feed URL. Each is expected to raise ValueError.'''
        with assert_raises(ValueError):
            FISBrokerResourceAnnotator.service_version('atom')
        with assert_raises(ValueError):
            FISBrokerResourceAnnotator.getcapabilities_query('atom')

        # Build the fixture outside the `assert_raises` block so that only the
        # call under test can satisfy the expectation; a ValueError raised
        # while constructing the annotator must fail the test, not pass it.
        annotator = FISBrokerResourceAnnotator()
        feed_resource = {
            'url': 'https://fbinter.stadt-berlin.de/fb/feed/senstadt/a_SU_LOR'
        }
        with assert_raises(ValueError):
            annotator.annotate_service_resource(feed_resource)
Ejemplo n.º 4
0
    def test_arbitrary_url_without_description_is_ignored(self):
        """A resource with an arbitrary (non-service) URL and no 'description'
        member should be annotated to None."""
        undescribed_resource = {
            'url': 'https://fbinter.stadt-berlin.de/fb_daten/beschreibung/umweltatlas/datenformatbeschreibung/Datenformatbeschreibung_kriterien_zur_bewertung_der_bodenfunktionen2015.pdf',
            'name': 'Technische Beschreibung',
            'format': FORMAT_PDF,
        }

        result = FISBrokerResourceAnnotator().annotate_resource(undescribed_resource)

        _assert_equal(result, None)
Ejemplo n.º 5
0
    def test_annotate_atom_feed(self):
        """An Atom feed URL should keep its URL, be named and described as
        "Atom Feed", get the Atom format, be classified as an API endpoint
        and be flagged as the main resource."""
        feed_url = 'https://fbinter.stadt-berlin.de/fb/feed/senstadt/a_SU_LOR'
        annotated = FISBrokerResourceAnnotator().annotate_resource({'url': feed_url})

        # Checked in this order so the first mismatch reported is stable.
        expected_members = [
            ('url', feed_url),
            ('name', "Atom Feed"),
            ('description', "Atom Feed"),
            ('format', FORMAT_ATOM),
            ('internal_function', FUNCTION_API_ENDPOINT),
        ]
        for member, expected_value in expected_members:
            _assert_equal(annotated[member], expected_value)
        assert annotated['main']
Ejemplo n.º 6
0
    def test_annotate_service_endpoint_url(self):
        """An incoming FIS-Broker service resource without query strings is the
           service endpoint. Test that it is annotated correctly.
           Test both WFS and WMS.

           For each endpoint the URL must be unchanged, the format must match
           the service type, the resource must be classified as an API
           endpoint, and it must not be flagged as the main resource."""

        # The WMS case was promised by this docstring but missing from the
        # test body; it mirrors the WFS case with the WMS format.
        endpoints = [
            (
                'https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_boden_wfs1_2015',
                FORMAT_WFS,
            ),
            (
                'https://fbinter.stadt-berlin.de/fb/wms/senstadt/wmsk_02_14_04gwtemp_60m',
                FORMAT_WMS,
            ),
        ]

        for url, expected_format in endpoints:
            resource = {'url': url}
            annotator = FISBrokerResourceAnnotator()
            converted_resource = annotator.annotate_resource(resource)
            _assert_equal(converted_resource['url'], url)
            _assert_equal(converted_resource['format'], expected_format)
            _assert_equal(converted_resource['internal_function'], FUNCTION_API_ENDPOINT)
            assert not converted_resource['main']
Ejemplo n.º 7
0
    def test_annotate_service_page(self):
        """FIS-Broker service page URLs (http and https, with and without
        'index.jsp') should keep their URL, be named "Serviceseite im
        FIS-Broker", get the HTML format, be classified as a web interface
        and not be flagged as the main resource."""
        page_urls = (
            "http://fbinter.stadt-berlin.de/fb?loginkey=showMap&mapId=nsg_lsg@senstadt",
            "http://fbinter.stadt-berlin.de/fb/index.jsp?loginkey=showMap&mapId=nsg_lsg@senstadt",
            "https://fbinter.stadt-berlin.de/fb?loginkey=showMap&mapId=nsg_lsg@senstadt",
            "https://fbinter.stadt-berlin.de/fb/index.jsp?loginkey=showMap&mapId=nsg_lsg@senstadt",
        )

        for page_url in page_urls:
            # A fresh annotator per URL keeps the cases independent.
            annotated = FISBrokerResourceAnnotator().annotate_resource(
                {'url': page_url})

            _assert_equal(annotated['url'], page_url)
            _assert_equal(annotated['name'], "Serviceseite im FIS-Broker")
            _assert_equal(annotated['format'], FORMAT_HTML)
            _assert_equal(annotated['internal_function'], FUNCTION_WEB_INTERFACE)
            assert not annotated['main']
Ejemplo n.º 8
0
    def test_annotate_arbitrary_url_with_description(self):
        """A resource with an arbitrary (non-service) URL that does carry a
        'description' should keep its name, description, format and URL, be
        classified as documentation and not be flagged as the main
        resource."""
        pdf_url = 'https://fbinter.stadt-berlin.de/fb_daten/beschreibung/umweltatlas/datenformatbeschreibung/Datenformatbeschreibung_kriterien_zur_bewertung_der_bodenfunktionen2015.pdf'
        label = 'Technische Beschreibung'
        described_resource = {
            'url': pdf_url,
            'name': label,
            'description': label,
            'format': FORMAT_PDF,
        }

        annotated = FISBrokerResourceAnnotator().annotate_resource(described_resource)

        # Checked in this order so the first mismatch reported is stable.
        expected_members = [
            ('name', label),
            ('description', label),
            ('format', FORMAT_PDF),
            ('internal_function', FUNCTION_DOCUMENTATION),
            ('url', pdf_url),
        ]
        for member, expected_value in expected_members:
            _assert_equal(annotated[member], expected_value)
        assert not annotated['main']
Ejemplo n.º 9
0
    def get_package_dict(self, context, data_dict):
        '''Build the package dict for a harvested FIS-Broker record.

        Implementation of ckanext.spatial.interfaces.ISpatialHarvester.get_package_dict().
        https://github.com/ckan/ckanext-spatial/blob/master/ckanext/spatial/interfaces.py

        If `data_dict` is not subscriptable, the call is delegated to
        CSWHarvester.get_package_dict(). Otherwise `data_dict['package_dict']`
        is enriched in place and returned.

        When the record fails a check, the string 'skip' is returned and
        context['error'] is set to a JSON object with a 'code' and a
        'description'. Codes: 1 = not tagged as open data, 2 = not a service
        resource, 3 = no organisation name, 4 = no responsible organisation
        email, 5 = no license code, 6 = no release date.
        '''
        LOG.debug("--------- get_package_dict ----------")

        if not hasattr(data_dict, '__getitem__'):
            LOG.debug('calling get_package_dict on CSWHarvester')
            return CSWHarvester.get_package_dict(self, context, data_dict)

        def skip(code, description):
            # Record why the dataset is rejected and hand back the skip marker.
            context['error'] = json.dumps({
                'code': code,
                'description': description
            })
            return 'skip'

        package_dict = data_dict['package_dict']
        iso_values = data_dict['iso_values']

        LOG.debug(iso_values['title'])

        # Only datasets marked for Open Data are harvested.
        if not marked_as_opendata(data_dict):
            LOG.debug("no 'opendata' tag, skipping dataset ...")
            return skip(1, 'not tagged as open data')
        LOG.debug("this is tagged 'opendata', continuing ...")

        # Only service resources are of interest.
        if not marked_as_service_resource(data_dict):
            LOG.debug("this is not a service resource, skipping dataset ...")
            return skip(2, 'not a service resource')
        LOG.debug("this is a service resource, continuing ...")

        extras = self.extras_dict(package_dict['extras'])

        # Filter out various tags.
        unwanted_tags = [u'äöü', u'opendata', u'open data']
        package_dict['tags'] = filter_tags(
            unwanted_tags, iso_values['tags'], package_dict['tags'])

        # Contact information:
        #   publishing body              -> author
        #   body responsible for data    -> maintainer
        #   email of the responsible body -> maintainer_email
        contact_info = extract_contact_info(data_dict)

        if 'author' not in contact_info:
            LOG.error(
                'could not determine responsible organisation name, skipping ...')
            return skip(3, 'no organisation name')
        package_dict['author'] = contact_info['author']

        if 'maintainer_email' not in contact_info:
            LOG.error(
                'could not determine responsible organisation email, skipping ...')
            return skip(4, 'no responsible organisation email')
        package_dict['maintainer_email'] = contact_info['maintainer_email']

        if 'maintainer' in contact_info:
            package_dict['maintainer'] = contact_info['maintainer']

        # Not set here: email of the publishing body (author_email) and the
        # publishing person (extras.username).

        # License (mandatory) and attribution text (optional).
        license_info = extract_license_and_attribution(data_dict)

        if 'license_id' not in license_info:
            LOG.error('could not determine license code, skipping ...')
            return skip(5, 'could not determine license code')
        package_dict['license_id'] = license_info['license_id']

        if 'attribution_text' in license_info:
            extras['attribution_text'] = license_info['attribution_text']

        # extras.date_released (mandatory) / extras.date_updated (optional)
        reference_dates = extract_reference_dates(data_dict)

        if 'date_released' not in reference_dates:
            LOG.error(
                'could not get anything for date_released from ISO values, skipping ...')
            return skip(6, 'no release date')
        extras['date_released'] = reference_dates['date_released']

        if 'date_updated' in reference_dates:
            extras['date_updated'] = reference_dates['date_updated']

        # Resources: annotate them all, then pass the result through
        # helpers.uniq_resources_by_url().
        annotated_resources = FISBrokerResourceAnnotator().annotate_all_resources(
            package_dict['resources'])
        package_dict['resources'] = helpers.uniq_resources_by_url(
            annotated_resources)

        # URL, derived from the final resource list.
        package_dict['url'] = extract_url(package_dict['resources'])

        # Preview graphic: appended to the notes, separated by a blank line.
        preview_markup = extract_preview_markup(data_dict)
        if preview_markup:
            package_dict['notes'] += "\n\n" + preview_markup

        package_dict['title'] = generate_title(data_dict)
        package_dict['name'] = generate_name(data_dict)

        # Internal dataset type and source.
        extras['berlin_type'] = 'datensatz'
        extras['berlin_source'] = 'harvest-fisbroker'

        # Always put in the 'geo' group.
        package_dict['groups'] = [{'name': 'geo'}]

        # TODO: can we determine the following three from the ISO values?
        extras['geographical_granularity'] = "Berlin"
        extras['geographical_coverage'] = "Berlin"
        extras['temporal_granularity'] = "Keine"

        # temporal_coverage-from
        # TODO: can we determine this from the ISO values?
        # should be iso_values['temporal-extent-begin']
        # which is derived from:
        # gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:temporalElement
        # but that doesn't show up anywhere in FIS Broker...

        # temporal_coverage-to
        # TODO: can we determine this from the ISO values?
        # should be iso_values['temporal-extent-end']

        # Replace the package's extras with the list built from `extras`.
        package_dict['extras'] = extras_as_list(extras)

        return package_dict
Ejemplo n.º 10
0
    def test_ensure_endpoint_description_is_present(self):
        '''When converting a set of resources for a WFS or WMS service, ensure there is an endpoint
           description (a GetCapabilities-URL), and all resources are annotated as expected.

           Neither input list contains a GetCapabilities URL, so the expected
           output contains one extra resource per service, with the URL built
           from the endpoint and passed through normalize_url().'''

        service_page_url = 'https://fbinter.stadt-berlin.de/fb?loginkey=showMap&mapId=nsg_lsg@senstadt'
        wfs_endpoint_url = 'https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_boden_wfs1_2015'
        documentation_url = 'https://fbinter.stadt-berlin.de/fb_daten/beschreibung/umweltatlas/datenformatbeschreibung/Datenformatbeschreibung_kriterien_zur_bewertung_der_bodenfunktionen2015.pdf'

        resources = [
            {
                'url': service_page_url
            },
            {
                'url': wfs_endpoint_url
            },
            {
                'url': documentation_url,
                'description': 'Technische Beschreibung'
            }
        ]

        annotator = FISBrokerResourceAnnotator()
        annotated = annotator.annotate_all_resources(resources)
        expected = [
            {
                'name': 'Endpunkt-Beschreibung des WFS-Service',
                'weight': 10,
                'format': FORMAT_WFS,
                'url': normalize_url('https://fbinter.stadt-berlin.de/fb/wfs/data/senstadt/s_boden_wfs1_2015?request=getcapabilities&service=wfs&version=2.0.0'),
                'internal_function': FUNCTION_API_DESCRIPTION,
                'main': True,
                'description': 'Maschinenlesbare Endpunkt-Beschreibung des WFS-Service. Weitere Informationen unter https://www.ogc.org/standards/wfs'
            },
            {
                'name': 'API-Endpunkt des WFS-Service',
                'weight': 15,
                'format': FORMAT_WFS,
                'url': wfs_endpoint_url,
                'internal_function': FUNCTION_API_ENDPOINT,
                'main': False,
                'description': 'API-Endpunkt des WFS-Service. Weitere Informationen unter https://www.ogc.org/standards/wfs'
            },
            {
                'name': 'Serviceseite im FIS-Broker',
                'weight': 20,
                'format': FORMAT_HTML,
                'url': service_page_url,
                # Use the shared constants, like the sibling tests, rather
                # than repeating their string values here.
                'internal_function': FUNCTION_WEB_INTERFACE,
                'main': False,
            },
            {
                'description': 'Technische Beschreibung',
                'weight': 30,
                'url': documentation_url,
                'internal_function': FUNCTION_DOCUMENTATION,
                'main': False,
                'name': 'Technische Beschreibung'
            }
        ]

        _assert_equal(annotated, expected)

        wms_endpoint_url = 'https://fbinter.stadt-berlin.de/fb/wms/senstadt/wmsk_02_14_04gwtemp_60m'
        resources = [
            {
                'url': wms_endpoint_url
            }
        ]

        annotated = annotator.annotate_all_resources(resources)
        expected = [
            {
                'name': 'Endpunkt-Beschreibung des WMS-Service',
                'weight': 10,
                'format': FORMAT_WMS,
                'url': normalize_url('https://fbinter.stadt-berlin.de/fb/wms/senstadt/wmsk_02_14_04gwtemp_60m?request=getcapabilities&service=wms&version=1.3.0'),
                'internal_function': FUNCTION_API_DESCRIPTION,
                'main': True,
                'description': 'Maschinenlesbare Endpunkt-Beschreibung des WMS-Service. Weitere Informationen unter https://www.ogc.org/standards/wms'
            },
            {
                'url': wms_endpoint_url,
                'name': 'API-Endpunkt des WMS-Service',
                'weight': 15,
                'format': FORMAT_WMS,
                'internal_function': FUNCTION_API_ENDPOINT,
                'main': False,
                'description': 'API-Endpunkt des WMS-Service. Weitere Informationen unter https://www.ogc.org/standards/wms'
            },
        ]

        _assert_equal(annotated, expected)