def test_dataset_json_ld_1(self):
        # Parse the 'catalog_pod.jsonld' fixture with the euro_dcat_ap profile
        # and check the single dataset, its contact/publisher extras and the
        # 'widgets.csv' resource.
        jsonld = self._get_file_contents('catalog_pod.jsonld')

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.parse(jsonld, _format='json-ld')

        parsed = list(parser.datasets())
        eq_(len(parsed), 1)

        dataset = parsed[0]
        extras = {extra['key']: extra['value'] for extra in dataset['extras']}

        eq_(dataset['title'], 'U.S. Widget Manufacturing Statistics')

        # mailto gets removed for storage and is added again on output
        expected_extras = (
            ('contact_name', 'Jane Doe'),
            ('contact_email', '*****@*****.**'),
            ('publisher_name', 'Widget Services'),
            ('publisher_email', '*****@*****.**'),
        )
        for key, expected in expected_extras:
            eq_(extras[key], expected)

        resources = dataset['resources']
        eq_(len(resources), 4)

        # Pick the resource by name; the order of resources is not relied on.
        matching = [res for res in resources if res['name'] == 'widgets.csv']
        widgets = matching[0]
        eq_(widgets['name'], u'widgets.csv')
        eq_(widgets['url'], u'')
        eq_(widgets['download_url'], u'')
Esempio n. 2
    def _build_and_parse_format_mediatype_graph(self, format_item=None,
                                                mediatype_item=None):
        # Build a graph with one dataset and one distribution, attach the
        # given dct:format and/or dcat:mediaType objects to the distribution,
        # parse it with the euro_dcat_ap + dcatap_de profiles and return the
        # 'resources' entry of the first parsed dataset.
        #
        # format_item:    object for the dct:format triple (skipped if falsy)
        # mediatype_item: object for the dcat:mediaType triple (skipped if
        #                 falsy)
        # Raises AssertionError when both arguments are None.
        #
        # NOTE(review): the parameter list was truncated in this copy; the
        # names come from the body and the None defaults are inferred from the
        # "is None" check - confirm against the callers.

        # Validate before doing any graph work.
        if format_item is None and mediatype_item is None:
            raise AssertionError(
                'At least one of format or mediaType is required!')

        g = Graph()

        dataset = URIRef("")
        g.add((dataset, RDF.type, self.DCAT.Dataset))

        distribution = URIRef("")
        g.add((dataset, self.DCAT.distribution, distribution))
        g.add((distribution, RDF.type, self.DCAT.Distribution))
        if format_item:
            g.add((distribution, self.DCT['format'], format_item))
        if mediatype_item:
            g.add((distribution, self.DCAT.mediaType, mediatype_item))

        p = RDFParser(profiles=['euro_dcat_ap', 'dcatap_de'])

        # Hand the graph to the parser directly instead of parsing a string.
        p.g = g

        dataset = [d for d in p.datasets()][0]
        return dataset.get('resources')
Esempio n. 3
    def test_subthemes(self):
        # Serialize a dataset whose 'theme' field holds a JSON list of
        # {theme, subthemes} entries, parse the result with the
        # euro_dcat_ap + it_dcat_ap profiles and check the theme structure.
        #
        # NOTE(review): this test was truncated in this copy (unclosed
        # literals, missing parse call, missing final "else"). The pieces
        # restored below are marked individually - confirm them against the
        # original test.

        subthemes = [{
            # 'AGRI' is inferred from the branch below that compares the AGRI
            # entry against subthemes[0].
            'theme': 'AGRI',
            # NOTE(review): the original subtheme values are missing from this
            # copy; these EuroVoc URIs are a reconstruction - confirm.
            'subthemes': [
                'http://eurovoc.europa.eu/100253',
                'http://eurovoc.europa.eu/100258',
            ]
        }, {
            'theme': 'ENVI',
            'subthemes': []
        }]

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{
                'name': 'Tag 1'
            }, {
                'name': 'Tag 2'
            }],
            'issued': '2016-11-29',
            'modified': '2016-11-29',
            'frequency': 'UPDATE_CONT',
            'publisher_name': 'bolzano',
            'publisher_identifier': '234234234',
            'creator_name': 'test',
            'creator_identifier': '412946129',
            'holder_name': 'bolzano',
            'holder_identifier': '234234234',
            'alternate_identifier': 'ISBN,TEST',
            'theme': json.dumps(subthemes),
        }

        s = RDFSerializer()
        p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])

        serialized = s.serialize_dataset(dataset)

        # Restored: 'serialized' was otherwise unused and the parser would
        # have had nothing to yield datasets from.
        p.parse(serialized)
        datasets = list(p.datasets())

        assert len(datasets) == 1
        d = datasets[0]
        # NOTE(review): this reads the themes from the input 'dataset', not
        # from the parsed 'd' - confirm that is intended.
        themes = json.loads(dataset['theme'])
        assert (len(themes) == len(subthemes) == 2)
        for t in themes:
            if t['theme'] == 'ENVI':
                assert t['subthemes'] == []
            elif t['theme'] == 'AGRI':
                assert set(t['subthemes']) == set(subthemes[0]['subthemes'])
            else:
                # Without this "else" the AGRI branch always failed.
                assert False, "Unknown theme: {}".format(t)
    def test_distribution_format_format_normalized(self):
        # A distribution with dcat:mediaType "text/csv" and dct:format
        # "Comma Separated Values": check the parsed resource's format (and
        # mimetype), with the expectation depending on the CKAN version.
        g = Graph()

        dataset1 = URIRef("")
        g.add((dataset1, RDF.type, DCAT.Dataset))

        distribution1_1 = URIRef("")
        g.add((distribution1_1, RDF.type, DCAT.Distribution))
        g.add((distribution1_1, DCAT.mediaType, Literal("text/csv")))
        g.add((distribution1_1, DCT["format"], Literal("Comma Separated Values")))
        g.add((dataset1, DCAT.distribution, distribution1_1))

        p = RDFParser(profiles=["euro_dcat_ap"])

        p.g = g

        datasets = [d for d in p.datasets()]

        resource = datasets[0]["resources"][0]

        if toolkit.check_ckan_version(min_version="2.3"):
            eq_(resource["format"], u"CSV")
            eq_(resource["mimetype"], u"text/csv")
        else:
            # The two 'format' expectations are mutually exclusive, so the
            # raw label belongs to the branch for CKAN versions below 2.3.
            eq_(resource["format"], u"Comma Separated Values")
    def test_dataset_access_rights_and_distribution_rights_rights_statement_uriref(
            self):
        # dct:accessRights (dataset) and dct:rights (distribution) given as
        # dct:RightsStatement blank nodes whose rdfs:label is a URIRef: the
        # label value must end up in the 'access_rights' extra and in the
        # resource's 'rights' field.
        g = Graph()

        dataset_ref = URIRef("")
        g.add((dataset_ref, RDF.type, DCAT.Dataset))

        # access_rights
        access_rights = BNode()
        g.add((access_rights, RDF.type, DCT.RightsStatement))
        # NOTE(review): the object of this triple was truncated in this copy;
        # restored as URIRef("") by analogy with the 'rights' label below and
        # the empty-string expectation - confirm the real URI.
        g.add((access_rights, RDFS.label, URIRef("")))
        g.add((dataset_ref, DCT.accessRights, access_rights))
        # rights
        rights = BNode()
        g.add((rights, RDF.type, DCT.RightsStatement))
        g.add(
            (rights, RDFS.label, URIRef("")))
        distribution = URIRef("")
        g.add((dataset_ref, DCAT.distribution, distribution))
        g.add((distribution, RDF.type, DCAT.Distribution))
        g.add((distribution, DCT.rights, rights))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        dataset = [d for d in p.datasets()][0]
        extras = self._extras(dataset)
        assert extras['access_rights'] == ''
        resource = dataset['resources'][0]
        assert resource['rights'] == ''
    def test_dataset_json_ld_with_at_graph(self):
        # Parse the 'catalog_with_at_graph.jsonld' fixture and check the
        # single dataset, its contact extras and its only resource.
        jsonld = self._get_file_contents('catalog_with_at_graph.jsonld')

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.parse(jsonld, _format='json-ld')

        parsed = list(parser.datasets())
        eq_(len(parsed), 1)

        dataset = parsed[0]
        extras = {extra['key']: extra['value'] for extra in dataset['extras']}

        eq_(dataset['title'], 'Title dataset')

        eq_(extras['contact_name'], 'Jane Doe')
        # mailto gets removed for storage and is added again on output
        eq_(extras['contact_email'], '*****@*****.**')

        resources = dataset['resources']
        eq_(len(resources), 1)

        only_resource = resources[0]
        eq_(only_resource['name'], u'')
        eq_(only_resource['url'], u'')
    def test_dataset_access_rights_and_distribution_rights_rights_statement_literal(
            self):
        # dct:accessRights (dataset) and dct:rights (distribution) given as
        # dct:RightsStatement blank nodes whose rdfs:label is a Literal: the
        # label text must end up in the 'access_rights' extra and in the
        # resource's 'rights' field.
        # (The earlier comment here mentioned license_id / dcat:license,
        # which this test never touches.)
        g = Graph()

        dataset_ref = URIRef("")
        g.add((dataset_ref, RDF.type, DCAT.Dataset))

        # access_rights
        access_rights = BNode()
        g.add((access_rights, RDF.type, DCT.RightsStatement))
        g.add((access_rights, RDFS.label, Literal('public dataset')))
        g.add((dataset_ref, DCT.accessRights, access_rights))
        # rights
        rights = BNode()
        g.add((rights, RDF.type, DCT.RightsStatement))
        g.add((rights, RDFS.label, Literal('public distribution')))
        distribution = URIRef("")
        g.add((dataset_ref, DCAT.distribution, distribution))
        g.add((distribution, RDF.type, DCAT.Distribution))
        g.add((distribution, DCT.rights, rights))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        dataset = [d for d in p.datasets()][0]
        extras = self._extras(dataset)
        assert extras['access_rights'] == 'public dataset'
        resource = dataset['resources'][0]
        assert resource['rights'] == 'public distribution'
    def test_spatial_one_dct_spatial_instance(self):
        # One dct:spatial node that is a URI with a GeoJSON geometry and a
        # skos:prefLabel: expect 'spatial_uri', 'spatial_text' and 'spatial'
        # extras on the parsed dataset.
        g = Graph()

        dataset = URIRef('')
        g.add((dataset, RDF.type, DCAT.Dataset))

        spatial_uri = URIRef('http://geonames/Newark')
        g.add((dataset, DCT.spatial, spatial_uri))

        g.add((spatial_uri, RDF.type, DCT.Location))
        # The datatype argument was truncated in this copy; GEOJSON_IMT is
        # what the other GeoJSON geometry literals in this file use.
        g.add((spatial_uri, LOCN.geometry,
               Literal('{"type": "Point", "coordinates": [23, 45]}',
                       datatype=GEOJSON_IMT)))
        g.add((spatial_uri, SKOS.prefLabel, Literal('Newark')))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        extras = self._extras(datasets[0])

        assert extras['spatial_uri'] == 'http://geonames/Newark'
        assert extras['spatial_text'] == 'Newark'
        # Previously "assert extras['spatial'], '<message>'", which only
        # checked truthiness and carried a garbled expected value
        # ("[23 == 45]"); compare against the geometry added above instead.
        assert extras[
            'spatial'] == '{"type": "Point", "coordinates": [23, 45]}'
Esempio n. 9
    def test_datasets_none_found(self):
        # A parser holding an empty graph must not yield any dataset.
        parser = RDFParser()
        parser.g = Graph()

        found = list(parser.datasets())

        eq_(len(found), 0)
Esempio n. 10
    def test_parse_dataset_default_lang_not_in_graph(self):
        # Parse the "max" RDF fixture with the euro_dcat_ap + dcatap_de
        # profiles and check that title, description, publisher, contact and
        # resource texts contain the expected German strings.
        #
        # NOTE(review): several lines were truncated in this copy (parse call
        # and the opening/closing parts of two assertIn calls). The restored
        # parts are marked below - confirm against the original test.
        maxrdf = self._get_max_rdf()

        p = RDFParser(profiles=['euro_dcat_ap', 'dcatap_de'])

        # Restored: 'maxrdf' was otherwise unused and nothing was parsed.
        p.parse(maxrdf)

        datasets = [d for d in p.datasets()]
        self.assertEqual(len(datasets), 1)
        dataset = datasets[0]

        # Title and description random
        self.assertIn(u'Naturräume Geest und Marsch', dataset.get('title'))
        # NOTE(review): the field holding the description is assumed to be
        # 'notes' - confirm.
        self.assertIn(
            u'Die Zuordnung des Hamburger Stadtgebietes zu den Naturräumen Geest und Marsch wird dargestellt',
            dataset.get('notes'))
        # Publisher and ContactPoint
        extras = dataset.get('extras')
        self.assertTrue(len(extras) > 0)
        self.assertIn(u'Behörde für Umwelt und Energie (BUE), Amt für Umweltschutz', self._get_value_from_extras(extras, 'publisher_name'))
        self.assertIn(u'Herr Dr. Michael Schröder', self._get_value_from_extras(extras, 'contact_name'))
        # Resources
        resources = dataset.get('resources')
        self.assertEqual(len(resources), 2)
        for res in resources:
            # Title and description random
            self.assertIn(u'Naturräume Geest und Marsch', res.get('name'))
            # NOTE(review): the resource field is assumed to be
            # 'description' - confirm.
            self.assertIn(
                u'Das ist eine deutsche Beschreibung der Distribution',
                res.get('description'))
Esempio n. 11
    def test_datasets_none_found(self):
        # With an empty graph the datasets() iterator must be exhausted
        # immediately.
        parser = RDFParser()
        parser.g = Graph()

        found = list(parser.datasets())

        assert len(found) == 0
Esempio n. 12
    def test_datasets_none_found(self):
        # No triples in the graph means no datasets come out of the parser.
        rdf_parser = RDFParser()
        rdf_parser.g = Graph()

        dataset_count = len(list(rdf_parser.datasets()))

        eq_(dataset_count, 0)
    def test_dataset_compatibility_mode(self):
        # Parse 'dataset.rdf' with compatibility_mode=True and check the
        # 'dcat_*' prefixed extras (and 'language') on the single dataset.
        #
        # NOTE(review): lines were truncated in this copy (parse call, the
        # closing bracket of the list comprehension, the start of one eq_
        # call). Restored parts are marked below - confirm.

        contents = self._get_file_contents('dataset.rdf')

        p = RDFParser(profiles=['euro_dcat_ap'], compatibility_mode=True)

        # Restored: 'contents' was otherwise unused and nothing was parsed.
        p.parse(contents)

        datasets = [d for d in p.datasets()]

        eq_(len(datasets), 1)

        dataset = datasets[0]

        def _get_extra_value(key):
            # Return the value of the first extra with the given key, or
            # None when the dataset has no such extra.
            v = [
                extra['value'] for extra in dataset['extras']
                if extra['key'] == key
            ]
            return v[0] if v else None

        eq_(_get_extra_value('dcat_issued'), u'2012-05-10')
        eq_(_get_extra_value('dcat_modified'), u'2012-05-10T21:04:00')
        # NOTE(review): key reconstructed as 'dcat_publisher_name' by analogy
        # with 'dcat_publisher_email' below - confirm.
        eq_(_get_extra_value('dcat_publisher_name'),
            'Publishing Organization for dataset 1')
        eq_(_get_extra_value('dcat_publisher_email'), '*****@*****.**')
        eq_(_get_extra_value('language'), 'ca,en,es')
    def test_distribution_both_access_and_download_url(self):
        # A distribution carrying both dcat:accessURL and dcat:downloadURL:
        # check the 'url', 'download_url' and 'access_url' fields of the
        # parsed resource.
        g = Graph()

        dataset1 = URIRef("")
        g.add((dataset1, RDF.type, DCAT.Dataset))

        distribution1_1 = URIRef("")
        g.add((distribution1_1, RDF.type, DCAT.Distribution))
        # NOTE(review): the objects of the next two triples were truncated in
        # this copy and the URL values look scrubbed (the expectations below
        # are empty strings). Empty literals are placeholders only - restore
        # the real access and download URLs.
        g.add((distribution1_1, DCAT.accessURL,
               Literal('')))
        g.add((distribution1_1, DCAT.downloadURL,
               Literal('')))
        g.add((dataset1, DCAT.distribution, distribution1_1))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        resource = datasets[0]['resources'][0]

        eq_(resource['url'], u'')
        eq_(resource['download_url'], u'')
        eq_(resource['access_url'], u'')
    def test_distribution_format_IMT_field(self):
        # dct:format given as a dct:IMT blank node: rdfs:label is expected as
        # the resource 'format' and rdf:value as its 'mimetype'.
        graph = Graph()

        dataset_ref = URIRef("")
        distribution_ref = URIRef("")
        media_type = BNode()

        # Same triples, in the same order, as individual add() calls.
        triples = [
            (dataset_ref, RDF.type, DCAT.Dataset),
            (media_type, RDF.type, DCT.IMT),
            (media_type, RDF.value, Literal('text/turtle')),
            (media_type, RDFS.label, Literal('Turtle')),
            (distribution_ref, RDF.type, DCAT.Distribution),
            (distribution_ref, DCT['format'], media_type),
            (dataset_ref, DCAT.distribution, distribution_ref),
        ]
        for triple in triples:
            graph.add(triple)

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.g = graph

        parsed = list(parser.datasets())
        first_resource = parsed[0]['resources'][0]

        eq_(first_resource['format'], u'Turtle')
        eq_(first_resource['mimetype'], u'text/turtle')
    def test_spatial_both_geojson_and_wkt(self):
        # A location with both a GeoJSON and a WKT geometry: the 'spatial'
        # extra is expected to hold the GeoJSON one.
        g = Graph()

        dataset = URIRef('')
        g.add((dataset, RDF.type, DCAT.Dataset))

        spatial_uri = URIRef('http://geonames/Newark')
        g.add((dataset, DCT.spatial, spatial_uri))

        g.add((spatial_uri, RDF.type, DCT.Location))
        # The datatype argument was truncated in this copy; GEOJSON_IMT is
        # what the other GeoJSON geometry literals in this file use.
        g.add((spatial_uri, LOCN.geometry,
               Literal('{"type": "Point", "coordinates": [23, 45]}',
                       datatype=GEOJSON_IMT)))
        g.add((spatial_uri, LOCN.geometry,
               Literal('POINT (67 89)', datatype=GSP.wktLiteral)))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        extras = self._extras(datasets[0])

        eq_(extras['spatial'], '{"type": "Point", "coordinates": [23, 45]}')
    def test_distribution_format_format_normalized(self):
        # Distribution with mediaType 'text/csv' and format label
        # 'Comma Separated Values': the expected resource 'format' depends on
        # the CKAN version in use.
        g = Graph()

        dataset1 = URIRef("")
        g.add((dataset1, RDF.type, DCAT.Dataset))

        distribution1_1 = URIRef("")
        g.add((distribution1_1, RDF.type, DCAT.Distribution))
        g.add((distribution1_1, DCAT.mediaType, Literal('text/csv')))
        g.add((distribution1_1, DCT['format'], Literal('Comma Separated Values')))
        g.add((dataset1, DCAT.distribution, distribution1_1))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        resource = datasets[0]['resources'][0]

        if toolkit.check_ckan_version(min_version='2.3'):
            eq_(resource['format'], u'CSV')
            eq_(resource['mimetype'], u'text/csv')
        else:
            # 'format' cannot equal both values at once; the raw label is the
            # expectation for CKAN versions below 2.3.
            eq_(resource['format'], u'Comma Separated Values')
    def test_distribution_format_format_normalized(self):
        # Checks the resource 'format' (and 'mimetype') produced for a
        # distribution that declares both dcat:mediaType and dct:format.
        g = Graph()

        dataset1 = URIRef("")
        g.add((dataset1, RDF.type, DCAT.Dataset))

        distribution1_1 = URIRef("")
        g.add((distribution1_1, RDF.type, DCAT.Distribution))
        g.add((distribution1_1, DCAT.mediaType, Literal('text/csv')))
        g.add((distribution1_1, DCT['format'], Literal('Comma Separated Values')))
        g.add((dataset1, DCAT.distribution, distribution1_1))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        resource = datasets[0]['resources'][0]

        if toolkit.check_ckan_version(min_version='2.3'):
            eq_(resource['format'], u'CSV')
            eq_(resource['mimetype'], u'text/csv')
        else:
            # Restored branch: without it the two contradictory 'format'
            # assertions ran together and the test could never pass.
            eq_(resource['format'], u'Comma Separated Values')
Esempio n. 19
    def test_spatial_wrong_geometries(self):
        # Geometry literals that are not valid GeoJSON / WKT: no 'spatial'
        # extra is expected on the parsed dataset.
        g = Graph()

        dataset = URIRef('')
        g.add((dataset, RDF.type, DCAT.Dataset))

        spatial_uri = URIRef('http://geonames/Newark')
        g.add((dataset, DCT.spatial, spatial_uri))

        g.add((spatial_uri, RDF.type, DCT.Location))
        # The "g.add((spatial_uri, LOCN.geometry," openers were missing in
        # this copy; restored to match the other spatial tests in this file.
        g.add((spatial_uri, LOCN.geometry,
               Literal('Not GeoJSON', datatype=GEOJSON_IMT)))
        g.add((spatial_uri, LOCN.geometry,
               Literal('Not WKT', datatype=GSP.wktLiteral)))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        extras = self._extras(datasets[0])

        assert_true('spatial' not in extras)
Esempio n. 20
    def test_dataset_ttl(self):
        # Request the Turtle serialization of a freshly created dataset and
        # check that it parses back to one dataset with the same title/notes.
        source = factories.Dataset(notes="Test dataset")

        endpoint = url_for("dcat_dataset", _id=source["id"], _format="ttl")
        test_app = self._get_test_app()
        response = test_app.get(endpoint)

        eq_(response.headers["Content-Type"], "text/turtle")

        # Parse the contents to check it's an actual serialization
        parser = RDFParser()
        parser.parse(response.body, _format="turtle")

        parsed = list(parser.datasets())
        eq_(len(parsed), 1)

        roundtripped = parsed[0]
        for field in ("title", "notes"):
            eq_(roundtripped[field], source[field])
    def test_distribution_format_IMT_field(self):
        # The distribution's dct:format points at a dct:IMT node; the test
        # expects its label as 'format' and its value as 'mimetype'.
        rdf_graph = Graph()

        ds = URIRef("")
        rdf_graph.add((ds, RDF.type, DCAT.Dataset))

        dist = URIRef("")
        imt_node = BNode()

        # Describe the IMT node first, then hang it off the distribution.
        for predicate, obj in (
                (RDF.type, DCT.IMT),
                (RDF.value, Literal('text/turtle')),
                (RDFS.label, Literal('Turtle'))):
            rdf_graph.add((imt_node, predicate, obj))

        rdf_graph.add((dist, RDF.type, DCAT.Distribution))
        rdf_graph.add((dist, DCT['format'], imt_node))
        rdf_graph.add((ds, DCAT.distribution, dist))

        rdf_parser = RDFParser(profiles=['euro_dcat_ap'])
        rdf_parser.g = rdf_graph

        all_datasets = list(rdf_parser.datasets())
        res = all_datasets[0]['resources'][0]

        eq_(res['format'], u'Turtle')
        eq_(res['mimetype'], u'text/turtle')
    def test_spatial_multiple_dct_spatial_instances(self):
        # Three dct:spatial objects on one dataset - a plain URI, a location
        # with a GeoJSON geometry and a location with a label: the extras are
        # expected to combine the URI, the label and the geometry.
        g = Graph()

        dataset = URIRef("")
        g.add((dataset, RDF.type, DCAT.Dataset))

        spatial_uri = URIRef("http://geonames/Newark")
        g.add((dataset, DCT.spatial, spatial_uri))

        location_ref = BNode()
        g.add((location_ref, RDF.type, DCT.Location))
        g.add((dataset, DCT.spatial, location_ref))
        # The surrounding "g.add(" / ")" lines were missing in this copy.
        g.add(
            (location_ref, LOCN.geometry, Literal('{"type": "Point", "coordinates": [23, 45]}', datatype=GEOJSON_IMT))
        )

        # A second, separate location node that only carries the label.
        location_ref = BNode()
        g.add((location_ref, RDF.type, DCT.Location))
        g.add((dataset, DCT.spatial, location_ref))
        g.add((location_ref, SKOS.prefLabel, Literal("Newark")))

        p = RDFParser(profiles=["euro_dcat_ap"])

        p.g = g

        datasets = [d for d in p.datasets()]

        extras = self._extras(datasets[0])

        eq_(extras["spatial_uri"], "http://geonames/Newark")
        eq_(extras["spatial_text"], "Newark")
        eq_(extras["spatial"], '{"type": "Point", "coordinates": [23, 45]}')
    def test_dataset_json_ld_1(self):
        # Parse the "catalog_pod.jsonld" fixture and verify the dataset
        # title, the contact/publisher extras and the "widgets.csv" resource.
        raw_jsonld = self._get_file_contents("catalog_pod.jsonld")

        rdf_parser = RDFParser(profiles=["euro_dcat_ap"])
        rdf_parser.parse(raw_jsonld, _format="json-ld")

        found = list(rdf_parser.datasets())
        eq_(len(found), 1)

        dataset = found[0]
        extras = {item["key"]: item["value"] for item in dataset["extras"]}

        eq_(dataset["title"], "U.S. Widget Manufacturing Statistics")

        expected_extras = (
            ("contact_name", "Jane Doe"),
            ("contact_email", "mailto:[email protected]"),
            ("publisher_name", "Widget Services"),
            ("publisher_email", "*****@*****.**"),
        )
        for key, expected in expected_extras:
            eq_(extras[key], expected)

        resources = dataset["resources"]
        eq_(len(resources), 4)

        # Select by name; indexing [0] still fails if nothing matches.
        csv_candidates = [r for r in resources if r["name"] == "widgets.csv"]
        csv_resource = csv_candidates[0]
        eq_(csv_resource["name"], u"widgets.csv")
        eq_(csv_resource["url"], u"")
        eq_(csv_resource["download_url"], u"")
    def test_dataset_ttl(self):
        # Fetch the 'ttl' serialization of a new dataset through the test app
        # and check it parses back into a single dataset with the same title
        # and notes.

        # The closing parenthesis of this call was missing in this copy.
        dataset = factories.Dataset(
            notes='Test dataset'
        )

        url = url_for('dcat_dataset', _id=dataset['id'], _format='ttl')

        app = self._get_test_app()

        response = app.get(url)

        eq_(response.headers['Content-Type'], 'text/turtle')

        content = response.body

        # Parse the contents to check it's an actual serialization
        p = RDFParser()

        p.parse(content, _format='turtle')

        dcat_datasets = [d for d in p.datasets()]

        eq_(len(dcat_datasets), 1)

        dcat_dataset = dcat_datasets[0]

        eq_(dcat_dataset['title'], dataset['title'])
        eq_(dcat_dataset['notes'], dataset['notes'])
Esempio n. 25
    def test__datasets(self):
        # The default graph is expected to expose three dataset nodes through
        # the private _datasets() iterator.
        parser = RDFParser()
        parser.g = _default_graph()

        dataset_refs = list(parser._datasets())

        assert len(dataset_refs) == 3
    def test_catalog_modified_date(self):
        # Create two datasets, request the catalog serialization and expect
        # only the second dataset to be returned.

        dataset1 = factories.Dataset(title='First dataset')
        dataset2 = factories.Dataset(title='Second dataset')

        # NOTE(review): the arguments of this call were truncated in this
        # copy. '_format' is inferred from the turtle parse below and the
        # 'modified_since' filter from the test name and the single-dataset
        # expectation - confirm both against the original test.
        url = url_for('dcat_catalog',
                      _format='ttl',
                      modified_since=dataset2['metadata_modified'])

        app = self._get_test_app()

        response = app.get(url)

        content = response.body

        p = RDFParser()

        p.parse(content, _format='turtle')

        dcat_datasets = [d for d in p.datasets()]

        eq_(len(dcat_datasets), 1)

        eq_(dcat_datasets[0]['title'], dataset2['title'])
    def test_spatial_one_dct_spatial_instance_no_uri(self):
        # The dct:spatial object is a blank node (no URI): expect no
        # 'spatial_uri' extra, but the label and geometry extras to be set.
        g = Graph()

        dataset = URIRef('')
        g.add((dataset, RDF.type, DCAT.Dataset))

        location_ref = BNode()
        g.add((dataset, DCT.spatial, location_ref))

        g.add((location_ref, RDF.type, DCT.Location))
        # The "g.add((location_ref, LOCN.geometry," opener was missing in
        # this copy; restored to match the other spatial tests in this file.
        g.add((location_ref, LOCN.geometry,
               Literal('{"type": "Point", "coordinates": [23, 45]}', datatype=GEOJSON_IMT)))
        g.add((location_ref, SKOS.prefLabel, Literal('Newark')))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        extras = self._extras(datasets[0])

        assert_true('spatial_uri' not in extras)
        eq_(extras['spatial_text'], 'Newark')
        eq_(extras['spatial'], '{"type": "Point", "coordinates": [23, 45]}')
    def test_spatial_both_geojson_and_wkt(self):
        # Two geometries on the same location (GeoJSON and WKT): the
        # 'spatial' extra is expected to carry the GeoJSON value.
        g = Graph()

        dataset = URIRef('')
        g.add((dataset, RDF.type, DCAT.Dataset))

        spatial_uri = URIRef('http://geonames/Newark')
        g.add((dataset, DCT.spatial, spatial_uri))

        g.add((spatial_uri, RDF.type, DCT.Location))
        # The "g.add((spatial_uri, LOCN.geometry," openers were missing in
        # this copy; restored to match the other spatial tests in this file.
        g.add((spatial_uri, LOCN.geometry,
               Literal('{"type": "Point", "coordinates": [23, 45]}', datatype=GEOJSON_IMT)))
        g.add((spatial_uri, LOCN.geometry,
               Literal('POINT (67 89)', datatype=GSP.wktLiteral)))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        extras = self._extras(datasets[0])

        eq_(extras['spatial'], '{"type": "Point", "coordinates": [23, 45]}')
Esempio n. 29
    def test_parse_dataset_default_lang_de(self):
        # Parse the "max" RDF fixture and check that title, description,
        # publisher, contact and resources carry the "(DE)" variants.
        #
        # NOTE(review): several call openers were truncated in this copy
        # (parse call, two assertEqual calls, one _assert_extras_string
        # call). The restored parts are marked below - confirm against the
        # original test.
        maxrdf = self._get_max_rdf()

        p = RDFParser(profiles=['euro_dcat_ap', 'dcatap_de'])

        # Restored: 'maxrdf' was otherwise unused and nothing was parsed.
        p.parse(maxrdf)

        datasets = [d for d in p.datasets()]
        self.assertEqual(len(datasets), 1)
        dataset = datasets[0]

        # Title and description to be in default language "de"
        self.assertEqual(dataset.get('title'),
                         u'Naturräume Geest und Marsch (DE)')
        # NOTE(review): the field holding the description is assumed to be
        # 'notes' - confirm.
        self.assertEqual(
            dataset.get('notes'),
            u'Die Zuordnung des Hamburger Stadtgebietes zu den Naturräumen Geest und Marsch wird dargestellt. (DE)'
        )
        # Publisher and ContactPoint
        extras = dataset.get('extras')
        self.assertTrue(len(extras) > 0)
        # Helper name taken from the intact call just below.
        self._assert_extras_string(
            extras, 'publisher_name',
            u'Behörde für Umwelt und Energie (BUE), Amt für Umweltschutz (DE)')
        self._assert_extras_string(extras, 'contact_name',
                                   u'Herr Dr. Michael Schröder (DE)')
        # Resources
        self._assert_resource_lang(dataset, 'DE')
Esempio n. 30
    def test__datasets(self):
        # _datasets() should find three dataset nodes in the default graph.
        rdf_parser = RDFParser()
        rdf_parser.g = _default_graph()

        node_count = len(list(rdf_parser._datasets()))

        eq_(node_count, 3)
Esempio n. 31
    def test_publisher(self):
        # For every dataset parsed from 'catalog_dati_unibo.rdf', compare the
        # parsed publisher name/identifier with the publisher names found in
        # the graph for that dataset.
        #
        # NOTE(review): several lines were truncated in this copy (file read,
        # parse call, the predicate of g.objects(), an "else:"). The restored
        # parts are marked below - confirm against the original test.

        with open(get_example_file('catalog_dati_unibo.rdf'), 'r') as f:
            contents = f.read()

        p = RDFParser(profiles=['it_dcat_ap'])
        # Restored: 'contents' was otherwise unused and the graph was empty.
        p.parse(contents)

        g = p.g

        datasets = [d for d in p.datasets()]
        assert (len(datasets) > 1)
        for d in datasets:
            did = d['identifier']
            pname = d.get('publisher_name')
            pid = d.get('publisher_identifier')
            # Locate the dataset node through its dct:identifier literal.
            dat_ref = list(g.subjects(DCT.identifier, Literal(did)))[0]
            pub_ref = g.value(dat_ref, DCT.publisher)
            # NOTE(review): predicate reconstructed as FOAF.name; FOAF is not
            # referenced anywhere else in the visible code - confirm it is
            # imported and that this is the intended predicate.
            pubnames = list(g.objects(pub_ref, FOAF.name))
            if not pubnames:
                assert pname is None and pid is None,\
                    'Got {}/{} for publisher, when no ref in graph'.format(pname, pid)
            else:
                # Restored "else": the two assertions contradict each other
                # when run unconditionally.
                assert pname and pid, 'no pname {} and pid {} for {}'.format(
                    pname, pid, pubnames)

                lang_hit = False
                for lname in pubnames:
                    if hasattr(lname, 'lang'):
                        if lname.lang and lname.lang == DEFAULT_LANG:
                            lang_hit = pname == lname.value
                        # NOTE(review): an "else:" may also have been dropped
                        # before this fallback; kept as it appears here.
                        if not lang_hit:
                            lang_hit = pname == lname.value
                assert lang_hit, 'There should be lang hit'
    def test_dataset_turtle_1(self):
        # Parse the 'dataset_deri.ttl' fixture (as n3) and check the title of
        # the single dataset and the name of its single resource.
        turtle = self._get_file_contents('dataset_deri.ttl')

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.parse(turtle, _format='n3')

        parsed = list(parser.datasets())
        eq_(len(parsed), 1)

        only_dataset = parsed[0]
        eq_(only_dataset['title'], 'Abandoned Vehicles')

        resources = only_dataset['resources']
        eq_(len(resources), 1)
        eq_(resources[0]['name'], u'CSV distribution of: Abandoned Vehicles')
    def test_dataset_json_ld_1(self):
        # JSON-LD fixture 'catalog_pod.jsonld': one dataset with contact and
        # publisher extras and four resources, one named 'widgets.csv'.
        fixture = self._get_file_contents('catalog_pod.jsonld')

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.parse(fixture, _format='json-ld')

        results = list(parser.datasets())
        eq_(len(results), 1)

        dataset = results[0]
        extras = {e['key']: e['value'] for e in dataset['extras']}

        eq_(dataset['title'], 'U.S. Widget Manufacturing Statistics')

        for key, expected in (
                ('contact_name', 'Jane Doe'),
                ('contact_email', 'mailto:[email protected]'),
                ('publisher_name', 'Widget Services'),
                ('publisher_email', '*****@*****.**')):
            eq_(extras[key], expected)

        all_resources = dataset['resources']
        eq_(len(all_resources), 4)

        # Look the resource up by name rather than by position.
        named = [r for r in all_resources if r['name'] == 'widgets.csv']
        widgets_csv = named[0]
        eq_(widgets_csv['name'], u'widgets.csv')
        eq_(widgets_csv['url'], u'')
        eq_(widgets_csv['download_url'], u'')
    def test_spatial_wrong_geometries(self):
        # Both geometry literals are unparseable for their declared datatype,
        # so the parsed dataset is expected to have no 'spatial' extra.
        g = Graph()

        dataset = URIRef('')
        g.add((dataset, RDF.type, DCAT.Dataset))

        spatial_uri = URIRef('http://geonames/Newark')
        g.add((dataset, DCT.spatial, spatial_uri))

        g.add((spatial_uri, RDF.type, DCT.Location))
        # The "g.add((spatial_uri, LOCN.geometry," openers were missing in
        # this copy; restored to match the other spatial tests in this file.
        g.add((spatial_uri, LOCN.geometry,
               Literal('Not GeoJSON', datatype=GEOJSON_IMT)))
        g.add((spatial_uri, LOCN.geometry,
               Literal('Not WKT', datatype=GSP.wktLiteral)))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        extras = self._extras(datasets[0])

        assert_true('spatial' not in extras)
Esempio n. 35
    def test__datasets(self):
        # Three dataset nodes are expected from _datasets() on the default
        # graph.
        p = RDFParser()
        p.g = _default_graph()

        refs = list(p._datasets())

        eq_(len(refs), 3)
    def test_spatial_one_dct_spatial_instance_no_uri(self):
        # dct:spatial points at a blank node, so no 'spatial_uri' extra is
        # expected; the label and the GeoJSON geometry should still be set.
        g = Graph()

        dataset = URIRef('')
        g.add((dataset, RDF.type, DCAT.Dataset))

        location_ref = BNode()
        g.add((dataset, DCT.spatial, location_ref))

        g.add((location_ref, RDF.type, DCT.Location))
        # The datatype argument was truncated in this copy; GEOJSON_IMT is
        # what the other GeoJSON geometry literals in this file use.
        g.add((location_ref, LOCN.geometry,
               Literal('{"type": "Point", "coordinates": [23, 45]}',
                       datatype=GEOJSON_IMT)))
        g.add((location_ref, SKOS.prefLabel, Literal('Newark')))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        extras = self._extras(datasets[0])

        assert_true('spatial_uri' not in extras)
        eq_(extras['spatial_text'], 'Newark')
        eq_(extras['spatial'], '{"type": "Point", "coordinates": [23, 45]}')
    def test_creators(self):
        # Serialize a dataset with a JSON 'creator' list, parse it back with
        # the euro_dcat_ap + it_dcat_ap profiles and check that the parsed
        # creators match the input ones plus the dataset-level creator.
        #
        # NOTE(review): this test was truncated in this copy (unclosed
        # list/dict literals, missing parse call, truncated .format() call,
        # missing dataset keys). Restored parts are marked below - confirm.

        creators = [{'creator_name': {DEFAULT_LANG: 'abc', 'it': 'abc it'}, 'creator_identifier': "ABC"},
                    {'creator_name': {DEFAULT_LANG: 'cde', 'it': 'cde it'}, 'creator_identifier': "CDE"},
                    ]
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
            # NOTE(review): these two keys are read further down but were
            # missing from this copy; the values are borrowed from the
            # test_subthemes dataset in this file - confirm. Other dropped
            # keys, if any, could not be recovered.
            'creator_name': 'test',
            'creator_identifier': '412946129',
            'creator': json.dumps(creators)
        }

        s = RDFSerializer()
        p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])
        serialized = s.serialize_dataset(dataset)

        # Restored: 'serialized' was otherwise unused and nothing was parsed.
        p.parse(serialized)
        datasets = list(p.datasets())
        assert len(datasets) == 1
        d = datasets[0]
        # The dataset-level creator is expected alongside the listed ones.
        creators.append({'creator_identifier': dataset['creator_identifier'],
                              'creator_name': {DEFAULT_LANG: dataset['creator_name']}})

        creators_dict = dict((v['creator_identifier'], v) for v in creators)

        creators_in = json.loads(d['creator'])

        for c in creators_in:
            # The second format argument was truncated; the message names
            # the keys, so the dict's keys are passed.
            assert c['creator_identifier'] in creators_dict.keys(), "no {} key in {}".format(c['creator_identifier'],
                                                                                             creators_dict.keys())
            assert c['creator_name'] == creators_dict[c['creator_identifier']]['creator_name'],\
                "{} vs {}".format(c['creator_name'], creators_dict[c['creator_identifier']]['creator_name'])
        for c in creators_dict.keys():
            assert c in [_c['creator_identifier'] for _c in creators_in]
            cdata = creators_dict[c]
            assert cdata in creators_in
    def test_parse_subcatalog(self):
        # Serialize a catalog whose dataset carries 'source_catalog_*' extras,
        # then check that each dataset found under a dct:hasPart subcatalog
        # is parsed with a 'source_catalog_homepage' extra equal to that
        # subcatalog's URI.
        #
        # NOTE(review): this test was truncated in this copy (unclosed
        # literals, an empty "for" body, missing assertions). The restored
        # parts follow the surviving comments and the tuple unpacking further
        # down - confirm against the original test.
        publisher = {'name': 'Publisher',
                     'email': '*****@*****.**',
                     'type': 'Publisher',
                     'uri': ''}
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'test dataset',
            'extras': [
                {'key': 'source_catalog_title', 'value': 'Subcatalog example'},
                {'key': 'source_catalog_homepage', 'value': 'http://subcatalog.example'},
                {'key': 'source_catalog_description', 'value': 'Subcatalog example description'},
                {'key': 'source_catalog_language', 'value': ''},
                {'key': 'source_catalog_modified', 'value': '2000-01-01'},
                {'key': 'source_catalog_publisher', 'value': json.dumps(publisher)}
            ]
        }
        catalog_dict = {
            'title': 'My Catalog',
            'description': 'An Open Data Catalog',
            'homepage': '',
            'language': 'de',
        }

        s = RDFSerializer()
        s.serialize_catalog(catalog_dict, dataset_dicts=[dataset])
        g = s.g

        p = RDFParser(profiles=['euro_dcat_ap'])

        # Reuse the serializer's graph directly instead of re-parsing text.
        p.g = g

        # at least one subcatalog with hasPart
        subcatalogs = list(p.g.objects(None, DCT.hasPart))
        assert_true(subcatalogs)

        # at least one dataset in subcatalogs
        subdatasets = []
        for subcatalog in subcatalogs:
            datasets = p.g.objects(subcatalog, DCAT.dataset)
            for dataset in datasets:
                # Pair shape matches the unpacking in the loop below.
                subdatasets.append((dataset, subcatalog))
        assert_true(subdatasets)

        datasets = dict([(d['title'], d) for d in p.datasets()])

        for subdataset, subcatalog in subdatasets:
            title = unicode(list(p.g.objects(subdataset, DCT.title))[0])
            dataset = datasets[title]
            has_subcat = False
            for ex in dataset['extras']:
                exval = ex['value']
                exkey = ex['key']
                if exkey == 'source_catalog_homepage':
                    has_subcat = True
                    eq_(exval, unicode(subcatalog))
            # check if we had subcatalog in extras
            assert_true(has_subcat)
    def test_parse_subcatalog(self):
        # Round-trip a dataset with 'source_catalog_*' extras through the
        # catalog serializer and verify that datasets listed under a
        # dct:hasPart subcatalog come back with a matching
        # 'source_catalog_homepage' extra.
        #
        # NOTE(review): closing brackets, the body of the inner "for" loop
        # and the assertions announced by the comments were missing in this
        # copy; they are restored below - confirm against the original test.
        publisher = {'name': 'Publisher',
                     'email': '*****@*****.**',
                     'type': 'Publisher',
                     'uri': ''}
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'test dataset',
            'extras': [
                {'key': 'source_catalog_title', 'value': 'Subcatalog example'},
                {'key': 'source_catalog_homepage', 'value': 'http://subcatalog.example'},
                {'key': 'source_catalog_description', 'value': 'Subcatalog example description'},
                {'key': 'source_catalog_language', 'value': ''},
                {'key': 'source_catalog_modified', 'value': '2000-01-01'},
                {'key': 'source_catalog_publisher', 'value': json.dumps(publisher)}
            ]
        }
        catalog_dict = {
            'title': 'My Catalog',
            'description': 'An Open Data Catalog',
            'homepage': '',
            'language': 'de',
        }

        s = RDFSerializer()
        s.serialize_catalog(catalog_dict, dataset_dicts=[dataset])
        g = s.g

        p = RDFParser(profiles=['euro_dcat_ap'])

        # The parser works on the graph the serializer just built.
        p.g = g

        # at least one subcatalog with hasPart
        subcatalogs = list(p.g.objects(None, DCT.hasPart))
        assert_true(subcatalogs)

        # at least one dataset in subcatalogs
        subdatasets = []
        for subcatalog in subcatalogs:
            datasets = p.g.objects(subcatalog, DCAT.dataset)
            for dataset in datasets:
                # (dataset node, subcatalog node), as unpacked further down.
                subdatasets.append((dataset, subcatalog))
        assert_true(subdatasets)

        datasets = dict([(d['title'], d) for d in p.datasets()])

        for subdataset, subcatalog in subdatasets:
            title = unicode(list(p.g.objects(subdataset, DCT.title))[0])
            dataset = datasets[title]
            has_subcat = False
            for ex in dataset['extras']:
                exval = ex['value']
                exkey = ex['key']
                if exkey == 'source_catalog_homepage':
                    has_subcat = True
                    eq_(exval, unicode(subcatalog))
            # check if we had subcatalog in extras
            assert_true(has_subcat)
Esempio n. 40
    def test_profiles_are_called_on_datasets(self):
        """Each parsed dataset must expose truthy ``profile_1`` and
        ``profile_2`` entries when both mock profiles are installed."""
        parser = RDFParser()
        parser._profiles = [MockRDFProfile1, MockRDFProfile2]
        parser.g = _default_graph()

        for parsed in parser.datasets():
            for marker in ('profile_1', 'profile_2'):
                assert parsed[marker]
Esempio n. 41
    def test_catalog(self):
        """Parse the ``catalog.xml`` fixture with the ``swiss_dcat_ap``
        profile and expect exactly two datasets."""
        contents = self._get_file_contents('catalog.xml')

        p = RDFParser(profiles=['swiss_dcat_ap'])

        # The fixture has to be loaded into the parser before datasets can
        # be extracted; relies on the parser's default input format.
        p.parse(contents)

        datasets = [d for d in p.datasets()]

        eq_(len(datasets), 2)
Esempio n. 42
    def test_profiles_are_called_on_datasets(self):
        """With two mock profiles set on the parser, every dataset dict must
        hold a truthy value under ``profile_1`` and under ``profile_2``."""
        rdf_parser = RDFParser()
        rdf_parser._profiles = [MockRDFProfile1, MockRDFProfile2]
        rdf_parser.g = _default_graph()

        for item in rdf_parser.datasets():
            first_flag = item['profile_1']
            assert first_flag
            second_flag = item['profile_2']
            assert second_flag
    def test_alternate_identifiers(self):
        """Parse ``dataset_identifier.rdf`` with the ``it_dcat_ap`` profile
        and check the ``alternate_identifier`` value of the single dataset."""
        contents = self._get_file_contents('dataset_identifier.rdf')

        p = RDFParser(profiles=['it_dcat_ap'])
        # Load the fixture; relies on the parser's default input format.
        p.parse(contents)

        datasets = [d for d in p.datasets()]
        assert len(datasets) == 1

        # Show the actual value when the comparison fails.
        alternate_identifier = datasets[0]['alternate_identifier']
        assert alternate_identifier == '[{"identifier": "ISBN:alt id 123", "agent": {}}]', \
            alternate_identifier
    def test_temporal_coverage(self):
        """Serialize a dataset with a JSON ``temporal_coverage`` value, parse
        it back and check that the parsed value passes
        ``validators.dcatapit_temporal_coverage``."""
        temporal_coverage = [
            {'temporal_start': '2001-01-01T00:00:00', 'temporal_end': '2001-02-01T10:11:12'},
            {'temporal_start': '2001-01-01T00:00:00', 'temporal_end': '2001-02-01T10:11:12'},
        ]
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
            'temporal_coverage': json.dumps(temporal_coverage),
        }

        s = RDFSerializer()
        p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])
        serialized = s.serialize_dataset(dataset)

        # Feed the serializer output back into the parser; relies on the
        # parser's default input format matching the serializer's output.
        p.parse(serialized)

        datasets = list(p.datasets())
        assert len(datasets) == 1
        d = datasets[0]

        # NOTE(review): the fixture above defines no top-level
        # 'temporal_start' / 'temporal_end' keys, so plain indexing raised
        # KeyError here. .get() keeps this bookkeeping step from crashing;
        # confirm whether those keys belong in the fixture.
        temporal_coverage.append({'temporal_start': dataset.get('temporal_start'),
                                  'temporal_end': dataset.get('temporal_end')})

        try:
            # this should not raise exception
            validators.dcatapit_temporal_coverage(d['temporal_coverage'], {})
        except validators.Invalid as err:
            raise AssertionError(
                "Temporal coverage should be valid: {}".format(err))
Esempio n. 45
    def test_alternate_identifiers(self):
        """Read the ``dataset_identifier.rdf`` example file, parse it with
        the ``it_dcat_ap`` profile and check the ``alternate_identifier``
        value of the single dataset."""
        with open(get_example_file('dataset_identifier.rdf'), 'r') as f:
            contents = f.read()

        p = RDFParser(profiles=['it_dcat_ap'])
        # Load the file contents; relies on the parser's default input format.
        p.parse(contents)

        datasets = [d for d in p.datasets()]
        assert len(datasets) == 1

        # Show the actual value when the comparison fails.
        alternate_identifier = datasets[0]['alternate_identifier']
        assert alternate_identifier == '[{"identifier": "ISBN:alt id 123", "agent": {}}]', \
            alternate_identifier
    def test_tags_with_commas(self):
        """One ``dcat:keyword`` literal containing two commas must produce
        three entries under ``tags``."""
        graph = Graph()
        dataset_ref = URIRef('')
        triples = (
            (dataset_ref, RDF.type, DCAT.Dataset),
            (dataset_ref, DCAT.keyword, Literal('Tree, forest, shrub')),
        )
        for triple in triples:
            graph.add(triple)

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.g = graph

        first_dataset = list(parser.datasets())[0]
        eq_(len(first_dataset['tags']), 3)
    def test_tags_with_commas(self):
        """A keyword literal of the form ``'a, b, c'`` must be parsed into
        three tags."""
        keyword = Literal('Tree, forest, shrub')
        subject = URIRef('')

        rdf_graph = Graph()
        rdf_graph.add((subject, RDF.type, DCAT.Dataset))
        rdf_graph.add((subject, DCAT.keyword, keyword))

        rdf_parser = RDFParser(profiles=['euro_dcat_ap'])
        rdf_parser.g = rdf_graph

        parsed = list(rdf_parser.datasets())
        tag_count = len(parsed[0]['tags'])

        assert tag_count == 3
Esempio n. 48
    def test_datasets(self):
        """Every dataset from the default graph has a ``title`` and there
        are exactly three of them."""
        p = RDFParser()

        p.g = _default_graph()

        datasets = []
        for dataset in p.datasets():

            assert 'title' in dataset

            # Collect each dataset so that the count check below is
            # meaningful; without this the list stays empty.
            datasets.append(dataset)

        eq_(len(datasets), 3)
    def test_tags_with_commas_clean_tags_on(self):
        """A keyword equal to ``self.INVALID_TAG`` must appear in the parsed
        tags as ``self.VALID_TAG`` and not in its raw form."""
        graph = Graph()
        dataset_ref = URIRef('')
        for predicate, obj in ((RDF.type, DCAT.Dataset),
                               (DCAT.keyword, Literal(self.INVALID_TAG))):
            graph.add((dataset_ref, predicate, obj))

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.g = graph

        parsed_tags = list(parser.datasets())[0]['tags']

        assert_true(self.VALID_TAG in parsed_tags)
        assert_true(self.INVALID_TAG not in parsed_tags)
    def test_tags_with_commas_clean_tags_off(self):
        """A keyword equal to ``self.INVALID_TAG`` must be kept verbatim as
        ``{'name': self.INVALID_TAG}`` and ``self.VALID_TAG`` must be absent."""
        graph = Graph()
        dataset_ref = URIRef('')
        for predicate, obj in ((RDF.type, DCAT.Dataset),
                               (DCAT.keyword, Literal(self.INVALID_TAG))):
            graph.add((dataset_ref, predicate, obj))

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.g = graph

        # when config flag is set to false, bad tags can happen
        parsed_tags = list(parser.datasets())[0]['tags']
        assert_true(self.VALID_TAG not in parsed_tags)
        assert_true({'name': self.INVALID_TAG} in parsed_tags)
    def test_dataset_version_adms(self):
        """An ``adms:version`` literal on the dataset node must surface as
        the dataset's ``version`` value."""
        graph = Graph()
        dataset_ref = URIRef("")
        for predicate, obj in ((RDF.type, DCAT.Dataset),
                               (ADMS.version, Literal('2.3a'))):
            graph.add((dataset_ref, predicate, obj))

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.g = graph

        parsed = list(parser.datasets())[0]

        eq_(parsed['version'], u'2.3a')
Esempio n. 52
    def test_parse_without_pagination(self):
        """After parsing a small RDF/XML document, ``next_page()`` must
        return ``None``."""
        # The rdf:RDF root and the W3C rdf/rdfs namespace declarations are
        # needed for the document to be well-formed XML.
        # NOTE(review): rdf:about is empty in this copy of the fixture;
        # presumably an example URI originally. Confirm.
        data = '''<?xml version="1.0" encoding="utf-8" ?>
        <rdf:RDF
         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
        <rdfs:SomeClass rdf:about="">
            <rdfs:label>Some label</rdfs:label>
        </rdfs:SomeClass>
        </rdf:RDF>
        '''

        p = RDFParser()

        p.parse(data)

        eq_(p.next_page(), None)
Esempio n. 53
    def test_parse_data_different_format(self):
        """Parsing N3 input with ``_format='n3'`` must take the parser's
        graph from zero to two triples."""
        # NOTE(review): the prefix and subject IRIs are empty in this copy of
        # the fixture; presumably the W3C rdf/rdfs namespaces and an example
        # URI originally. Confirm.
        data = '''
        @prefix rdf: <> .
        @prefix rdfs: <> .

        <> a rdfs:SomeClass ;
            rdfs:label "Some label" .
        '''

        p = RDFParser()

        # A fresh parser starts with an empty graph.
        eq_(len(p.g), 0)

        p.parse(data, _format='n3')

        # One rdf:type triple plus one rdfs:label triple.
        eq_(len(p.g), 2)
    def test_catalog_xml_rdf(self):
        """Parse ``catalog.rdf`` with the ``euro_dcat_ap`` profile and check
        the dataset count plus the resources and tags of "Example dataset 1"."""
        contents = self._get_file_contents("catalog.rdf")

        p = RDFParser(profiles=["euro_dcat_ap"])

        # Load the fixture; relies on the parser's default input format.
        p.parse(contents)

        datasets = [d for d in p.datasets()]

        eq_(len(datasets), 2)

        # The two datasets come back in no fixed order, so pick by title.
        dataset = datasets[0] if datasets[0]["title"] == "Example dataset 1" else datasets[1]

        eq_(dataset["title"], "Example dataset 1")
        eq_(len(dataset["resources"]), 3)
        eq_(len(dataset["tags"]), 2)
    def test_catalog_xml_rdf(self):
        """Parse ``catalog.rdf`` with the ``euro_dcat_ap`` profile and check
        the dataset count plus the resources and tags of 'Example dataset 1'."""
        contents = self._get_file_contents('catalog.rdf')

        p = RDFParser(profiles=['euro_dcat_ap'])

        # Load the fixture; relies on the parser's default input format.
        p.parse(contents)

        datasets = [d for d in p.datasets()]

        eq_(len(datasets), 2)

        # The two datasets come back in no fixed order, so pick by title.
        dataset = (datasets[0] if datasets[0]['title'] == 'Example dataset 1'
                   else datasets[1])

        eq_(dataset['title'], 'Example dataset 1')
        eq_(len(dataset['resources']), 3)
        eq_(len(dataset['tags']), 2)
    def test_spatial_uri_only(self):
        """A ``dct:spatial`` object that is a bare URI must yield only the
        ``spatial_uri`` extra, with no ``spatial_text`` or ``spatial``."""
        graph = Graph()
        dataset_ref = URIRef("")
        location = URIRef("http://geonames/Newark")
        for predicate, obj in ((RDF.type, DCAT.Dataset),
                               (DCT.spatial, location)):
            graph.add((dataset_ref, predicate, obj))

        parser = RDFParser(profiles=["euro_dcat_ap"])
        parser.g = graph

        parsed = list(parser.datasets())
        extras = self._extras(parsed[0])

        eq_(extras["spatial_uri"], "http://geonames/Newark")
        for absent_key in ("spatial_text", "spatial"):
            assert_true(absent_key not in extras)
    def test_spatial_uri_only(self):
        """When ``dct:spatial`` points at a plain URI, the extras must hold
        ``spatial_uri`` and neither ``spatial_text`` nor ``spatial``."""
        subject = URIRef('')
        place = URIRef('http://geonames/Newark')

        rdf_graph = Graph()
        rdf_graph.add((subject, RDF.type, DCAT.Dataset))
        rdf_graph.add((subject, DCT.spatial, place))

        rdf_parser = RDFParser(profiles=['euro_dcat_ap'])
        rdf_parser.g = rdf_graph

        first_dataset = list(rdf_parser.datasets())[0]
        extras = self._extras(first_dataset)

        eq_(extras['spatial_uri'], 'http://geonames/Newark')
        assert_true('spatial_text' not in extras)
        assert_true('spatial' not in extras)
    def test_dataset_license_from_distribution_by_uri(self):
        """The dataset's ``license_id`` must be ``'cc-by'`` when the
        distribution carries a ``dct:license`` URI."""
        # license_id retrieved from the URI of dct:license object
        g = Graph()

        # NOTE(review): dataset and distribution share the empty URIRef in
        # this copy; presumably two distinct example URIs originally.
        dataset = URIRef("")
        g.add((dataset, RDF.type, DCAT.Dataset))

        distribution = URIRef("")
        g.add((dataset, DCAT.distribution, distribution))
        g.add((distribution, RDF.type, DCAT.Distribution))
        # NOTE(review): the license object was missing from this copy; the
        # Open Definition cc-by URI is assumed from the expected 'cc-by'
        # license_id below. Confirm against the upstream test.
        g.add((distribution, DCT.license,
               URIRef("http://www.opendefinition.org/licenses/cc-by")))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        dataset = [d for d in p.datasets()][0]
        eq_(dataset['license_id'], 'cc-by')
Esempio n. 59
    def parse_chunk(self, harvest_job, content, rdf_format, guids_in_source, object_ids):
        """Run the after-download hooks on ``content`` and parse the result.

        Each ``IDCATRDFHarvester`` plugin may replace ``content`` and report
        errors, which are saved as gather errors on ``harvest_job``.

        Returns ``False`` when no content is left after the hooks or when
        parsing raises ``RDFParserException`` (the error is saved on the
        job). In the portion visible here a successful parse falls through
        without an explicit return value, and ``guids_in_source`` and
        ``object_ids`` are not used.
        """
        # TODO: store content?
        for harvester in p.PluginImplementations(IDCATRDFHarvester):
            content, after_download_errors = harvester.after_download(content, harvest_job)

            for error_msg in after_download_errors:
                self._save_gather_error(error_msg, harvest_job)

        if not content:
            return False

        # TODO: profiles conf
        parser = RDFParser()

        try:
            parser.parse(content, _format=rdf_format)
        except RDFParserException as e:
            self._save_gather_error('Error parsing the RDF file: {0}'.format(e), harvest_job)
            return False
    def test_distribution_format_format_only(self):
        """A distribution that only declares ``dct:format`` must yield a
        resource whose ``format`` is that literal."""
        dataset1 = URIRef("")
        distribution1_1 = URIRef("")

        g = Graph()
        for triple in (
                (dataset1, RDF.type, DCAT.Dataset),
                (distribution1_1, RDF.type, DCAT.Distribution),
                (distribution1_1, DCT['format'], Literal('CSV')),
                (dataset1, DCAT.distribution, distribution1_1)):
            g.add(triple)

        p = RDFParser(profiles=['euro_dcat_ap'])
        p.g = g

        datasets = list(p.datasets())
        first_dataset = datasets[0]
        resource = first_dataset['resources'][0]

        eq_(resource['format'], u'CSV')