def test_spatial(self):
        """Spatial extras are exposed as a schema.org ``Place``.

        Serializes a dataset carrying ``spatial_uri``, ``spatial_text`` and
        ``spatial`` extras with the ``schemaorg`` profile, then checks the
        ``spatialCoverage`` node, its description and its ``GeoShape``.
        """
        extra_pairs = (
            ('spatial_uri', 'http://sws.geonames.org/6361390/'),
            ('spatial_text', 'Tarragona'),
            ('spatial', '{"type": "Polygon", "coordinates": [[[1.1870606,41.0786393],[1.1870606,41.1655218],[1.3752339,41.1655218],[1.3752339,41.0786393],[1.1870606,41.0786393]]]}'),
        )
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [{'key': key, 'value': value} for key, value in extra_pairs],
        }
        extras = self._extras(dataset)

        serializer = RDFSerializer(profiles=['schemaorg'])
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        # The coverage node is expected to reuse the URI from the extras.
        place = self._triple(graph, dataset_ref, SCHEMA.spatialCoverage, None)[2]
        assert place
        eq_(unicode(place), extras['spatial_uri'])
        assert self._triple(graph, place, RDF.type, SCHEMA.Place)
        assert self._triple(graph, place, SCHEMA.description, extras['spatial_text'])

        # The geometry hangs off the place as a GeoShape holding the raw value.
        shape = self._triple(graph, place, SCHEMA.geo, None)[2]
        assert self._triple(graph, shape, RDF.type, SCHEMA.GeoShape)
        assert self._triple(graph, shape, SCHEMA.polygon, extras['spatial'])
Example #2
0
    def test_publisher_extras(self):
        """Publisher extras win over the organization in the schema.org graph.

        Checks the ``schema:publisher`` node (URI, type, name) and the
        ``schema:ContactPoint`` attached to it.
        """
        extra_pairs = (
            ('publisher_uri', 'http://example.com/publisher'),
            ('publisher_name', 'Example Publisher'),
            ('publisher_email', '*****@*****.**'),
            ('publisher_url', 'http://example.com/publisher/home'),
            ('publisher_type', 'http://purl.org/adms/publishertype/Company'),
        )
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'organization': {
                'id': '',
                'name': 'publisher1',
                'title': 'Example Publisher from Org',
            },
            'extras': [{'key': key, 'value': value} for key, value in extra_pairs],
        }
        extras = self._extras(dataset)

        serializer = RDFSerializer(profiles=['schemaorg'])
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        # Publisher node: identified by the URI extra, named by the name extra.
        org = self._triple(graph, dataset_ref, SCHEMA.publisher, None)[2]
        assert org
        eq_(unicode(org), extras['publisher_uri'])
        assert self._triple(graph, org, RDF.type, SCHEMA.Organization)
        assert self._triple(graph, org, SCHEMA.name, extras['publisher_name'])

        # Contact point: carries name, email and url of the publisher.
        contact = self._triple(graph, org, SCHEMA.contactPoint, None)[2]
        assert contact
        assert self._triple(graph, contact, RDF.type, SCHEMA.ContactPoint)
        assert self._triple(graph, contact, SCHEMA.name, extras['publisher_name'])
        assert self._triple(graph, contact, SCHEMA.email, extras['publisher_email'])
        assert self._triple(graph, contact, SCHEMA.url, extras['publisher_url'])
        assert self._triple(graph, contact, SCHEMA.contactType, 'customer service')
    def test_distribution_both_urls_the_same(self):
        """Identical ``url`` and ``download_url`` yield only a downloadURL."""
        file_url = "http://example.com/data/file.csv"
        resource = {
            "id": "c041c635-054f-4431-b647-f9186926d021",
            "package_id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "CSV file",
            "url": file_url,
            "download_url": file_url,
        }
        dataset = {
            "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "test-dataset",
            "title": "Test DCAT dataset",
            "resources": [resource],
        }

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        dist_ref = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]

        # Only the download URL is emitted; no accessURL triple may exist.
        assert self._triple(graph, dist_ref, DCAT.downloadURL, URIRef(resource["url"]))
        assert self._triple(graph, dist_ref, DCAT.accessURL, None) is None
    def test_distribution_format(self):
        """schema.org profile maps ``format`` and ``mimetype`` of a resource.

        ``format`` is expected as ``schema:encodingFormat`` and ``mimetype``
        as ``schema:fileType`` on the distribution node.
        """
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'url': 'http://example.com/data/file.csv',
            'format': 'CSV',
            'mimetype': 'text/csv',
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource],
        }

        serializer = RDFSerializer(profiles=['schemaorg'])
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        dist_ref = self._triple(graph, dataset_ref, SCHEMA.distribution, None)[2]

        assert self._triple(graph, dist_ref, SCHEMA.encodingFormat, resource['format'])
        assert self._triple(graph, dist_ref, SCHEMA.fileType, resource['mimetype'])
Example #5
0
    def test_temporal_start_and_end(self):
        """Start and end extras are joined into one ``temporalCoverage`` value.

        With the ``schemaorg`` profile the dataset must carry a
        ``schema:temporalCoverage`` literal of the form ``<start>/<end>``,
        built from the ``temporal_start`` and ``temporal_end`` extras.
        """
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [
                {'key': 'temporal_start', 'value': '2015-06-26T15:21:09.075774'},
                {'key': 'temporal_end', 'value': '2015-07-14'},
            ]
        }
        extras = self._extras(dataset)

        s = RDFSerializer(profiles=['schemaorg'])
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        # Derive the expectation from the fixture so the two cannot drift
        # apart (previously `extras` was computed but never used).
        expected_interval = '{0}/{1}'.format(extras['temporal_start'],
                                             extras['temporal_end'])
        assert self._triple(g, dataset_ref, SCHEMA.temporalCoverage,
                            expected_interval)
Example #6
0
    def test_distribution_format(self):
        """Default profile maps ``format`` and ``mimetype`` of a resource.

        ``format`` is expected as ``dct:format`` and ``mimetype`` as
        ``dcat:mediaType`` on the distribution node.
        """
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'url': 'http://example.com/data/file.csv',
            'format': 'CSV',
            'mimetype': 'text/csv',
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource],
        }

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        dist_ref = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]

        assert self._triple(graph, dist_ref, DCT['format'], resource['format'])
        assert self._triple(graph, dist_ref, DCAT.mediaType, resource['mimetype'])
    def test_graph_from_dataset(self):
        """Core dataset fields, tags, dates and list extras are serialized.

        Builds a graph from a dataset dict and checks the dataset URI, the
        basic literal properties, one ``dcat:keyword`` per tag, the issued /
        modified dates and the JSON-encoded list extras.
        """
        extra_pairs = (
            ('alternate_identifier', 'xyz'),
            ('version_notes', 'This is a beta version'),
            ('frequency', 'monthly'),
            ('language', '[\"en\"]'),
            ('theme', '[\"http://eurovoc.europa.eu/100142\", \"http://eurovoc.europa.eu/100152\"]'),
            ('conforms_to', '[\"Standard 1\", \"Standard 2\"]'),
        )
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'notes': 'Lorem ipsum',
            'url': 'http://example.com/ds1',
            'version': '1.0b',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
            'extras': [{'key': key, 'value': value} for key, value in extra_pairs],
        }
        extras = self._extras(dataset)

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

        # Plain literal properties taken from the dataset dict and its extras.
        assert self._triple(graph, dataset_ref, RDF.type, DCAT.Dataset)
        assert self._triple(graph, dataset_ref, DCT.title, dataset['title'])
        assert self._triple(graph, dataset_ref, DCT.description, dataset['notes'])

        assert self._triple(graph, dataset_ref, OWL.versionInfo, dataset['version'])
        assert self._triple(graph, dataset_ref, ADMS.versionNotes, extras['version_notes'])
        assert self._triple(graph, dataset_ref, ADMS.identifier, extras['alternate_identifier'])
        assert self._triple(graph, dataset_ref, DCT.accrualPeriodicity, extras['frequency'])

        # Exactly one keyword triple per tag.
        keyword_triples = list(graph.triples((dataset_ref, DCAT.keyword, None)))
        eq_(len(keyword_triples), 2)
        for tag in dataset['tags']:
            assert self._triple(graph, dataset_ref, DCAT.keyword, tag['name'])

        # Creation / modification timestamps are typed as xsd:dateTime.
        assert self._triple(graph, dataset_ref, DCT.issued, dataset['metadata_created'], XSD.dateTime)
        assert self._triple(graph, dataset_ref, DCT.modified, dataset['metadata_modified'], XSD.dateTime)

        # JSON list extras: one triple per list element, nothing more.
        list_fields = (
            ('language', DCT.language),
            ('theme', DCAT.theme),
            ('conforms_to', DCAT.conformsTo),
        )
        for extra_key, predicate in list_fields:
            values = json.loads(extras[extra_key])
            found = list(graph.triples((dataset_ref, predicate, None)))
            eq_(len(found), len(values))
            for value in values:
                assert self._triple(graph, dataset_ref, predicate, value)
    def test_distribution_format_with_backslash(self):
        """A ``format`` value containing a slash is emitted as dcat:mediaType.

        The fixture uses ``text/csv`` as the resource format and provides no
        separate ``mimetype``.
        """
        resource = {
            "id": "c041c635-054f-4431-b647-f9186926d021",
            "package_id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "CSV file",
            "url": "http://example.com/data/file.csv",
            "format": "text/csv",
        }
        dataset = {
            "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "test-dataset",
            "title": "Test DCAT dataset",
            "resources": [resource],
        }

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        dist_ref = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]

        assert self._triple(graph, dist_ref, DCAT.mediaType, resource["format"])
    def test_contact_details_extras(self):
        """``contact_*`` extras drive the ``dcat:contactPoint`` node.

        The dataset also defines maintainer and author fields; the assertions
        only check that the contact node matches the ``contact_*`` extras.
        """
        extra_pairs = (
            ('contact_uri', 'http://example.com/contact'),
            ('contact_name', 'Example Contact'),
            ('contact_email', '*****@*****.**'),
        )
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'maintainer': 'Example Maintainer',
            'maintainer_email': '*****@*****.**',
            'author': 'Example Author',
            'author_email': '*****@*****.**',
            'extras': [{'key': key, 'value': value} for key, value in extra_pairs],
        }
        extras = self._extras(dataset)

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        # The contact point must use the URI, name and email from the extras.
        contact_ref = self._triple(graph, dataset_ref, DCAT.contactPoint, None)[2]
        assert contact_ref
        eq_(unicode(contact_ref), extras['contact_uri'])
        assert self._triple(graph, contact_ref, VCARD.fn, extras['contact_name'])
        assert self._triple(graph, contact_ref, VCARD.hasEmail, extras['contact_email'])
    def test_spatial(self):
        """Spatial extras become a ``dct:Location`` with two geometries.

        Checks the ``dct:spatial`` node (URI, type, label) and that its
        ``locn:geometry`` values are exactly the GeoJSON literal and its WKT
        rendering.
        """
        extra_pairs = (
            ('spatial_uri', 'http://sws.geonames.org/6361390/'),
            ('spatial_text', 'Tarragona'),
            ('spatial', '{"type": "Polygon", "coordinates": [[[1.1870606,41.0786393],[1.1870606,41.1655218],[1.3752339,41.1655218],[1.3752339,41.0786393],[1.1870606,41.0786393]]]}'),
        )
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [{'key': key, 'value': value} for key, value in extra_pairs],
        }
        extras = self._extras(dataset)

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        location = self._triple(graph, dataset_ref, DCT.spatial, None)[2]
        assert location
        eq_(unicode(location), extras['spatial_uri'])
        assert self._triple(graph, location, RDF.type, DCT.Location)
        assert self._triple(graph, location, SKOS.prefLabel, extras['spatial_text'])

        # Two geometry triples in total: one GeoJSON, one WKT.
        geometries = list(graph.triples((location, LOCN.geometry, None)))
        eq_(len(geometries), 2)

        # GeoJSON literal is the raw extra value.
        assert self._triple(graph, location, LOCN.geometry, extras['spatial'], GEOJSON_IMT)

        # WKT literal is the same geometry dumped with four decimals.
        expected_wkt = wkt.dumps(json.loads(extras['spatial']), decimals=4)
        assert self._triple(graph, location, LOCN.geometry, expected_wkt, GSP.wktLiteral)
Example #11
0
    def test_distribution_both_urls_the_same(self):
        """schema.org: identical urls yield a contentUrl and no ``schema:url``."""
        file_url = 'http://example.com/data/file.csv'
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'url': file_url,
            'download_url': file_url,
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource],
        }

        serializer = RDFSerializer(profiles=['schemaorg'])
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        dist_ref = self._triple(graph, dataset_ref, SCHEMA.distribution, None)[2]

        # Only the content URL is emitted; no schema:url triple may exist.
        assert self._triple(graph, dist_ref, SCHEMA.contentUrl, resource['url'])
        assert self._triple(graph, dist_ref, SCHEMA.url, None) is None
    def test_spatial_bad_json_no_wkt(self):
        """An unparsable ``spatial`` extra yields a single geometry triple.

        With no ``spatial_uri`` the location is a blank node; the raw value is
        still attached as the GeoJSON-typed literal, and that is the only
        ``locn:geometry`` triple on the node.
        """
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [{'key': 'spatial', 'value': 'NotJSON'}],
        }
        extras = self._extras(dataset)

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        location = self._triple(graph, dataset_ref, DCT.spatial, None)[2]
        assert location
        assert_true(isinstance(location, BNode))

        # The raw value is kept as the GeoJSON-typed literal.
        assert self._triple(graph, location, LOCN.geometry, extras['spatial'],
                            GEOJSON_IMT)

        # Exactly one geometry triple: nothing besides the literal above.
        geometries = list(graph.triples((location, LOCN.geometry, None)))
        eq_(len(geometries), 1)
    def test_distribution_prefer_access_url(self):
        """An explicit ``access_url`` is used for accessURL instead of ``url``.

        The resource has ``url`` and ``access_url`` but no ``download_url``;
        no downloadURL triple is expected.
        """
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'url': 'http://example.com/data',
            'access_url': 'http://example.com/data/file',
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource],
        }

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        dist_ref = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]

        expected_access = URIRef(resource['access_url'])
        assert self._triple(graph, dist_ref, DCAT.accessURL, expected_access)
        assert self._triple(graph, dist_ref, DCAT.downloadURL, None) is None
    def _build_graph_and_check_format_mediatype(self, dataset_dict,
                                                expected_format,
                                                expected_mediatype):
        """
        Serialize ``dataset_dict`` and compare the dct:format and
        dcat:mediaType objects of its first distribution with the expected
        lists.

        :param dataset_dict:
            dataset dict, expected to contain one resource
        :param expected_format:
            expected list of dct:format objects on the distribution
        :param expected_mediatype:
            expected list of dcat:mediaType objects on the distribution
        """
        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset_dict)

        # Only the first distribution found in the graph is inspected.
        distributions = list(graph.objects(dataset_ref, DCAT.distribution))
        resource_ref = distributions[0]

        actual_format = list(graph.objects(resource_ref, DCT['format']))
        actual_mediatype = list(graph.objects(resource_ref, DCAT.mediaType))

        eq_(expected_format, actual_format)
        eq_(expected_mediatype, actual_mediatype)
    def test_temporal(self):
        """Temporal extras become a ``dct:PeriodOfTime`` with start/end dates.

        Both bounds are compared against the ISO form of the parsed extra,
        typed as ``xsd:dateTime``.
        """
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [
                {'key': 'temporal_start', 'value': '2015-06-26T15:21:09.075774'},
                {'key': 'temporal_end', 'value': '2015-07-14'},
            ],
        }
        extras = self._extras(dataset)

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        period = self._triple(graph, dataset_ref, DCT.temporal, None)[2]
        assert period

        assert self._triple(graph, period, RDF.type, DCT.PeriodOfTime)

        expected_start = parse_date(extras['temporal_start']).isoformat()
        assert self._triple(graph, period, SCHEMA.startDate, expected_start,
                            XSD.dateTime)

        expected_end = parse_date(extras['temporal_end']).isoformat()
        assert self._triple(graph, period, SCHEMA.endDate, expected_end,
                            XSD.dateTime)
    def test_hash_algorithm_not_uri(self):
        """A non-URI ``hash_algorithm`` is kept as-is on the SPDX checksum.

        Also checks the checksum node type and the hexBinary-typed value.
        """
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'hash': 'aaaa',
            'hash_algorithm': 'sha1',
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource],
        }

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        dist_ref = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]

        checksum_ref = self._triple(graph, dist_ref, SPDX.checksum, None)[2]
        assert checksum_ref
        assert self._triple(graph, checksum_ref, RDF.type, SPDX.Checksum)
        assert self._triple(graph, checksum_ref, SPDX.checksumValue,
                            resource['hash'],
                            data_type='http://www.w3.org/2001/XMLSchema#hexBinary')
        assert self._triple(graph, checksum_ref, SPDX.algorithm,
                            resource['hash_algorithm'])
    def test_identifier_extra(self):
        """The ``identifier`` extra is used for ``dct:identifier``.

        The fixture also carries a ``guid`` extra; the assertion expects the
        ``identifier`` value.
        """
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [
                {'key': 'identifier', 'value': 'idxxx'},
                {'key': 'guid', 'value': 'guidyyy'},
            ],
        }
        extras = self._extras(dataset)

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        expected_identifier = extras['identifier']
        assert self._triple(graph, dataset_ref, DCT.identifier, expected_identifier)
    def test_distribution_both_urls_the_same_with_access_url(self):
        """An explicit ``access_url`` is serialized even if it equals the download url."""
        file_url = 'http://example.com/data/file.csv'
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'access_url': file_url,
            'download_url': file_url,
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource],
        }

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        dist_ref = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]

        # Both properties must be present although they point to the same URL.
        assert self._triple(graph, dist_ref, DCAT.downloadURL,
                            URIRef(resource['download_url']))
        assert self._triple(graph, dist_ref, DCAT.accessURL,
                            URIRef(resource['access_url']))
    def test_distribution_both_urls_the_same(self):
        """Legacy ``url`` equal to ``download_url`` yields only a downloadURL.

        Old behaviour: ``url`` is serialized as accessURL only when it
        differs from the download URL.
        """
        file_url = 'http://example.com/data/file.csv'
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'url': file_url,
            'download_url': file_url,
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource],
        }

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        dist_ref = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]

        expected_download = URIRef(resource['url'])
        assert self._triple(graph, dist_ref, DCAT.downloadURL, expected_download)
        assert self._triple(graph, dist_ref, DCAT.accessURL, None) is None
Example #20
0
    def test_distribution_format_with_backslash(self):
        """schema.org: a ``format`` containing a slash is kept as encodingFormat.

        The fixture uses ``text/csv`` as the resource format.
        """
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'url': 'http://example.com/data/file.csv',
            'format': 'text/csv',
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource],
        }

        serializer = RDFSerializer(profiles=['schemaorg'])
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        dist_ref = self._triple(graph, dataset_ref, SCHEMA.distribution, None)[2]

        assert self._triple(graph, dist_ref, SCHEMA.encodingFormat, resource['format'])
    def test_publisher_extras(self):
        """Publisher extras define the ``dct:publisher`` FOAF organization.

        The dataset also has an ``organization``; the assertions expect the
        publisher node to be built from the ``publisher_*`` extras.
        """
        extra_pairs = (
            ("publisher_uri", "http://example.com/publisher"),
            ("publisher_name", "Example Publisher"),
            ("publisher_email", "*****@*****.**"),
            ("publisher_url", "http://example.com/publisher/home"),
            ("publisher_type", "http://purl.org/adms/publishertype/Company"),
        )
        dataset = {
            "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "test-dataset",
            "organization": {
                "id": "",
                "name": "publisher1",
                "title": "Example Publisher from Org",
            },
            "extras": [{"key": key, "value": value} for key, value in extra_pairs],
        }
        extras = self._extras(dataset)

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        org = self._triple(graph, dataset_ref, DCT.publisher, None)[2]
        assert org
        eq_(unicode(org), extras["publisher_uri"])

        # Name, mailbox and type are literals; the homepage is a URI reference.
        assert self._triple(graph, org, RDF.type, FOAF.Organization)
        assert self._triple(graph, org, FOAF.name, extras["publisher_name"])
        assert self._triple(graph, org, FOAF.mbox, extras["publisher_email"])
        homepage = URIRef(extras["publisher_url"])
        assert self._triple(graph, org, FOAF.homepage, homepage)
        assert self._triple(graph, org, DCT.type, extras["publisher_type"])
    def test_publisher_extras(self):
        """Publisher extras define the ``dct:publisher`` FOAF organization.

        The dataset also has an ``organization``; the assertions expect the
        publisher node to be built from the ``publisher_*`` extras.
        """
        extra_pairs = (
            ('publisher_uri', 'http://example.com/publisher'),
            ('publisher_name', 'Example Publisher'),
            ('publisher_email', '*****@*****.**'),
            ('publisher_url', 'http://example.com/publisher/home'),
            ('publisher_type', 'http://purl.org/adms/publishertype/Company'),
        )
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'organization': {
                'id': '',
                'name': 'publisher1',
                'title': 'Example Publisher from Org',
            },
            'extras': [{'key': key, 'value': value} for key, value in extra_pairs],
        }
        extras = self._extras(dataset)

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        org = self._triple(graph, dataset_ref, DCT.publisher, None)[2]
        assert org
        eq_(unicode(org), extras['publisher_uri'])

        # Name, mailbox and type are literals; the homepage is a URI reference.
        assert self._triple(graph, org, RDF.type, FOAF.Organization)
        assert self._triple(graph, org, FOAF.name, extras['publisher_name'])
        assert self._triple(graph, org, FOAF.mbox, extras['publisher_email'])
        homepage = URIRef(extras['publisher_url'])
        assert self._triple(graph, org, FOAF.homepage, homepage)
        assert self._triple(graph, org, DCT.type, extras['publisher_type'])
    def test_distribution_size_not_number(self):
        """A non-numeric ``size`` is still emitted as ``dcat:byteSize``."""
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'size': 'aaaa',
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource],
        }

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        dist_ref = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]

        # The raw string value is expected unchanged.
        assert self._triple(graph, dist_ref, DCAT.byteSize, resource['size'])
    def test_distribution_both_urls_different(self):
        """schema.org: differing urls map to ``schema:url`` and ``schema:contentUrl``."""
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'url': 'http://example.com/data/file',
            'download_url': 'http://example.com/data/file.csv',
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource],
        }

        serializer = RDFSerializer(profiles=['schemaorg'])
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        dist_ref = self._triple(graph, dataset_ref, SCHEMA.distribution, None)[2]

        # ``url`` -> schema:url, ``download_url`` -> schema:contentUrl.
        assert self._triple(graph, dist_ref, SCHEMA.url, resource['url'])
        assert self._triple(graph, dist_ref, SCHEMA.contentUrl, resource['download_url'])
    def test_hash_algorithm_not_uri(self):
        """A non-URI ``hash_algorithm`` is kept as-is on the SPDX checksum.

        Also checks that the checksum value is typed as hexBinary.
        """
        resource = {
            "id": "c041c635-054f-4431-b647-f9186926d021",
            "package_id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "CSV file",
            "hash": "aaaa",
            "hash_algorithm": "sha1",
        }
        dataset = {
            "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "test-dataset",
            "title": "Test DCAT dataset",
            "resources": [resource],
        }

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        dist_ref = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]

        checksum_ref = self._triple(graph, dist_ref, SPDX.checksum, None)[2]
        assert checksum_ref
        assert self._triple(
            graph,
            checksum_ref,
            SPDX.checksumValue,
            resource["hash"],
            data_type="http://www.w3.org/2001/XMLSchema#hexBinary",
        )
        assert self._triple(graph, checksum_ref, SPDX.algorithm, resource["hash_algorithm"])
    def test_distribution_both_urls_the_same(self):
        """Identical ``url`` and ``download_url`` yield only a downloadURL."""
        file_url = 'http://example.com/data/file.csv'
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'url': file_url,
            'download_url': file_url,
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource],
        }

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        dist_ref = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]

        # Only the download URL is emitted; no accessURL triple may exist.
        assert self._triple(graph, dist_ref, DCAT.downloadURL, URIRef(resource['url']))
        assert self._triple(graph, dist_ref, DCAT.accessURL, None) is None
    def test_distribution_format_with_backslash(self):
        """A ``format`` value containing a slash is emitted as dcat:mediaType.

        The fixture uses ``text/csv`` as the resource format and provides no
        separate ``mimetype``.
        """
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'url': 'http://example.com/data/file.csv',
            'format': 'text/csv',
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource],
        }

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        dist_ref = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]

        assert self._triple(graph, dist_ref, DCAT.mediaType, resource['format'])
    def test_hash_algorithm_not_uri(self):
        """A non-URI ``hash_algorithm`` is kept as-is on the SPDX checksum.

        Also checks that the checksum value is typed as hexBinary.
        """
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'hash': 'aaaa',
            'hash_algorithm': 'sha1',
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource],
        }

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        dist_ref = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]

        checksum_ref = self._triple(graph, dist_ref, SPDX.checksum, None)[2]
        assert checksum_ref
        assert self._triple(
            graph,
            checksum_ref,
            SPDX.checksumValue,
            resource['hash'],
            data_type='http://www.w3.org/2001/XMLSchema#hexBinary')
        assert self._triple(graph, checksum_ref, SPDX.algorithm,
                            resource['hash_algorithm'])
    def test_contact_details_extras(self):
        """``contact_*`` extras drive the ``dcat:contactPoint`` node.

        The dataset also defines maintainer and author fields; the assertions
        only check that the contact node matches the ``contact_*`` extras.
        """
        extra_pairs = (
            ("contact_uri", "http://example.com/contact"),
            ("contact_name", "Example Contact"),
            ("contact_email", "*****@*****.**"),
        )
        dataset = {
            "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "test-dataset",
            "maintainer": "Example Maintainer",
            "maintainer_email": "*****@*****.**",
            "author": "Example Author",
            "author_email": "*****@*****.**",
            "extras": [{"key": key, "value": value} for key, value in extra_pairs],
        }
        extras = self._extras(dataset)

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        # The contact point must use the URI, name and email from the extras.
        contact_ref = self._triple(graph, dataset_ref, DCAT.contactPoint, None)[2]
        assert contact_ref
        eq_(unicode(contact_ref), extras["contact_uri"])
        assert self._triple(graph, contact_ref, VCARD.fn, extras["contact_name"])
        assert self._triple(graph, contact_ref, VCARD.hasEmail, extras["contact_email"])
    def test_parse_subcatalog(self):
        """Round-trip a dataset with ``source_catalog_*`` extras through the parser.

        A catalog holding one dataset is serialized, the resulting graph is
        handed to an ``RDFParser`` and every dataset found under a
        subcatalog must expose that subcatalog's URI as its
        ``source_catalog_homepage`` extra.
        """
        publisher = {
            'name': 'Publisher',
            'email': '*****@*****.**',
            'type': 'Publisher',
            'uri': 'http://pub.lish.er',
        }
        source_catalog_extras = [
            {'key': 'source_catalog_title', 'value': 'Subcatalog example'},
            {'key': 'source_catalog_homepage', 'value': 'http://subcatalog.example'},
            {'key': 'source_catalog_description', 'value': 'Subcatalog example description'},
            {'key': 'source_catalog_language', 'value': 'http://publications.europa.eu/resource/authority/language/ITA'},
            {'key': 'source_catalog_modified', 'value': '2000-01-01'},
            {'key': 'source_catalog_publisher', 'value': json.dumps(publisher)},
        ]
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'test dataset',
            'extras': source_catalog_extras,
        }
        catalog_dict = {
            'title': 'My Catalog',
            'description': 'An Open Data Catalog',
            'homepage': 'http://example.com',
            'language': 'de',
        }

        serializer = RDFSerializer()
        serializer.serialize_catalog(catalog_dict, dataset_dicts=[dataset])

        # Parse straight from the serializer's graph instead of from text.
        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.g = serializer.g

        # At least one subcatalog must be linked through dct:hasPart.
        subcatalogs = list(parser.g.objects(None, DCT.hasPart))
        assert_true(subcatalogs)

        # Collect (dataset, subcatalog) pairs; at least one must exist.
        subdatasets = []
        for subcatalog in subcatalogs:
            for dataset_ref in parser.g.objects(subcatalog, DCAT.dataset):
                subdatasets.append((dataset_ref, subcatalog))
        assert_true(subdatasets)

        parsed_by_title = dict((d['title'], d) for d in parser.datasets())

        for subdataset, subcatalog in subdatasets:
            title = unicode(list(parser.g.objects(subdataset, DCT.title))[0])
            parsed = parsed_by_title[title]
            has_subcat = False
            for extra in parsed['extras']:
                value = extra['value']
                key = extra['key']
                if key == 'source_catalog_homepage':
                    has_subcat = True
                    eq_(value, unicode(subcatalog))
            # The parsed dataset must carry the subcatalog homepage extra.
            assert_true(has_subcat)
    def test_creators(self):
        """Serialize a dataset with several creators and parse it back.

        The parsed ``creator`` field must contain the JSON-encoded creators
        plus one entry built from the flat ``creator_name`` /
        ``creator_identifier`` fields, and nothing else.
        """
        creators = [
            {'creator_name': {DEFAULT_LANG: 'abc', 'it': 'abc it'}, 'creator_identifier': "ABC"},
            {'creator_name': {DEFAULT_LANG: 'cde', 'it': 'cde it'}, 'creator_identifier': "CDE"},
        ]
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
            'issued': '2016-11-29',
            'modified': '2016-11-29',
            'identifier': 'ISBN',
            'temporal_start': '2016-11-01',
            'temporal_end': '2016-11-30',
            'frequency': 'UPDATE_CONT',
            'publisher_name': 'bolzano',
            'publisher_identifier': '234234234',
            'creator_name': 'test',
            'creator_identifier': '412946129',
            'holder_name': 'bolzano',
            'holder_identifier': '234234234',
            'alternate_identifier': 'ISBN,TEST',
            'theme': '{ECON,ENVI}',
            'geographical_geonames_url': 'http://www.geonames.org/3181913',
            'language': '{DEU,ENG,ITA}',
            'is_version_of': 'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
            'creator': json.dumps(creators),
        }

        serializer = RDFSerializer()
        parser = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])

        parser.parse(serializer.serialize_dataset(dataset))
        parsed_datasets = list(parser.datasets())

        assert len(parsed_datasets) == 1
        parsed_dataset = parsed_datasets[0]

        # The flat creator fields are expected back as one more creator entry.
        creators.append({
            'creator_identifier': dataset['creator_identifier'],
            'creator_name': {DEFAULT_LANG: dataset['creator_name']},
        })
        creators_dict = dict((c['creator_identifier'], c) for c in creators)

        creators_in = json.loads(parsed_dataset['creator'])

        # Every parsed creator must be one of the expected ones.
        for parsed in creators_in:
            ident = parsed['creator_identifier']
            assert ident in creators_dict, \
                "no {} key in {}".format(ident, creators_dict.keys())
            expected_name = creators_dict[ident]['creator_name']
            assert parsed['creator_name'] == expected_name, \
                "{} vs {}".format(parsed['creator_name'], expected_name)

        # Every expected creator must have been parsed.
        for ident, expected in creators_dict.items():
            assert ident in [c['creator_identifier'] for c in creators_in]
            assert expected in creators_in
    def test_parse_subcatalog(self):
        """Round-trip a dataset with ``source_catalog_*`` extras through the parser.

        A catalog holding one dataset is serialized, the resulting graph is
        handed to an ``RDFParser`` and every dataset found under a
        subcatalog must expose that subcatalog's URI as its
        ``source_catalog_homepage`` extra.
        """
        publisher = {
            'name': 'Publisher',
            'email': '*****@*****.**',
            'type': 'Publisher',
            'uri': 'http://pub.lish.er',
        }
        source_catalog_extras = [
            {'key': 'source_catalog_title', 'value': 'Subcatalog example'},
            {'key': 'source_catalog_homepage', 'value': 'http://subcatalog.example'},
            {'key': 'source_catalog_description', 'value': 'Subcatalog example description'},
            {'key': 'source_catalog_language', 'value': 'http://publications.europa.eu/resource/authority/language/ITA'},
            {'key': 'source_catalog_modified', 'value': '2000-01-01'},
            {'key': 'source_catalog_publisher', 'value': json.dumps(publisher)},
        ]
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'test dataset',
            'extras': source_catalog_extras,
        }
        catalog_dict = {
            'title': 'My Catalog',
            'description': 'An Open Data Catalog',
            'homepage': 'http://example.com',
            'language': 'de',
        }

        serializer = RDFSerializer()
        serializer.serialize_catalog(catalog_dict, dataset_dicts=[dataset])

        # Parse straight from the serializer's graph instead of from text.
        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.g = serializer.g

        # At least one subcatalog must be linked through dct:hasPart.
        subcatalogs = list(parser.g.objects(None, DCT.hasPart))
        assert_true(subcatalogs)

        # Collect (dataset, subcatalog) pairs; at least one must exist.
        subdatasets = []
        for subcatalog in subcatalogs:
            for dataset_ref in parser.g.objects(subcatalog, DCAT.dataset):
                subdatasets.append((dataset_ref, subcatalog))
        assert_true(subdatasets)

        parsed_by_title = dict((d['title'], d) for d in parser.datasets())

        for subdataset, subcatalog in subdatasets:
            title = unicode(list(parser.g.objects(subdataset, DCT.title))[0])
            parsed = parsed_by_title[title]
            has_subcat = False
            for extra in parsed['extras']:
                value = extra['value']
                key = extra['key']
                if key == 'source_catalog_homepage':
                    has_subcat = True
                    eq_(value, unicode(subcatalog))
            # The parsed dataset must carry the subcatalog homepage extra.
            assert_true(has_subcat)
Example #33
0
def export_package_to_rdf(package_dict, _format='xml'):
    """Serialize a package's metadata as RDF.

    :param dict package_dict: the package metadata to serialize.
    :param str _format: the serialization format handed to the serializer.
        Default is ``xml``.
    :returns: the result of ``RDFSerializer.serialize_dataset`` for the
        given package.
    """
    return RDFSerializer().serialize_dataset(package_dict, _format=_format)
    def test_distribution_fields(self):
        """Check the schema.org triples produced for a single resource.

        Uses the ``schemaorg`` profile and verifies the distribution URI,
        its basic fields, the language list, the dates and the size.
        """
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'description': 'A CSV file',
            'url': 'http://example.com/data/file.csv',
            'status': 'http://purl.org/adms/status/Completed',
            'rights': 'Some statement about rights',
            'license': 'http://creativecommons.org/licenses/by/3.0/',
            'issued': '2015-06-26T15:21:09.034694',
            'modified': '2015-06-26T15:21:09.075774',
            'size': 1234,
            'language': '[\"en\", \"es\", \"ca\"]',
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource],
        }

        serializer = RDFSerializer(profiles=['schemaorg'])
        graph = serializer.g
        dataset_ref = serializer.graph_from_dataset(dataset)

        # Exactly one distribution is attached to the dataset.
        distributions = list(
            graph.triples((dataset_ref, SCHEMA.distribution, None)))
        eq_(len(distributions), 1)

        # URI
        distribution = self._triple(
            graph, dataset_ref, SCHEMA.distribution, None)[2]
        eq_(unicode(distribution), utils.resource_uri(resource))

        # Basic fields
        assert self._triple(graph, distribution, RDF.type, SCHEMA.DataDownload)
        for predicate, key in (
            (SCHEMA.name, 'name'),
            (SCHEMA.description, 'description'),
            (SCHEMA.license, 'license'),
        ):
            assert self._triple(graph, distribution, predicate, resource[key])

        # List-valued fields are stored on the resource as JSON strings.
        for key, predicate in [('language', SCHEMA.inLanguage)]:
            values = json.loads(resource[key])
            found = list(graph.triples((distribution, predicate, None)))
            eq_(len(found), len(values))
            for value in values:
                assert self._triple(graph, distribution, predicate, value)

        # Dates
        assert self._triple(
            graph, distribution, SCHEMA.datePublished, resource['issued'])
        assert self._triple(
            graph, distribution, SCHEMA.dateModified, resource['modified'])

        # Numbers
        assert self._triple(
            graph, distribution, SCHEMA.contentSize, resource['size'])
    def test_distribution_fields(self):
        """Check the schema.org triples produced for a single resource.

        Uses the ``schemaorg`` profile and verifies the distribution URI,
        its basic fields, the language list, the dates and the size.
        """
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'description': 'A CSV file',
            'url': 'http://example.com/data/file.csv',
            'status': 'http://purl.org/adms/status/Completed',
            'rights': 'Some statement about rights',
            'license': 'http://creativecommons.org/licenses/by/3.0/',
            'issued': '2015-06-26T15:21:09.034694',
            'modified': '2015-06-26T15:21:09.075774',
            'size': 1234,
            'language': '[\"en\", \"es\", \"ca\"]',
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource],
        }

        serializer = RDFSerializer(profiles=['schemaorg'])
        graph = serializer.g
        dataset_ref = serializer.graph_from_dataset(dataset)

        # Exactly one distribution is attached to the dataset.
        distributions = list(
            graph.triples((dataset_ref, SCHEMA.distribution, None)))
        eq_(len(distributions), 1)

        # URI
        distribution = self._triple(
            graph, dataset_ref, SCHEMA.distribution, None)[2]
        eq_(unicode(distribution), utils.resource_uri(resource))

        # Basic fields
        assert self._triple(graph, distribution, RDF.type, SCHEMA.DataDownload)
        for predicate, key in (
            (SCHEMA.name, 'name'),
            (SCHEMA.description, 'description'),
            (SCHEMA.license, 'license'),
        ):
            assert self._triple(graph, distribution, predicate, resource[key])

        # List-valued fields are stored on the resource as JSON strings.
        for key, predicate in [('language', SCHEMA.inLanguage)]:
            values = json.loads(resource[key])
            found = list(graph.triples((distribution, predicate, None)))
            eq_(len(found), len(values))
            for value in values:
                assert self._triple(graph, distribution, predicate, value)

        # Dates
        assert self._triple(
            graph, distribution, SCHEMA.datePublished, resource['issued'])
        assert self._triple(
            graph, distribution, SCHEMA.dateModified, resource['modified'])

        # Numbers
        assert self._triple(
            graph, distribution, SCHEMA.contentSize, resource['size'])
    def test_identifier_id(self):
        """With no other identifier given, ``dct:identifier`` is the dataset id."""
        dataset = {
            "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "test-dataset",
        }

        serializer = RDFSerializer()
        graph = serializer.g
        dataset_ref = serializer.graph_from_dataset(dataset)

        assert self._triple(graph, dataset_ref, DCT.identifier, dataset["id"])
Example #37
0
    def test_distribution_fields(self):
        """Check the DCAT triples produced for a single resource.

        Verifies the distribution URI, its basic literal fields, the
        ``xsd:dateTime`` dates and the ``xsd:decimal`` byte size.
        """
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'description': 'A CSV file',
            'url': 'http://example.com/data/file.csv',
            'status': 'http://purl.org/adms/status/Completed',
            'rights': 'Some statement about rights',
            'license': 'http://creativecommons.org/licenses/by/3.0/',
            'issued': '2015-06-26T15:21:09.034694',
            'modified': '2015-06-26T15:21:09.075774',
            'size': 1234,
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource],
        }

        serializer = RDFSerializer()
        graph = serializer.g
        dataset_ref = serializer.graph_from_dataset(dataset)

        # Exactly one distribution is attached to the dataset.
        distributions = list(
            graph.triples((dataset_ref, DCAT.distribution, None)))
        eq_(len(distributions), 1)

        # URI
        distribution = self._triple(
            graph, dataset_ref, DCAT.distribution, None)[2]
        eq_(unicode(distribution), utils.resource_uri(resource))

        # Basic fields
        assert self._triple(graph, distribution, RDF.type, DCAT.Distribution)
        for predicate, key in (
            (DCT.title, 'name'),
            (DCT.description, 'description'),
            (DCT.rights, 'rights'),
            (DCT.license, 'license'),
            (ADMS.status, 'status'),
        ):
            assert self._triple(graph, distribution, predicate, resource[key])

        # Dates
        for predicate, key in ((DCT.issued, 'issued'),
                               (DCT.modified, 'modified')):
            assert self._triple(graph, distribution, predicate,
                                resource[key], XSD.dateTime)

        # Numbers
        byte_size = float(resource['size'])
        assert self._triple(graph, distribution, DCAT.byteSize,
                            byte_size, XSD.decimal)
    def test_graph_from_dataset(self):
        """Check the basic DCAT_AP-IT triples generated for a dataset.

        Verifies the dataset URI, its RDF type, title, description,
        identifier and the keyword triples created from the tags.
        """
        tags = [{'name': 'Tag 1'}, {'name': 'Tag 2'}]
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': tags,
            'issued': '2016-11-29',
            'modified': '2016-11-29',
            'identifier': 'ISBN',
            'temporal_start': '2016-11-01',
            'temporal_end': '2016-11-30',
            'frequency': 'UPDATE_CONT',
            'publisher_name': 'bolzano',
            'publisher_identifier': '234234234',
            'creator_name': 'test',
            'creator_identifier': '412946129',
            'holder_name': 'bolzano',
            'holder_identifier': '234234234',
            'alternate_identifier': 'ISBN,TEST',
            'theme': '{ECON,ENVI}',
            'geographical_geonames_url': 'http://www.geonames.org/3181913',
            'language': '{DEU,ENG,ITA}',
            'is_version_of': 'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
            'conforms_to': '{CONF1,CONF2,CONF3}',
        }

        serializer = RDFSerializer()
        graph = serializer.g
        dataset_ref = serializer.graph_from_dataset(dataset)

        eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

        # Basic fields
        assert self._triple(graph, dataset_ref, RDF.type, DCATAPIT.Dataset)
        for predicate, key in (
            (DCT.title, 'title'),
            (DCT.description, 'notes'),
            (DCT.identifier, 'identifier'),
        ):
            assert self._triple(graph, dataset_ref, predicate, dataset[key])

        # Tags: one dcat:keyword triple per tag.
        keywords = list(graph.triples((dataset_ref, DCAT.keyword, None)))
        eq_(len(keywords), 2)
        for tag in dataset['tags']:
            assert self._triple(graph, dataset_ref, DCAT.keyword, tag['name'])
Example #39
0
def dcat_dataset_show(context, data_dict):
    """Return the RDF serialization of a single dataset.

    Access is checked with ``dcat_dataset_show``, the dataset is fetched
    through the ``package_show`` action and serialized in the format given
    by ``data_dict['format']`` (``None`` is passed when the key is absent).
    """
    toolkit.check_access('dcat_dataset_show', context, data_dict)

    package_show = toolkit.get_action('package_show')
    dataset_dict = package_show(context, data_dict)

    return RDFSerializer().serialize_dataset(
        dataset_dict, _format=data_dict.get('format'))
Example #40
0
    def test_subcatalog(self):
        """Check that subcatalogs are exposed through ``dct:hasPart``.

        The ``DCAT_EXPOSE_SUBCATALOGS`` config option is switched on for the
        duration of the test. It is switched back off in a ``finally``
        clause so that a failing assertion cannot leak the setting into
        other tests.
        """
        publisher = {'name': 'Publisher',
                     'email': '*****@*****.**',
                     'type': 'Publisher',
                     'uri': 'http://pub.lish.er'}
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'test dataset',
            'extras': [
                {'key': 'source_catalog_title', 'value': 'Subcatalog example'},
                {'key': 'source_catalog_homepage', 'value': 'http://subcatalog.example'},
                {'key': 'source_catalog_description', 'value': 'Subcatalog example description'},
                {'key': 'source_catalog_language', 'value': 'http://publications.europa.eu/resource/authority/language/ITA'},
                {'key': 'source_catalog_modified', 'value': '2000-01-01'},
                {'key': 'source_catalog_publisher', 'value': json.dumps(publisher)}
            ]
        }
        catalog_dict = {
            'title': 'My Catalog',
            'description': 'An Open Data Catalog',
            'homepage': 'http://example.com',
            'language': 'de',
        }

        config[DCAT_EXPOSE_SUBCATALOGS] = 'true'
        try:
            s = RDFSerializer()
            g = s.g

            s.serialize_catalog(catalog_dict, dataset_dicts=[dataset])

            # check if we have catalog->hasPart->subcatalog
            catalogs = list(g.triples((None, RDF.type, DCAT.Catalog,)))
            root = list(g.subjects(DCT.hasPart, None))
            assert_true(len(catalogs) > 0, catalogs)
            assert_true(len(root) == 1, root)

            root_ref = root[0]

            # check subcatalog
            subcatalogs = list(g.objects(root_ref, DCT.hasPart))
            assert_true(len(subcatalogs) == 1)
            stitle = list(g.objects(subcatalogs[0], DCT.title))
            assert_true(len(stitle) == 1)
            assert_true(str(stitle[0]) == 'Subcatalog example')

            # check dataset
            dataset_ref = list(g.subjects(RDF.type, DCAT.Dataset))
            assert_true(len(dataset_ref) == 1)
            dataset_ref = dataset_ref[0]
            dataset_title = list(g.objects(dataset_ref, DCT.title))
            assert_true(len(dataset_title) == 1)
            assert_true(unicode(dataset_title[0]) == dataset['title'])
        finally:
            # Always reset the global option, even when an assertion fails.
            config[DCAT_EXPOSE_SUBCATALOGS] = 'false'
    def test_temporal_coverage(self):
        """Round-trip a dataset with temporal coverage and validate the result.

        The dataset is serialized and parsed back with the ``euro_dcat_ap``
        and ``it_dcat_ap`` profiles; the parsed ``temporal_coverage`` value
        must be accepted by ``validators.dcatapit_temporal_coverage``.
        """
        load_themes()
        temporal_coverage = [{'temporal_start': '2001-01-01T00:00:00', 'temporal_end': '2001-02-01T10:11:12'},
                             {'temporal_start': '2001-01-01T00:00:00', 'temporal_end': '2001-02-01T10:11:12'},
                            ]
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
            'issued':'2016-11-29',
            'modified':'2016-11-29',
            'identifier':'ISBN',
            'temporal_start':'2016-11-01T00:00:00',
            'temporal_end':'2016-11-30T00:00:00',
            'temporal_coverage': json.dumps(temporal_coverage),
            'frequency':'UPDATE_CONT',
            'publisher_name':'bolzano',
            'publisher_identifier':'234234234',
            'creator_name':'test',
            'creator_identifier':'412946129',
            'holder_name':'bolzano',
            'holder_identifier':'234234234',
            'alternate_identifier':'ISBN,TEST',
            'theme':'{ECON,ENVI}',
            'geographical_geonames_url':'http://www.geonames.org/3181913',
            'language':'{DEU,ENG,ITA}',
            'is_version_of':'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
        }

        s = RDFSerializer()
        p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])

        serialized = s.serialize_dataset(dataset)

        p.parse(serialized)
        datasets = list(p.datasets())

        assert len(datasets) == 1
        d = datasets[0]

        # NOTE(review): the extended list is not compared against the parsed
        # value below; only the validity of the parsed value is checked.
        temporal_coverage.append({'temporal_start': dataset['temporal_start'],
                                  'temporal_end': dataset['temporal_end']})

        # The validator must accept the parsed value without raising.
        # ``except ... as err`` is valid on both Python 2.6+ and Python 3.
        try:
            validators.dcatapit_temporal_coverage(d['temporal_coverage'], {})
        except validators.Invalid as err:
            assert False, "Temporal coverage should be valid: {}".format(err)
Example #42
0
def dcat_dataset_show(context, data_dict):
    """Return the RDF serialization of a single dataset.

    Access is checked with ``dcat_dataset_show`` and the dataset is fetched
    through the ``package_show`` action. The serializer is built with
    ``data_dict['profiles']`` and the output format is taken from
    ``data_dict['format']``; ``None`` is passed for either key when absent.
    """
    toolkit.check_access('dcat_dataset_show', context, data_dict)

    package_show = toolkit.get_action('package_show')
    dataset_dict = package_show(context, data_dict)

    rdf_serializer = RDFSerializer(profiles=data_dict.get('profiles'))
    return rdf_serializer.serialize_dataset(
        dataset_dict, _format=data_dict.get('format'))
Example #43
0
    def test_identifier_id(self):
        """With no other identifier given, ``dct:identifier`` is the dataset id."""
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
        }

        serializer = RDFSerializer()
        graph = serializer.g
        dataset_ref = serializer.graph_from_dataset(dataset)

        expected_identifier = dataset['id']
        assert self._triple(
            graph, dataset_ref, DCT.identifier, expected_identifier)
    def test_graph_from_catalog_modified_date(self):
        """The catalog's ``dct:modified`` must match the created dataset.

        A dataset is created through ``factories.Dataset()`` and its
        ``metadata_modified`` value is expected on the catalog node as an
        ``xsd:dateTime`` literal.
        """
        created_dataset = factories.Dataset()

        serializer = RDFSerializer()
        graph = serializer.g
        catalog_ref = serializer.graph_from_catalog()

        eq_(unicode(catalog_ref), utils.catalog_uri())

        assert self._triple(graph, catalog_ref, DCT.modified,
                            created_dataset['metadata_modified'],
                            XSD.dateTime)
    def test_identifier_id(self):
        """With no other identifier given, ``dct:identifier`` is the dataset id."""
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
        }

        serializer = RDFSerializer()
        graph = serializer.g
        dataset_ref = serializer.graph_from_dataset(dataset)

        expected_identifier = dataset['id']
        assert self._triple(
            graph, dataset_ref, DCT.identifier, expected_identifier)
Example #46
0
    def test_graph_from_catalog_modified_date(self):
        """The catalog's ``dct:modified`` must match the created dataset.

        A dataset is created through ``factories.Dataset()`` and its
        ``metadata_modified`` value is expected on the catalog node as an
        ``xsd:dateTime`` literal.
        """
        created_dataset = factories.Dataset()

        serializer = RDFSerializer()
        graph = serializer.g
        catalog_ref = serializer.graph_from_catalog()

        eq_(unicode(catalog_ref), utils.catalog_uri())

        assert self._triple(graph, catalog_ref, DCT.modified,
                            created_dataset['metadata_modified'],
                            XSD.dateTime)
    def test_subcatalog(self):
        """Check that a subcatalog is linked to the root catalog.

        A catalog with one dataset carrying ``source_catalog_*`` extras is
        serialized; the graph must then hold one root catalog with exactly
        one ``dct:hasPart`` subcatalog and exactly one dataset.
        """
        publisher = {
            'name': 'Publisher',
            'email': '*****@*****.**',
            'type': 'Publisher',
            'uri': 'http://pub.lish.er',
        }
        source_catalog_extras = [
            {'key': 'source_catalog_title', 'value': 'Subcatalog example'},
            {'key': 'source_catalog_homepage', 'value': 'http://subcatalog.example'},
            {'key': 'source_catalog_description', 'value': 'Subcatalog example description'},
            {'key': 'source_catalog_language', 'value': 'http://publications.europa.eu/resource/authority/language/ITA'},
            {'key': 'source_catalog_modified', 'value': '2000-01-01'},
            {'key': 'source_catalog_publisher', 'value': json.dumps(publisher)},
        ]
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'test dataset',
            'extras': source_catalog_extras,
        }
        catalog_dict = {
            'title': 'My Catalog',
            'description': 'An Open Data Catalog',
            'homepage': 'http://example.com',
            'language': 'de',
        }

        serializer = RDFSerializer()
        graph = serializer.g
        serializer.serialize_catalog(catalog_dict, dataset_dicts=[dataset])

        # catalog -> hasPart -> subcatalog
        catalogs = list(graph.triples((None, RDF.type, DCAT.Catalog)))
        roots = list(graph.subjects(DCT.hasPart, None))
        assert_true(len(catalogs) > 0, catalogs)
        assert_true(len(roots) == 1, roots)
        root_ref = roots[0]

        # Exactly one subcatalog with the expected title.
        subcatalogs = list(graph.objects(root_ref, DCT.hasPart))
        assert_true(len(subcatalogs) == 1)
        subcatalog_titles = list(graph.objects(subcatalogs[0], DCT.title))
        assert_true(len(subcatalog_titles) == 1)
        assert_true(str(subcatalog_titles[0]) == 'Subcatalog example')

        # Exactly one dataset with the expected title.
        dataset_refs = list(graph.subjects(RDF.type, DCAT.Dataset))
        assert_true(len(dataset_refs) == 1)
        dataset_titles = list(graph.objects(dataset_refs[0], DCT.title))
        assert_true(len(dataset_titles) == 1)
        assert_true(unicode(dataset_titles[0]) == dataset['title'])
Example #48
0
    def test_publisher_extras(self):
        """Publisher extras must be used for the ``dct:publisher`` node.

        The dataset also has an ``organization``; the assertions below
        compare the publisher node only against the ``publisher_*`` extras.
        """
        publisher_extras = [
            {'key': 'publisher_uri', 'value': 'http://example.com/publisher'},
            {'key': 'publisher_name', 'value': 'Example Publisher'},
            {'key': 'publisher_email', 'value': '*****@*****.**'},
            {'key': 'publisher_url', 'value': 'http://example.com/publisher/home'},
            {'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'},
        ]
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'organization': {
                'id': '',
                'name': 'publisher1',
                'title': 'Example Publisher from Org',
            },
            'extras': publisher_extras,
        }
        extras = self._extras(dataset)

        serializer = RDFSerializer()
        graph = serializer.g
        dataset_ref = serializer.graph_from_dataset(dataset)

        publisher = self._triple(graph, dataset_ref, DCT.publisher, None)[2]
        assert publisher
        eq_(unicode(publisher), extras['publisher_uri'])

        assert self._triple(graph, publisher, RDF.type, FOAF.Organization)
        assert self._triple(
            graph, publisher, FOAF.name, extras['publisher_name'])
        assert self._triple(
            graph, publisher, FOAF.mbox, extras['publisher_email'])

        # Homepage and type are looked up as URI references.
        homepage_ref = URIRef(extras['publisher_url'])
        assert self._triple(graph, publisher, FOAF.homepage, homepage_ref)
        type_ref = URIRef(extras['publisher_type'])
        assert self._triple(graph, publisher, DCT.type, type_ref)
    def test_identifier_guid(self):
        """A ``guid`` extra must be found as the dataset's ``dct:identifier``."""
        guid_extra = {"key": "guid", "value": "guidyyy"}
        dataset = {
            "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "test-dataset",
            "extras": [guid_extra],
        }
        extras = self._extras(dataset)

        serializer = RDFSerializer()
        graph = serializer.g
        dataset_ref = serializer.graph_from_dataset(dataset)

        assert self._triple(
            graph, dataset_ref, DCT.identifier, extras["guid"])
Example #50
0
    def test_graph_from_catalog(self):
        """Check the basic triples generated for the catalog node.

        The title and homepage are compared against the ``ckan.site_title``
        and ``ckan.site_url`` config values, the language against ``en``.
        """
        serializer = RDFSerializer()
        graph = serializer.g
        catalog_ref = serializer.graph_from_catalog()

        eq_(unicode(catalog_ref), utils.catalog_uri())

        # Basic fields
        assert self._triple(graph, catalog_ref, RDF.type, DCAT.Catalog)

        site_title = config.get('ckan.site_title')
        assert self._triple(graph, catalog_ref, DCT.title, site_title)

        site_url = config.get('ckan.site_url')
        assert self._triple(graph, catalog_ref, FOAF.homepage, site_url)

        assert self._triple(graph, catalog_ref, DCT.language, 'en')
    def test_distribution_fields(self):
        """Check the DCAT triples produced for a single resource.

        Verifies the distribution URI, its basic literal fields, the
        ``xsd:dateTime`` dates and the ``xsd:decimal`` byte size.
        """
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'description': 'A CSV file',
            'url': 'http://example.com/data/file.csv',
            'status': 'http://purl.org/adms/status/Completed',
            'rights': 'Some statement about rights',
            'license': 'http://creativecommons.org/licenses/by/3.0/',
            'issued': '2015-06-26T15:21:09.034694',
            'modified': '2015-06-26T15:21:09.075774',
            'size': 1234,
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource],
        }

        serializer = RDFSerializer()
        graph = serializer.g
        dataset_ref = serializer.graph_from_dataset(dataset)

        # Exactly one distribution is attached to the dataset.
        distributions = list(
            graph.triples((dataset_ref, DCAT.distribution, None)))
        eq_(len(distributions), 1)

        # URI
        distribution = self._triple(
            graph, dataset_ref, DCAT.distribution, None)[2]
        eq_(unicode(distribution), utils.resource_uri(resource))

        # Basic fields
        assert self._triple(graph, distribution, RDF.type, DCAT.Distribution)
        for predicate, key in (
            (DCT.title, 'name'),
            (DCT.description, 'description'),
            (DCT.rights, 'rights'),
            (DCT.license, 'license'),
            (ADMS.status, 'status'),
        ):
            assert self._triple(graph, distribution, predicate, resource[key])

        # Dates
        for predicate, key in ((DCT.issued, 'issued'),
                               (DCT.modified, 'modified')):
            assert self._triple(graph, distribution, predicate,
                                resource[key], XSD.dateTime)

        # Numbers
        byte_size = float(resource['size'])
        assert self._triple(graph, distribution, DCAT.byteSize,
                            byte_size, XSD.decimal)
    def test_graph_from_catalog(self):
        """Check the basic triples generated for the catalog node.

        The title is compared against ``ckan.site_title``, the homepage
        against ``ckan.site_url`` wrapped in a ``URIRef`` and the language
        against ``en``.
        """
        serializer = RDFSerializer()
        graph = serializer.g
        catalog_ref = serializer.graph_from_catalog()

        eq_(unicode(catalog_ref), utils.catalog_uri())

        # Basic fields
        assert self._triple(graph, catalog_ref, RDF.type, DCAT.Catalog)

        site_title = config.get('ckan.site_title')
        assert self._triple(graph, catalog_ref, DCT.title, site_title)

        homepage_ref = URIRef(config.get('ckan.site_url'))
        assert self._triple(graph, catalog_ref, FOAF.homepage, homepage_ref)

        assert self._triple(graph, catalog_ref, DCT.language, 'en')
    def test_catalog(self):
        """Check the schema.org data catalog node linked from a dataset.

        With the ``schemaorg`` profile the dataset must point to a
        ``schema:DataCatalog`` whose url, name and description match the
        literal values asserted below.
        """
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
        }

        serializer = RDFSerializer(profiles=['schemaorg'])
        graph = serializer.g
        dataset_ref = serializer.graph_from_dataset(dataset)

        data_catalog = self._triple(
            graph, dataset_ref, SCHEMA.includedInDataCatalog, None)[2]
        assert data_catalog
        assert self._triple(graph, data_catalog, RDF.type, SCHEMA.DataCatalog)

        for predicate, expected in (
            (SCHEMA.url, 'http://ckan.example.org'),
            (SCHEMA.name, 'ckan.example.org'),
            (SCHEMA.description, 'CKAN Portal'),
        ):
            assert self._triple(graph, data_catalog, predicate, expected)
Example #54
0
def dcat_catalog_show(context, data_dict):
    """Return the serialized catalog for the datasets found by a search.

    Access is checked against the ``dcat_catalog_show`` auth function, the
    datasets are looked up with ``_search_ckan_datasets`` and the results
    are handed to ``RDFSerializer.serialize_catalog`` together with the
    pagination details computed by ``_pagination_info``.

    :param context: CKAN action context, forwarded to the access check and
        to the dataset search.
    :param data_dict: request parameters. The keys read directly here are
        ``format`` (passed on as the serialization format) and ``profiles``
        (passed on to the serializer); the whole dict is also forwarded to
        the search and pagination helpers.
    :returns: whatever ``serialize_catalog`` produces for the found
        datasets.
    """
    toolkit.check_access('dcat_catalog_show', context, data_dict)

    query = _search_ckan_datasets(context, data_dict)
    dataset_dicts = query['results']
    pagination_info = _pagination_info(query, data_dict)

    # Forward any caller-requested profiles instead of silently dropping
    # them, as the other serializer-based actions in this file do.
    # NOTE(review): assumes RDFSerializer treats profiles=None like no
    # argument -- confirm against its constructor.
    serializer = RDFSerializer(profiles=data_dict.get('profiles'))

    output = serializer.serialize_catalog({}, dataset_dicts,
                                          _format=data_dict.get('format'),
                                          pagination_info=pagination_info)

    return output
Example #55
0
    def test_alternate_identifier_numeric(self):
        # A dataset carrying a numeric-looking 'alternate_identifier' extra
        # is serialized, and dct:identifier is checked against the dataset id.
        alternate_identifier = {'key': 'alternate_identifier', 'value': '1.0'}
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [alternate_identifier],
        }

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        expected_identifier = dataset['id']
        assert self._triple(
            graph, dataset_ref, DCT.identifier, expected_identifier)
Example #56
0
def dcat_catalog_show(context, data_dict):
    """Serialize the catalog made of the datasets matching a search.

    The caller must pass the ``dcat_catalog_show`` access check. The
    ``profiles`` and ``format`` entries of ``data_dict`` (both optional, read
    with ``dict.get``) select the serializer profiles and the output format.

    :param context: CKAN action context, forwarded to the access check and
        to the dataset search.
    :param data_dict: request parameters, also forwarded to the search and
        pagination helpers.
    :returns: the result of ``RDFSerializer.serialize_catalog``.
    """
    toolkit.check_access('dcat_catalog_show', context, data_dict)

    search_result = _search_ckan_datasets(context, data_dict)
    datasets = search_result['results']
    paging = _pagination_info(search_result, data_dict)

    rdf_serializer = RDFSerializer(profiles=data_dict.get('profiles'))
    return rdf_serializer.serialize_catalog(
        {},
        datasets,
        _format=data_dict.get('format'),
        pagination_info=paging)
Example #57
0
def dcat_markup_dataset_show(context, data_dict):
    """Serialize one dataset after replacing its notes with its title.

    Authorization is checked against the ``dcat_dataset_show`` auth
    function. The dataset is fetched through the ``package_show`` action and
    serialized with the profiles and format optionally given in
    ``data_dict``.

    :param context: CKAN action context, forwarded to the access check and
        to ``package_show``.
    :param data_dict: request parameters; forwarded to ``package_show`` and
        read for the optional ``profiles`` and ``format`` keys.
    :returns: the result of ``RDFSerializer.serialize_dataset``.
    """
    p.toolkit.check_access('dcat_dataset_show', context, data_dict)

    package_show = p.toolkit.get_action('package_show')
    dataset_dict = package_show(context, data_dict)

    # The serialized dataset carries the title in place of the notes.
    dataset_dict['notes'] = dataset_dict['title']

    rdf_serializer = RDFSerializer(profiles=data_dict.get('profiles'))
    return rdf_serializer.serialize_dataset(
        dataset_dict,
        _format=data_dict.get('format'))
    def test_alternate_identifier_numeric(self):
        # Serialize a dataset whose only extra is a numeric-looking
        # 'alternate_identifier' and check that dct:identifier is looked up
        # with the dataset id.
        extras = [
            {'key': 'alternate_identifier', 'value': '1.0'},
        ]
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': extras,
        }

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        identifier_triple = self._triple(
            graph, dataset_ref, DCT.identifier, dataset['id'])
        assert identifier_triple
    def test_temporal_start_only(self):
        # With the 'schemaorg' profile and only a 'temporal_start' extra,
        # schema:temporalCoverage is checked against the ISO-formatted
        # parsed start date.
        temporal_start = {
            'key': 'temporal_start',
            'value': '2015-06-26T15:21:09.075774',
        }
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [temporal_start],
        }
        extras = self._extras(dataset)

        serializer = RDFSerializer(profiles=['schemaorg'])
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        start_date = parse_date(extras['temporal_start'])
        expected_coverage = start_date.isoformat()
        assert self._triple(
            graph, dataset_ref, SCHEMA.temporalCoverage, expected_coverage)
    def test_identifier_guid(self):
        # When the dataset has a 'guid' extra, dct:identifier is checked
        # against that guid value.
        guid_extra = {'key': 'guid', 'value': 'guidyyy'}
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [guid_extra],
        }
        extras = self._extras(dataset)

        serializer = RDFSerializer()
        graph = serializer.g

        dataset_ref = serializer.graph_from_dataset(dataset)

        expected_identifier = extras['guid']
        assert self._triple(
            graph, dataset_ref, DCT.identifier, expected_identifier)