def test_spatial(self):
    """Spatial extras serialize to a schema:Place with a schema:GeoShape geometry."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'spatial_uri', 'value': 'http://sws.geonames.org/6361390/'},
            {'key': 'spatial_text', 'value': 'Tarragona'},
            {'key': 'spatial', 'value': '{"type": "Polygon", "coordinates": [[[1.1870606,41.0786393],[1.1870606,41.1655218],[1.3752339,41.1655218],[1.3752339,41.0786393],[1.1870606,41.0786393]]]}'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    # The spatial node is identified by spatial_uri
    spatial = self._triple(graph, dataset_ref, SCHEMA.spatialCoverage, None)[2]
    assert spatial
    eq_(unicode(spatial), extras['spatial_uri'])
    assert self._triple(graph, spatial, RDF.type, SCHEMA.Place)
    assert self._triple(graph, spatial, SCHEMA.description, extras['spatial_text'])

    # The geometry is attached as a GeoShape polygon
    geo = self._triple(graph, spatial, SCHEMA.geo, None)[2]
    assert self._triple(graph, geo, RDF.type, SCHEMA.GeoShape)
    assert self._triple(graph, geo, SCHEMA.polygon, extras['spatial'])
def test_publisher_extras(self):
    """publisher_* extras map to a schema:Organization with a schema:ContactPoint."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'organization': {
            'id': '',
            'name': 'publisher1',
            'title': 'Example Publisher from Org',
        },
        'extras': [
            {'key': 'publisher_uri', 'value': 'http://example.com/publisher'},
            {'key': 'publisher_name', 'value': 'Example Publisher'},
            {'key': 'publisher_email', 'value': '*****@*****.**'},
            {'key': 'publisher_url', 'value': 'http://example.com/publisher/home'},
            {'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    # Publisher node: extras take precedence over the organization
    publisher = self._triple(graph, dataset_ref, SCHEMA.publisher, None)[2]
    assert publisher
    eq_(unicode(publisher), extras['publisher_uri'])
    assert self._triple(graph, publisher, RDF.type, SCHEMA.Organization)
    assert self._triple(graph, publisher, SCHEMA.name, extras['publisher_name'])

    # Contact point carries name, email and url
    contact_point = self._triple(graph, publisher, SCHEMA.contactPoint, None)[2]
    assert contact_point
    assert self._triple(graph, contact_point, RDF.type, SCHEMA.ContactPoint)
    assert self._triple(graph, contact_point, SCHEMA.name, extras['publisher_name'])
    assert self._triple(graph, contact_point, SCHEMA.email, extras['publisher_email'])
    assert self._triple(graph, contact_point, SCHEMA.url, extras['publisher_url'])
    assert self._triple(graph, contact_point, SCHEMA.contactType, 'customer service')
def test_distribution_both_urls_the_same(self):
    """When url equals download_url only dcat:downloadURL is emitted, no accessURL."""
    resource = {
        "id": "c041c635-054f-4431-b647-f9186926d021",
        "package_id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "CSV file",
        "url": "http://example.com/data/file.csv",
        "download_url": "http://example.com/data/file.csv",
    }
    dataset = {
        "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "test-dataset",
        "title": "Test DCAT dataset",
        "resources": [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    assert self._triple(graph, distribution, DCAT.downloadURL, URIRef(resource["url"]))
    assert self._triple(graph, distribution, DCAT.accessURL, None) is None
def test_distribution_format(self):
    """format and mimetype map to schema:encodingFormat and schema:fileType."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'url': 'http://example.com/data/file.csv',
        'format': 'CSV',
        'mimetype': 'text/csv',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, SCHEMA.distribution, None)[2]
    assert self._triple(graph, distribution, SCHEMA.encodingFormat, resource['format'])
    assert self._triple(graph, distribution, SCHEMA.fileType, resource['mimetype'])
def test_temporal_start_and_end(self):
    """temporal_start/temporal_end extras join into one schema:temporalCoverage interval."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'temporal_start', 'value': '2015-06-26T15:21:09.075774'},
            {'key': 'temporal_end', 'value': '2015-07-14'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    # Interval is expressed as "start/end" per schema.org convention
    assert self._triple(graph, dataset_ref, SCHEMA.temporalCoverage,
                        '2015-06-26T15:21:09.075774/2015-07-14')
def test_distribution_format(self):
    """format and mimetype map to dct:format and dcat:mediaType respectively."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'url': 'http://example.com/data/file.csv',
        'format': 'CSV',
        'mimetype': 'text/csv',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    assert self._triple(graph, distribution, DCT['format'], resource['format'])
    assert self._triple(graph, distribution, DCAT.mediaType, resource['mimetype'])
def test_graph_from_dataset(self):
    """Basic fields, tags, dates and JSON-list extras are all serialized."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'notes': 'Lorem ipsum',
        'url': 'http://example.com/ds1',
        'version': '1.0b',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'extras': [
            {'key': 'alternate_identifier', 'value': 'xyz'},
            {'key': 'version_notes', 'value': 'This is a beta version'},
            {'key': 'frequency', 'value': 'monthly'},
            {'key': 'language', 'value': '["en"]'},
            {'key': 'theme', 'value': '["http://eurovoc.europa.eu/100142", "http://eurovoc.europa.eu/100152"]'},
            {'key': 'conforms_to', 'value': '["Standard 1", "Standard 2"]'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

    # Basic fields
    assert self._triple(graph, dataset_ref, RDF.type, DCAT.Dataset)
    assert self._triple(graph, dataset_ref, DCT.title, dataset['title'])
    assert self._triple(graph, dataset_ref, DCT.description, dataset['notes'])
    assert self._triple(graph, dataset_ref, OWL.versionInfo, dataset['version'])
    assert self._triple(graph, dataset_ref, ADMS.versionNotes, extras['version_notes'])
    assert self._triple(graph, dataset_ref, ADMS.identifier, extras['alternate_identifier'])
    assert self._triple(graph, dataset_ref, DCT.accrualPeriodicity, extras['frequency'])

    # Tags
    eq_(len(list(graph.triples((dataset_ref, DCAT.keyword, None)))), 2)
    for tag in dataset['tags']:
        assert self._triple(graph, dataset_ref, DCAT.keyword, tag['name'])

    # Dates
    assert self._triple(graph, dataset_ref, DCT.issued,
                        dataset['metadata_created'], XSD.dateTime)
    assert self._triple(graph, dataset_ref, DCT.modified,
                        dataset['metadata_modified'], XSD.dateTime)

    # JSON-list extras: each list item becomes its own triple
    for key, predicate in [
        ('language', DCT.language),
        ('theme', DCAT.theme),
        ('conforms_to', DCAT.conformsTo),
    ]:
        values = json.loads(extras[key])
        eq_(len(list(graph.triples((dataset_ref, predicate, None)))), len(values))
        for value in values:
            assert self._triple(graph, dataset_ref, predicate, value)
def test_distribution_format_with_backslash(self):
    """A format that looks like an IANA media type is serialized as dcat:mediaType."""
    resource = {
        "id": "c041c635-054f-4431-b647-f9186926d021",
        "package_id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "CSV file",
        "url": "http://example.com/data/file.csv",
        "format": "text/csv",
    }
    dataset = {
        "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "test-dataset",
        "title": "Test DCAT dataset",
        "resources": [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    assert self._triple(graph, distribution, DCAT.mediaType, resource["format"])
def test_contact_details_extras(self):
    """contact_* extras win over maintainer/author for the dcat:contactPoint."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'maintainer': 'Example Maintainer',
        'maintainer_email': '*****@*****.**',
        'author': 'Example Author',
        'author_email': '*****@*****.**',
        'extras': [
            {'key': 'contact_uri', 'value': 'http://example.com/contact'},
            {'key': 'contact_name', 'value': 'Example Contact'},
            {'key': 'contact_email', 'value': '*****@*****.**'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    # Contact details
    contact_details = self._triple(graph, dataset_ref, DCAT.contactPoint, None)[2]
    assert contact_details
    eq_(unicode(contact_details), extras['contact_uri'])
    assert self._triple(graph, contact_details, VCARD.fn, extras['contact_name'])
    assert self._triple(graph, contact_details, VCARD.hasEmail, extras['contact_email'])
def test_spatial(self):
    """Spatial extras become a dct:Location with GeoJSON and WKT geometries."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'spatial_uri', 'value': 'http://sws.geonames.org/6361390/'},
            {'key': 'spatial_text', 'value': 'Tarragona'},
            {'key': 'spatial', 'value': '{"type": "Polygon", "coordinates": [[[1.1870606,41.0786393],[1.1870606,41.1655218],[1.3752339,41.1655218],[1.3752339,41.0786393],[1.1870606,41.0786393]]]}'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    spatial = self._triple(graph, dataset_ref, DCT.spatial, None)[2]
    assert spatial
    eq_(unicode(spatial), extras['spatial_uri'])
    assert self._triple(graph, spatial, RDF.type, DCT.Location)
    assert self._triple(graph, spatial, SKOS.prefLabel, extras['spatial_text'])

    # The same geometry is emitted twice: once per encoding
    eq_(len(list(graph.triples((spatial, LOCN.geometry, None)))), 2)
    # Geometry in GeoJSON
    assert self._triple(graph, spatial, LOCN.geometry, extras['spatial'], GEOJSON_IMT)
    # Geometry in WKT
    wkt_geom = wkt.dumps(json.loads(extras['spatial']), decimals=4)
    assert self._triple(graph, spatial, LOCN.geometry, wkt_geom, GSP.wktLiteral)
def test_distribution_both_urls_the_same(self):
    """When url equals download_url only schema:contentUrl is emitted, no schema:url."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'url': 'http://example.com/data/file.csv',
        'download_url': 'http://example.com/data/file.csv',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, SCHEMA.distribution, None)[2]
    assert self._triple(graph, distribution, SCHEMA.contentUrl, resource['url'])
    assert self._triple(graph, distribution, SCHEMA.url, None) is None
def test_spatial_bad_json_no_wkt(self):
    """Unparseable spatial values still serialize as GeoJSON but skip WKT."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'spatial', 'value': 'NotJSON'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    # No spatial_uri extra, so a blank node is used for the location
    spatial = self._triple(graph, dataset_ref, DCT.spatial, None)[2]
    assert spatial
    assert_true(isinstance(spatial, BNode))
    # Geometry in GeoJSON is still emitted verbatim
    assert self._triple(graph, spatial, LOCN.geometry, extras['spatial'], GEOJSON_IMT)
    # But no WKT geometry: only the single GeoJSON triple exists
    eq_(len(list(graph.triples((spatial, LOCN.geometry, None)))), 1)
def test_distribution_prefer_access_url(self):
    """An explicit access_url wins over url and suppresses downloadURL."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'url': 'http://example.com/data',
        'access_url': 'http://example.com/data/file',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    assert self._triple(graph, distribution, DCAT.accessURL, URIRef(resource['access_url']))
    assert self._triple(graph, distribution, DCAT.downloadURL, None) is None
def _build_graph_and_check_format_mediatype(self, dataset_dict, expected_format, expected_mediatype):
    """Serialize ``dataset_dict`` and compare the first distribution's
    dct:format and dcat:mediaType objects with the expected lists.

    :param dataset_dict: dataset dict, expected to contain one resource
    :param expected_format: expected list of dct:format items in the resource
    :param expected_mediatype: expected list of dcat:mediaType items in the resource
    """
    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset_dict)

    # graph should contain the expected nodes
    resource_ref = list(graph.objects(dataset_ref, DCAT.distribution))[0]
    dct_format = list(graph.objects(resource_ref, DCT['format']))
    dcat_mediatype = list(graph.objects(resource_ref, DCAT.mediaType))
    eq_(expected_format, dct_format)
    eq_(expected_mediatype, dcat_mediatype)
def test_temporal(self):
    """temporal_* extras build a dct:PeriodOfTime with normalized ISO dates."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'temporal_start', 'value': '2015-06-26T15:21:09.075774'},
            {'key': 'temporal_end', 'value': '2015-07-14'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    temporal = self._triple(graph, dataset_ref, DCT.temporal, None)[2]
    assert temporal
    assert self._triple(graph, temporal, RDF.type, DCT.PeriodOfTime)
    # Dates are parsed and re-emitted in ISO format as xsd:dateTime
    assert self._triple(graph, temporal, SCHEMA.startDate,
                        parse_date(extras['temporal_start']).isoformat(), XSD.dateTime)
    assert self._triple(graph, temporal, SCHEMA.endDate,
                        parse_date(extras['temporal_end']).isoformat(), XSD.dateTime)
def test_hash_algorithm_not_uri(self):
    """A plain (non-URI) hash_algorithm is still attached to the spdx:Checksum."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'hash': 'aaaa',
        'hash_algorithm': 'sha1',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    checksum = self._triple(graph, distribution, SPDX.checksum, None)[2]
    assert checksum
    assert self._triple(graph, checksum, RDF.type, SPDX.Checksum)
    # Hash value is typed as xsd:hexBinary
    assert self._triple(
        graph, checksum, SPDX.checksumValue, resource['hash'],
        data_type='http://www.w3.org/2001/XMLSchema#hexBinary')
    assert self._triple(graph, checksum, SPDX.algorithm, resource['hash_algorithm'])
def test_identifier_extra(self):
    """An `identifier` extra takes precedence over `guid` for dct:identifier."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'identifier', 'value': 'idxxx'},
            {'key': 'guid', 'value': 'guidyyy'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    assert self._triple(graph, dataset_ref, DCT.identifier, extras['identifier'])
def test_distribution_both_urls_the_same_with_access_url(self):
    """An explicit access_url is serialized even when equal to download_url."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'access_url': 'http://example.com/data/file.csv',
        'download_url': 'http://example.com/data/file.csv',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    assert self._triple(graph, distribution, DCAT.downloadURL, URIRef(resource['download_url']))
    assert self._triple(graph, distribution, DCAT.accessURL, URIRef(resource['access_url']))
def test_distribution_both_urls_the_same(self):
    """Legacy behavior: `url` is only serialized to accessURL when it differs
    from download_url; here they match, so only downloadURL appears."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'url': 'http://example.com/data/file.csv',
        'download_url': 'http://example.com/data/file.csv',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    assert self._triple(graph, distribution, DCAT.downloadURL, URIRef(resource['url']))
    assert self._triple(graph, distribution, DCAT.accessURL, None) is None
def test_distribution_format_with_backslash(self):
    """A media-type-shaped format is serialized as schema:encodingFormat."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'url': 'http://example.com/data/file.csv',
        'format': 'text/csv',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, SCHEMA.distribution, None)[2]
    assert self._triple(graph, distribution, SCHEMA.encodingFormat, resource['format'])
def test_publisher_extras(self):
    """publisher_* extras map to a foaf:Organization, preferred over the org."""
    dataset = {
        "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "test-dataset",
        "organization": {
            "id": "",
            "name": "publisher1",
            "title": "Example Publisher from Org",
        },
        "extras": [
            {"key": "publisher_uri", "value": "http://example.com/publisher"},
            {"key": "publisher_name", "value": "Example Publisher"},
            {"key": "publisher_email", "value": "*****@*****.**"},
            {"key": "publisher_url", "value": "http://example.com/publisher/home"},
            {"key": "publisher_type", "value": "http://purl.org/adms/publishertype/Company"},
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    publisher = self._triple(graph, dataset_ref, DCT.publisher, None)[2]
    assert publisher
    eq_(unicode(publisher), extras["publisher_uri"])

    assert self._triple(graph, publisher, RDF.type, FOAF.Organization)
    assert self._triple(graph, publisher, FOAF.name, extras["publisher_name"])
    assert self._triple(graph, publisher, FOAF.mbox, extras["publisher_email"])
    assert self._triple(graph, publisher, FOAF.homepage, URIRef(extras["publisher_url"]))
    assert self._triple(graph, publisher, DCT.type, extras["publisher_type"])
def test_publisher_extras(self):
    """publisher_* extras take precedence over the dataset organization."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'organization': {
            'id': '',
            'name': 'publisher1',
            'title': 'Example Publisher from Org',
        },
        'extras': [
            {'key': 'publisher_uri', 'value': 'http://example.com/publisher'},
            {'key': 'publisher_name', 'value': 'Example Publisher'},
            {'key': 'publisher_email', 'value': '*****@*****.**'},
            {'key': 'publisher_url', 'value': 'http://example.com/publisher/home'},
            {'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    publisher = self._triple(graph, dataset_ref, DCT.publisher, None)[2]
    assert publisher
    eq_(unicode(publisher), extras['publisher_uri'])

    assert self._triple(graph, publisher, RDF.type, FOAF.Organization)
    assert self._triple(graph, publisher, FOAF.name, extras['publisher_name'])
    assert self._triple(graph, publisher, FOAF.mbox, extras['publisher_email'])
    assert self._triple(graph, publisher, FOAF.homepage, URIRef(extras['publisher_url']))
    assert self._triple(graph, publisher, DCT.type, extras['publisher_type'])
def test_distribution_size_not_number(self):
    """A non-numeric size is still serialized verbatim as dcat:byteSize."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'size': 'aaaa',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    assert self._triple(graph, distribution, DCAT.byteSize, resource['size'])
def test_distribution_both_urls_different(self):
    """Distinct url and download_url map to schema:url and schema:contentUrl."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'url': 'http://example.com/data/file',
        'download_url': 'http://example.com/data/file.csv',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, SCHEMA.distribution, None)[2]
    assert self._triple(graph, distribution, SCHEMA.url, resource['url'])
    assert self._triple(graph, distribution, SCHEMA.contentUrl, resource['download_url'])
def test_hash_algorithm_not_uri(self):
    """A plain (non-URI) hash_algorithm is serialized as spdx:algorithm."""
    resource = {
        "id": "c041c635-054f-4431-b647-f9186926d021",
        "package_id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "CSV file",
        "hash": "aaaa",
        "hash_algorithm": "sha1",
    }
    dataset = {
        "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "test-dataset",
        "title": "Test DCAT dataset",
        "resources": [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    checksum = self._triple(graph, distribution, SPDX.checksum, None)[2]
    assert checksum
    # Hash value is typed as xsd:hexBinary
    assert self._triple(
        graph, checksum, SPDX.checksumValue, resource["hash"],
        data_type="http://www.w3.org/2001/XMLSchema#hexBinary"
    )
    assert self._triple(graph, checksum, SPDX.algorithm, resource["hash_algorithm"])
def test_distribution_both_urls_the_same(self):
    """Identical url/download_url collapse to a single dcat:downloadURL."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'url': 'http://example.com/data/file.csv',
        'download_url': 'http://example.com/data/file.csv',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    assert self._triple(graph, distribution, DCAT.downloadURL, URIRef(resource['url']))
    assert self._triple(graph, distribution, DCAT.accessURL, None) is None
def test_distribution_format_with_backslash(self):
    """A media-type-shaped format ends up as dcat:mediaType."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'url': 'http://example.com/data/file.csv',
        'format': 'text/csv',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    assert self._triple(graph, distribution, DCAT.mediaType, resource['format'])
def test_hash_algorithm_not_uri(self):
    """hash/hash_algorithm are serialized on an spdx:checksum node."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'hash': 'aaaa',
        'hash_algorithm': 'sha1',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    checksum = self._triple(graph, distribution, SPDX.checksum, None)[2]
    assert checksum
    # Hash value is typed as xsd:hexBinary
    assert self._triple(graph, checksum, SPDX.checksumValue, resource['hash'],
                        data_type='http://www.w3.org/2001/XMLSchema#hexBinary')
    assert self._triple(graph, checksum, SPDX.algorithm, resource['hash_algorithm'])
def test_contact_details_extras(self):
    """contact_* extras override maintainer/author for dcat:contactPoint."""
    dataset = {
        "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "test-dataset",
        "maintainer": "Example Maintainer",
        "maintainer_email": "*****@*****.**",
        "author": "Example Author",
        "author_email": "*****@*****.**",
        "extras": [
            {"key": "contact_uri", "value": "http://example.com/contact"},
            {"key": "contact_name", "value": "Example Contact"},
            {"key": "contact_email", "value": "*****@*****.**"},
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    # Contact details
    contact_details = self._triple(graph, dataset_ref, DCAT.contactPoint, None)[2]
    assert contact_details
    eq_(unicode(contact_details), extras["contact_uri"])
    assert self._triple(graph, contact_details, VCARD.fn, extras["contact_name"])
    assert self._triple(graph, contact_details, VCARD.hasEmail, extras["contact_email"])
def test_parse_subcatalog(self):
    """Datasets serialized under subcatalogs keep their source catalog extras
    when parsed back with the euro_dcat_ap profile."""
    publisher = {'name': 'Publisher',
                 'email': '*****@*****.**',
                 'type': 'Publisher',
                 'uri': 'http://pub.lish.er'}
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'test dataset',
        'extras': [
            {'key': 'source_catalog_title', 'value': 'Subcatalog example'},
            {'key': 'source_catalog_homepage', 'value': 'http://subcatalog.example'},
            {'key': 'source_catalog_description', 'value': 'Subcatalog example description'},
            {'key': 'source_catalog_language', 'value': 'http://publications.europa.eu/resource/authority/language/ITA'},
            {'key': 'source_catalog_modified', 'value': '2000-01-01'},
            {'key': 'source_catalog_publisher', 'value': json.dumps(publisher)},
        ]
    }
    catalog_dict = {
        'title': 'My Catalog',
        'description': 'An Open Data Catalog',
        'homepage': 'http://example.com',
        'language': 'de',
    }

    serializer = RDFSerializer()
    serializer.serialize_catalog(catalog_dict, dataset_dicts=[dataset])

    parser = RDFParser(profiles=['euro_dcat_ap'])
    parser.g = serializer.g

    # at least one subcatalog linked via dct:hasPart
    subcatalogs = list(parser.g.objects(None, DCT.hasPart))
    assert_true(subcatalogs)

    # at least one dataset inside the subcatalogs
    subdatasets = []
    for subcatalog in subcatalogs:
        for subdataset in parser.g.objects(subcatalog, DCAT.dataset):
            subdatasets.append((subdataset, subcatalog,))
    assert_true(subdatasets)

    datasets = dict([(d['title'], d) for d in parser.datasets()])
    for subdataset, subcatalog in subdatasets:
        title = unicode(list(parser.g.objects(subdataset, DCT.title))[0])
        parsed_dataset = datasets[title]
        has_subcat = False
        for extra in parsed_dataset['extras']:
            if extra['key'] == 'source_catalog_homepage':
                has_subcat = True
                eq_(extra['value'], unicode(subcatalog))
        # the subcatalog homepage must have survived the round trip
        assert_true(has_subcat)
def test_creators(self):
    """Multiple creators in the `creator` JSON field survive a
    serialize/parse round trip, merged with the flat creator_* fields."""
    creators = [
        {'creator_name': {DEFAULT_LANG: 'abc', 'it': 'abc it'}, 'creator_identifier': "ABC"},
        {'creator_name': {DEFAULT_LANG: 'cde', 'it': 'cde it'}, 'creator_identifier': "CDE"},
    ]
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Dataset di test DCAT_AP-IT',
        'notes': 'dcatapit dataset di test',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'issued': '2016-11-29',
        'modified': '2016-11-29',
        'identifier': 'ISBN',
        'temporal_start': '2016-11-01',
        'temporal_end': '2016-11-30',
        'frequency': 'UPDATE_CONT',
        'publisher_name': 'bolzano',
        'publisher_identifier': '234234234',
        'creator_name': 'test',
        'creator_identifier': '412946129',
        'holder_name': 'bolzano',
        'holder_identifier': '234234234',
        'alternate_identifier': 'ISBN,TEST',
        'theme': '{ECON,ENVI}',
        'geographical_geonames_url': 'http://www.geonames.org/3181913',
        'language': '{DEU,ENG,ITA}',
        'is_version_of': 'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
        'creator': json.dumps(creators),
    }

    serializer = RDFSerializer()
    parser = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])

    parser.parse(serializer.serialize_dataset(dataset))
    parsed = list(parser.datasets())
    assert len(parsed) == 1
    d = parsed[0]

    # The flat creator_name/creator_identifier pair joins the JSON list
    creators.append({'creator_identifier': dataset['creator_identifier'],
                     'creator_name': {DEFAULT_LANG: dataset['creator_name']}})
    creators_dict = dict((v['creator_identifier'], v) for v in creators)

    creators_in = json.loads(d['creator'])
    # Every parsed creator must match an expected one, field by field
    for c in creators_in:
        assert c['creator_identifier'] in creators_dict.keys(),\
            "no {} key in {}".format(c['creator_identifier'], creators_dict.keys())
        assert c['creator_name'] == creators_dict[c['creator_identifier']]['creator_name'],\
            "{} vs {}".format(c['creator_name'],
                              creators_dict[c['creator_identifier']]['creator_name'])
    # ...and every expected creator must appear in the parsed output
    for identifier in creators_dict.keys():
        assert identifier in [parsed_c['creator_identifier'] for parsed_c in creators_in]
        assert creators_dict[identifier] in creators_in
def export_package_to_rdf(package_dict, _format='xml'):
    """Export a package's metadata as RDF in the requested serialization.

    :param dict package_dict: the package metadata.
    :param str _format: the desired output format. Default is ``xml``.
    :returns: the serialized dataset.
    """
    return RDFSerializer().serialize_dataset(package_dict, _format=_format)
def test_distribution_fields(self):
    """All resource fields land on the single schema:DataDownload node."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'description': 'A CSV file',
        'url': 'http://example.com/data/file.csv',
        'status': 'http://purl.org/adms/status/Completed',
        'rights': 'Some statement about rights',
        'license': 'http://creativecommons.org/licenses/by/3.0/',
        'issued': '2015-06-26T15:21:09.034694',
        'modified': '2015-06-26T15:21:09.075774',
        'size': 1234,
        'language': '["en", "es", "ca"]',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    eq_(len(list(graph.triples((dataset_ref, SCHEMA.distribution, None)))), 1)

    # URI
    distribution = self._triple(graph, dataset_ref, SCHEMA.distribution, None)[2]
    eq_(unicode(distribution), utils.resource_uri(resource))

    # Basic fields
    assert self._triple(graph, distribution, RDF.type, SCHEMA.DataDownload)
    assert self._triple(graph, distribution, SCHEMA.name, resource['name'])
    assert self._triple(graph, distribution, SCHEMA.description, resource['description'])
    assert self._triple(graph, distribution, SCHEMA.license, resource['license'])

    # JSON-list fields: one triple per list item
    for key, predicate in [
        ('language', SCHEMA.inLanguage),
    ]:
        values = json.loads(resource[key])
        eq_(len(list(graph.triples((distribution, predicate, None)))), len(values))
        for value in values:
            assert self._triple(graph, distribution, predicate, value)

    # Dates
    assert self._triple(graph, distribution, SCHEMA.datePublished, resource['issued'])
    assert self._triple(graph, distribution, SCHEMA.dateModified, resource['modified'])

    # Numbers
    assert self._triple(graph, distribution, SCHEMA.contentSize, resource['size'])
def test_identifier_id(self):
    """Without identifier/guid extras, the dataset id becomes dct:identifier."""
    dataset = {
        "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "test-dataset",
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    assert self._triple(graph, dataset_ref, DCT.identifier, dataset["id"])
def test_distribution_fields(self):
    """All resource fields land on the single dcat:Distribution node."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'description': 'A CSV file',
        'url': 'http://example.com/data/file.csv',
        'status': 'http://purl.org/adms/status/Completed',
        'rights': 'Some statement about rights',
        'license': 'http://creativecommons.org/licenses/by/3.0/',
        'issued': '2015-06-26T15:21:09.034694',
        'modified': '2015-06-26T15:21:09.075774',
        'size': 1234,
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    eq_(len(list(graph.triples((dataset_ref, DCAT.distribution, None)))), 1)

    # URI
    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    eq_(unicode(distribution), utils.resource_uri(resource))

    # Basic fields
    assert self._triple(graph, distribution, RDF.type, DCAT.Distribution)
    assert self._triple(graph, distribution, DCT.title, resource['name'])
    assert self._triple(graph, distribution, DCT.description, resource['description'])
    assert self._triple(graph, distribution, DCT.rights, resource['rights'])
    assert self._triple(graph, distribution, DCT.license, resource['license'])
    assert self._triple(graph, distribution, ADMS.status, resource['status'])

    # Dates
    assert self._triple(graph, distribution, DCT.issued, resource['issued'], XSD.dateTime)
    assert self._triple(graph, distribution, DCT.modified, resource['modified'], XSD.dateTime)

    # Numbers: byteSize is typed as xsd:decimal
    assert self._triple(graph, distribution, DCAT.byteSize,
                        float(resource['size']), XSD.decimal)
def test_graph_from_dataset(self):
    """DCAT-AP_IT serialization covers basic fields, identifier and keywords."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Dataset di test DCAT_AP-IT',
        'notes': 'dcatapit dataset di test',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'issued': '2016-11-29',
        'modified': '2016-11-29',
        'identifier': 'ISBN',
        'temporal_start': '2016-11-01',
        'temporal_end': '2016-11-30',
        'frequency': 'UPDATE_CONT',
        'publisher_name': 'bolzano',
        'publisher_identifier': '234234234',
        'creator_name': 'test',
        'creator_identifier': '412946129',
        'holder_name': 'bolzano',
        'holder_identifier': '234234234',
        'alternate_identifier': 'ISBN,TEST',
        'theme': '{ECON,ENVI}',
        'geographical_geonames_url': 'http://www.geonames.org/3181913',
        'language': '{DEU,ENG,ITA}',
        'is_version_of': 'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
        'conforms_to': '{CONF1,CONF2,CONF3}',
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

    # Basic fields
    assert self._triple(graph, dataset_ref, RDF.type, DCATAPIT.Dataset)
    assert self._triple(graph, dataset_ref, DCT.title, dataset['title'])
    assert self._triple(graph, dataset_ref, DCT.description, dataset['notes'])
    assert self._triple(graph, dataset_ref, DCT.identifier, dataset['identifier'])

    # Tags
    eq_(len(list(graph.triples((dataset_ref, DCAT.keyword, None)))), 2)
    for tag in dataset['tags']:
        assert self._triple(graph, dataset_ref, DCAT.keyword, tag['name'])
def dcat_dataset_show(context, data_dict):
    """Return a single CKAN dataset serialized as RDF.

    :param context: CKAN action context
    :param data_dict: must identify the dataset (passed straight to
        ``package_show``); may also carry optional ``format`` and
        ``profiles`` keys controlling the serialization.
    :returns: the serialized dataset (string) in the requested format
    :raises: whatever ``check_access`` / ``package_show`` raise on
        authorization or lookup failure
    """
    toolkit.check_access('dcat_dataset_show', context, data_dict)

    dataset_dict = toolkit.get_action('package_show')(context, data_dict)

    # Honour an optional 'profiles' key for consistency with the other
    # dcat_*_show actions in this file; when absent, .get() yields None,
    # which presumably falls back to the configured default profiles —
    # TODO confirm against RDFSerializer's handling of profiles=None.
    serializer = RDFSerializer(profiles=data_dict.get('profiles'))

    output = serializer.serialize_dataset(dataset_dict,
                                          _format=data_dict.get('format'))

    return output
def test_subcatalog(self):
    """With DCAT_EXPOSE_SUBCATALOGS enabled, a harvested dataset's
    source_catalog_* extras are exposed as a dct:hasPart subcatalog.

    Fix: the config flag was previously reset only on the success path;
    a failing assertion leaked 'true' into subsequent tests.  The reset
    now lives in a ``finally`` block.
    """
    publisher = {'name': 'Publisher',
                 'email': '*****@*****.**',
                 'type': 'Publisher',
                 'uri': 'http://pub.lish.er'}
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'test dataset',
        'extras': [
            {'key': 'source_catalog_title', 'value': 'Subcatalog example'},
            {'key': 'source_catalog_homepage', 'value': 'http://subcatalog.example'},
            {'key': 'source_catalog_description', 'value': 'Subcatalog example description'},
            {'key': 'source_catalog_language', 'value': 'http://publications.europa.eu/resource/authority/language/ITA'},
            {'key': 'source_catalog_modified', 'value': '2000-01-01'},
            {'key': 'source_catalog_publisher', 'value': json.dumps(publisher)},
        ]
    }
    catalog_dict = {
        'title': 'My Catalog',
        'description': 'An Open Data Catalog',
        'homepage': 'http://example.com',
        'language': 'de',
    }

    config[DCAT_EXPOSE_SUBCATALOGS] = 'true'
    try:
        s = RDFSerializer()
        g = s.g

        s.serialize_catalog(catalog_dict, dataset_dicts=[dataset])

        # check if we have catalog->hasPart->subcatalog
        catalogs = list(g.triples((None, RDF.type, DCAT.Catalog,)))
        root = list(g.subjects(DCT.hasPart, None))
        assert_true(len(catalogs) > 0, catalogs)
        assert_true(len(root) == 1, root)
        root_ref = root[0]

        # check subcatalog
        subcatalogs = list(g.objects(root_ref, DCT.hasPart))
        assert_true(len(subcatalogs) == 1)
        stitle = list(g.objects(subcatalogs[0], DCT.title))
        assert_true(len(stitle) == 1)
        assert_true(str(stitle[0]) == 'Subcatalog example')

        # check dataset
        dataset_ref = list(g.subjects(RDF.type, DCAT.Dataset))
        assert_true(len(dataset_ref) == 1)
        dataset_ref = dataset_ref[0]
        dataset_title = list(g.objects(dataset_ref, DCT.title))
        assert_true(len(dataset_title) == 1)
        assert_true(unicode(dataset_title[0]) == dataset['title'])
    finally:
        # Always restore the flag so a failure here cannot affect
        # other tests that rely on the default behaviour.
        config[DCAT_EXPOSE_SUBCATALOGS] = 'false'
def test_temporal_coverage(self):
    """Round-trip a dataset with multiple temporal_coverage intervals
    through serializer and parser, then check the parsed value passes
    the dcatapit_temporal_coverage validator.

    Fix: ``except validators.Invalid, err`` is Python-2-only syntax
    (PEP 3110); replaced with ``except ... as err``.  The redundant
    ``assert True`` on the success path is dropped.
    """
    load_themes()
    temporal_coverage = [{'temporal_start': '2001-01-01T00:00:00',
                          'temporal_end': '2001-02-01T10:11:12'},
                         {'temporal_start': '2001-01-01T00:00:00',
                          'temporal_end': '2001-02-01T10:11:12'},
                         ]
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Dataset di test DCAT_AP-IT',
        'notes': 'dcatapit dataset di test',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'issued': '2016-11-29',
        'modified': '2016-11-29',
        'identifier': 'ISBN',
        'temporal_start': '2016-11-01T00:00:00',
        'temporal_end': '2016-11-30T00:00:00',
        'temporal_coverage': json.dumps(temporal_coverage),
        'frequency': 'UPDATE_CONT',
        'publisher_name': 'bolzano',
        'publisher_identifier': '234234234',
        'creator_name': 'test',
        'creator_identifier': '412946129',
        'holder_name': 'bolzano',
        'holder_identifier': '234234234',
        'alternate_identifier': 'ISBN,TEST',
        'theme': '{ECON,ENVI}',
        'geographical_geonames_url': 'http://www.geonames.org/3181913',
        'language': '{DEU,ENG,ITA}',
        'is_version_of': 'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
    }

    s = RDFSerializer()
    p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])

    serialized = s.serialize_dataset(dataset)
    p.parse(serialized)

    datasets = list(p.datasets())
    assert len(datasets) == 1
    d = datasets[0]

    # The top-level temporal_start/end pair is expected to appear in the
    # parsed coverage alongside the explicit intervals.
    temporal_coverage.append({'temporal_start': dataset['temporal_start'],
                              'temporal_end': dataset['temporal_end']})

    try:
        # this should not raise an exception
        validators.dcatapit_temporal_coverage(d['temporal_coverage'], {})
    except validators.Invalid as err:
        assert False, "Temporal coverage should be valid: {}".format(err)
def dcat_dataset_show(context, data_dict):
    """Serialize one CKAN dataset as RDF.

    Authorization is checked first; the dataset dict comes from
    ``package_show``.  Optional ``profiles`` and ``format`` keys in
    ``data_dict`` select the serialization profiles and output format.
    """
    toolkit.check_access('dcat_dataset_show', context, data_dict)

    dataset_dict = toolkit.get_action('package_show')(context, data_dict)

    serializer = RDFSerializer(profiles=data_dict.get('profiles'))

    return serializer.serialize_dataset(dataset_dict,
                                        _format=data_dict.get('format'))
def test_identifier_id(self):
    """Without identifier-related extras, the dataset id itself is used
    as dct:identifier."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    assert self._triple(graph, dataset_ref, DCT.identifier, dataset['id'])
def test_graph_from_catalog_modified_date(self):
    """The catalog node carries a dct:modified triple matching the
    dataset's metadata_modified timestamp."""
    dataset = factories.Dataset()

    serializer = RDFSerializer()
    graph = serializer.g
    catalog_ref = serializer.graph_from_catalog()

    eq_(unicode(catalog_ref), utils.catalog_uri())
    assert self._triple(graph, catalog_ref, DCT.modified,
                        dataset['metadata_modified'], XSD.dateTime)
def test_subcatalog(self):
    """A harvested dataset's source_catalog_* extras produce a
    dct:hasPart subcatalog under the root catalog."""
    publisher = {'name': 'Publisher',
                 'email': '*****@*****.**',
                 'type': 'Publisher',
                 'uri': 'http://pub.lish.er'}
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'test dataset',
        'extras': [
            {'key': 'source_catalog_title', 'value': 'Subcatalog example'},
            {'key': 'source_catalog_homepage', 'value': 'http://subcatalog.example'},
            {'key': 'source_catalog_description', 'value': 'Subcatalog example description'},
            {'key': 'source_catalog_language', 'value': 'http://publications.europa.eu/resource/authority/language/ITA'},
            {'key': 'source_catalog_modified', 'value': '2000-01-01'},
            {'key': 'source_catalog_publisher', 'value': json.dumps(publisher)},
        ]
    }
    catalog_dict = {
        'title': 'My Catalog',
        'description': 'An Open Data Catalog',
        'homepage': 'http://example.com',
        'language': 'de',
    }

    serializer = RDFSerializer()
    graph = serializer.g
    serializer.serialize_catalog(catalog_dict, dataset_dicts=[dataset])

    # At least one dcat:Catalog node, and exactly one root owning
    # a dct:hasPart link.
    catalog_nodes = list(graph.triples((None, RDF.type, DCAT.Catalog,)))
    assert_true(len(catalog_nodes) > 0, catalog_nodes)
    roots = list(graph.subjects(DCT.hasPart, None))
    assert_true(len(roots) == 1, roots)
    root_ref = roots[0]

    # The root catalog has exactly one subcatalog carrying the
    # source_catalog_title value.
    subcatalogs = list(graph.objects(root_ref, DCT.hasPart))
    assert_true(len(subcatalogs) == 1)
    subtitles = list(graph.objects(subcatalogs[0], DCT.title))
    assert_true(len(subtitles) == 1)
    assert_true(str(subtitles[0]) == 'Subcatalog example')

    # The dataset itself is serialized once, with its title intact.
    dataset_refs = list(graph.subjects(RDF.type, DCAT.Dataset))
    assert_true(len(dataset_refs) == 1)
    titles = list(graph.objects(dataset_refs[0], DCT.title))
    assert_true(len(titles) == 1)
    assert_true(unicode(titles[0]) == dataset['title'])
def test_publisher_extras(self):
    """publisher_* extras take precedence over the organization dict
    when building the dct:publisher node."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'organization': {
            'id': '',
            'name': 'publisher1',
            'title': 'Example Publisher from Org',
        },
        'extras': [
            {'key': 'publisher_uri', 'value': 'http://example.com/publisher'},
            {'key': 'publisher_name', 'value': 'Example Publisher'},
            {'key': 'publisher_email', 'value': '*****@*****.**'},
            {'key': 'publisher_url', 'value': 'http://example.com/publisher/home'},
            {'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    publisher_ref = self._triple(graph, dataset_ref, DCT.publisher, None)[2]
    assert publisher_ref
    eq_(unicode(publisher_ref), extras['publisher_uri'])

    # Every publisher property should come from the extras, not the org.
    publisher_checks = (
        (RDF.type, FOAF.Organization),
        (FOAF.name, extras['publisher_name']),
        (FOAF.mbox, extras['publisher_email']),
        (FOAF.homepage, URIRef(extras['publisher_url'])),
        (DCT.type, URIRef(extras['publisher_type'])),
    )
    for predicate, expected in publisher_checks:
        assert self._triple(graph, publisher_ref, predicate, expected)
def test_identifier_guid(self):
    """A 'guid' extra is preferred as the dct:identifier value."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [{'key': 'guid', 'value': 'guidyyy'}],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    assert self._triple(graph, dataset_ref, DCT.identifier, extras['guid'])
def test_graph_from_catalog(self):
    """Catalog-level triples (type, title, homepage, language) come from
    the CKAN site configuration."""
    serializer = RDFSerializer()
    graph = serializer.g

    catalog_ref = serializer.graph_from_catalog()

    eq_(unicode(catalog_ref), utils.catalog_uri())

    # Basic fields.  NOTE(review): homepage is asserted here as a plain
    # string (a sibling test uses URIRef) — presumably this variant
    # targets a profile that emits a literal; confirm against the profile.
    assert self._triple(graph, catalog_ref, RDF.type, DCAT.Catalog)
    assert self._triple(graph, catalog_ref, DCT.title,
                        config.get('ckan.site_title'))
    assert self._triple(graph, catalog_ref, FOAF.homepage,
                        config.get('ckan.site_url'))
    assert self._triple(graph, catalog_ref, DCT.language, 'en')
def test_distribution_fields(self):
    """One resource in, one fully-populated dcat:Distribution out."""
    res = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'description': 'A CSV file',
        'url': 'http://example.com/data/file.csv',
        'status': 'http://purl.org/adms/status/Completed',
        'rights': 'Some statement about rights',
        'license': 'http://creativecommons.org/licenses/by/3.0/',
        'issued': '2015-06-26T15:21:09.034694',
        'modified': '2015-06-26T15:21:09.075774',
        'size': 1234,
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [res],
    }

    ser = RDFSerializer()
    graph = ser.g
    ds_ref = ser.graph_from_dataset(dataset)

    # One and only one distribution.
    eq_(len(list(graph.triples((ds_ref, DCAT.distribution, None)))), 1)

    # URI
    dist = self._triple(graph, ds_ref, DCAT.distribution, None)[2]
    eq_(unicode(dist), utils.resource_uri(res))

    # Basic fields
    assert self._triple(graph, dist, RDF.type, DCAT.Distribution)
    assert self._triple(graph, dist, DCT.title, res['name'])
    assert self._triple(graph, dist, DCT.description, res['description'])
    assert self._triple(graph, dist, DCT.rights, res['rights'])
    assert self._triple(graph, dist, DCT.license, res['license'])
    assert self._triple(graph, dist, ADMS.status, res['status'])

    # Dates are typed xsd:dateTime
    assert self._triple(graph, dist, DCT.issued, res['issued'], XSD.dateTime)
    assert self._triple(graph, dist, DCT.modified, res['modified'], XSD.dateTime)

    # Size becomes an xsd:decimal
    assert self._triple(graph, dist, DCAT.byteSize,
                        float(res['size']), XSD.decimal)
def test_graph_from_catalog(self):
    """Catalog node: type, title, homepage (as a URIRef) and language
    all reflect the CKAN site configuration."""
    serializer = RDFSerializer()
    graph = serializer.g

    catalog_ref = serializer.graph_from_catalog()

    eq_(unicode(catalog_ref), utils.catalog_uri())

    # Basic fields, driven from site config.
    expectations = (
        (RDF.type, DCAT.Catalog),
        (DCT.title, config.get('ckan.site_title')),
        (FOAF.homepage, URIRef(config.get('ckan.site_url'))),
        (DCT.language, 'en'),
    )
    for predicate, expected in expectations:
        assert self._triple(graph, catalog_ref, predicate, expected)
def test_catalog(self):
    """schema.org profile links the dataset to a schema:DataCatalog node
    populated from the site settings."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
    }

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    catalog_node = self._triple(graph, dataset_ref,
                                SCHEMA.includedInDataCatalog, None)[2]
    assert catalog_node

    assert self._triple(graph, catalog_node, RDF.type, SCHEMA.DataCatalog)
    assert self._triple(graph, catalog_node, SCHEMA.url, 'http://ckan.example.org')
    assert self._triple(graph, catalog_node, SCHEMA.name, 'ckan.example.org')
    assert self._triple(graph, catalog_node, SCHEMA.description, 'CKAN Portal')
def dcat_catalog_show(context, data_dict):
    """Return a page of the CKAN catalog serialized as RDF.

    :param context: CKAN action context
    :param data_dict: search/paging parameters; may also carry optional
        ``format`` and ``profiles`` keys controlling the serialization.
    :returns: the serialized catalog (string) in the requested format
    :raises: whatever ``check_access`` raises on authorization failure
    """
    toolkit.check_access('dcat_catalog_show', context, data_dict)

    query = _search_ckan_datasets(context, data_dict)
    dataset_dicts = query['results']
    pagination_info = _pagination_info(query, data_dict)

    # Honour an optional 'profiles' key for consistency with the other
    # dcat_*_show actions in this file; when absent, .get() yields None,
    # which presumably falls back to the configured default profiles —
    # TODO confirm against RDFSerializer's handling of profiles=None.
    serializer = RDFSerializer(profiles=data_dict.get('profiles'))

    output = serializer.serialize_catalog({}, dataset_dicts,
                                          _format=data_dict.get('format'),
                                          pagination_info=pagination_info)

    return output
def test_alternate_identifier_numeric(self):
    """A numeric alternate_identifier extra must not displace the
    dataset id as dct:identifier."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'alternate_identifier', 'value': '1.0'},
        ]
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    assert self._triple(graph, dataset_ref, DCT.identifier, dataset['id'])
def dcat_catalog_show(context, data_dict):
    """Serialize a page of the CKAN catalog as RDF.

    ``data_dict`` supplies search/paging parameters plus optional
    ``profiles`` and ``format`` keys for the serializer.
    """
    toolkit.check_access('dcat_catalog_show', context, data_dict)

    query = _search_ckan_datasets(context, data_dict)
    pagination_info = _pagination_info(query, data_dict)

    serializer = RDFSerializer(profiles=data_dict.get('profiles'))

    return serializer.serialize_catalog(
        {},
        query['results'],
        _format=data_dict.get('format'),
        pagination_info=pagination_info,
    )
def dcat_markup_dataset_show(context, data_dict):
    """Serialize one CKAN dataset as RDF for markup output.

    Fix: removed two commented-out Python 2 ``print`` debug statements.

    :param context: CKAN action context
    :param data_dict: identifies the dataset (passed to ``package_show``);
        optional ``profiles`` and ``format`` keys control serialization.
    :returns: the serialized dataset (string) in the requested format
    """
    p.toolkit.check_access('dcat_dataset_show', context, data_dict)

    dataset_dict = p.toolkit.get_action('package_show')(context, data_dict)

    # Deliberately replace the description with the title before
    # serializing — the markup output mirrors the title in the notes
    # field.  NOTE(review): the original long description is discarded;
    # confirm this is intended for all callers.
    dataset_dict['notes'] = dataset_dict['title']

    serializer = RDFSerializer(profiles=data_dict.get('profiles'))

    output = serializer.serialize_dataset(dataset_dict,
                                          _format=data_dict.get('format'))

    return output
def test_temporal_start_only(self):
    """With only a temporal_start extra, schema:temporalCoverage is the
    parsed start date alone."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'temporal_start', 'value': '2015-06-26T15:21:09.075774'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    expected_coverage = parse_date(extras['temporal_start']).isoformat()
    assert self._triple(graph, dataset_ref, SCHEMA.temporalCoverage,
                        expected_coverage)
def test_identifier_guid(self):
    """The 'guid' extra wins as the dct:identifier value."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'guid', 'value': 'guidyyy'},
        ]
    }
    extras = self._extras(dataset)

    ser = RDFSerializer()
    graph = ser.g
    ds_ref = ser.graph_from_dataset(dataset)

    assert self._triple(graph, ds_ref, DCT.identifier, extras['guid'])