# Imports assumed for this excerpt; the rdflib and nose imports are standard,
# while the exact module paths for the DCAT namespaces and processors follow
# ckanext-dcat conventions.
import json

from rdflib import Graph, URIRef, BNode, Literal
from rdflib.namespace import RDF, RDFS, SKOS
from nose.tools import eq_, assert_true

import ckantoolkit as toolkit

from ckanext.dcat.processors import RDFParser, RDFSerializer
from ckanext.dcat.profiles import (DCAT, DCT, ADMS, VCARD, LOCN, GSP,
                                   GEOJSON_IMT)


def test_spatial_one_dct_spatial_instance_no_uri(self):
    g = Graph()

    dataset = URIRef('http://example.org/datasets/1')
    g.add((dataset, RDF.type, DCAT.Dataset))

    location_ref = BNode()
    g.add((dataset, DCT.spatial, location_ref))

    g.add((location_ref, RDF.type, DCT.Location))
    g.add((location_ref,
           LOCN.geometry,
           Literal('{"type": "Point", "coordinates": [23, 45]}',
                   datatype=GEOJSON_IMT)))
    g.add((location_ref, SKOS.prefLabel, Literal('Newark')))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    extras = self._extras(datasets[0])

    assert_true('spatial_uri' not in extras)
    eq_(extras['spatial_text'], 'Newark')
    eq_(extras['spatial'], '{"type": "Point", "coordinates": [23, 45]}')
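# The tests in this excerpt call self._extras(), which is defined on the test
# class but not shown here. A minimal sketch of what it presumably does,
# inferred from usage (flatten the CKAN extras list into a key/value dict);
# the body is an assumption:
def _extras(self, dataset):
    extras = {}
    for extra in dataset.get('extras'):
        extras[extra['key']] = extra['value']
    return extras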
def test_distribution_format_IMT_field(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")

    imt = BNode()
    g.add((imt, RDF.type, DCT.IMT))
    g.add((imt, RDF.value, Literal('text/turtle')))
    g.add((imt, RDFS.label, Literal('Turtle')))

    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCT['format'], imt))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]['resources'][0]

    eq_(resource['format'], u'Turtle')
    eq_(resource['mimetype'], u'text/turtle')
def test_distribution_format_format_normalized(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCAT.mediaType, Literal('text/csv')))
    g.add((distribution1_1, DCT['format'], Literal('Comma Separated Values')))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]['resources'][0]

    if toolkit.check_ckan_version(min_version='2.3'):
        eq_(resource['format'], u'CSV')
        eq_(resource['mimetype'], u'text/csv')
    else:
        eq_(resource['format'], u'Comma Separated Values')
def test_spatial_both_geojson_and_wkt(self):
    g = Graph()

    dataset = URIRef('http://example.org/datasets/1')
    g.add((dataset, RDF.type, DCAT.Dataset))

    spatial_uri = URIRef('http://geonames/Newark')
    g.add((dataset, DCT.spatial, spatial_uri))

    g.add((spatial_uri, RDF.type, DCT.Location))
    g.add((spatial_uri,
           LOCN.geometry,
           Literal('{"type": "Point", "coordinates": [23, 45]}',
                   datatype=GEOJSON_IMT)))
    g.add((spatial_uri,
           LOCN.geometry,
           Literal('POINT (67 89)', datatype=GSP.wktLiteral)))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    extras = self._extras(datasets[0])

    eq_(extras['spatial'], '{"type": "Point", "coordinates": [23, 45]}')
def test_spatial_wrong_geometries(self):
    g = Graph()

    dataset = URIRef('http://example.org/datasets/1')
    g.add((dataset, RDF.type, DCAT.Dataset))

    spatial_uri = URIRef('http://geonames/Newark')
    g.add((dataset, DCT.spatial, spatial_uri))

    g.add((spatial_uri, RDF.type, DCT.Location))
    g.add((spatial_uri,
           LOCN.geometry,
           Literal('Not GeoJSON', datatype=GEOJSON_IMT)))
    g.add((spatial_uri,
           LOCN.geometry,
           Literal('Not WKT', datatype=GSP.wktLiteral)))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    extras = self._extras(datasets[0])

    assert_true('spatial' not in extras)
def test__datasets(self):
    p = RDFParser()
    p.g = _default_graph()

    eq_(len([d for d in p._datasets()]), 3)
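# _default_graph() is referenced by several tests but not defined in this
# excerpt. A minimal sketch, assuming it builds a graph with three typed
# datasets that each carry a dct:title (the datasets() tests below check
# both the count and the presence of 'title'); the body is an assumption:
def _default_graph():
    g = Graph()
    for num in ('1', '2', '3'):
        dataset_ref = URIRef('http://example.org/datasets/' + num)
        g.add((dataset_ref, RDF.type, DCAT.Dataset))
        g.add((dataset_ref, DCT.title, Literal('Test Dataset ' + num)))
    return g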
def test_distribution_both_access_and_download_url(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCAT.accessURL, Literal('http://access.url.org')))
    g.add((distribution1_1, DCAT.downloadURL, Literal('http://download.url.org')))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]['resources'][0]

    eq_(resource['url'], u'http://download.url.org')
    eq_(resource['download_url'], u'http://download.url.org')
    eq_(resource['access_url'], u'http://access.url.org')
def test_datasets_none_found(self):
    p = RDFParser()
    p.g = Graph()

    eq_(len([d for d in p.datasets()]), 0)
def test_spatial_multiple_dct_spatial_instances(self):
    g = Graph()

    dataset = URIRef("http://example.org/datasets/1")
    g.add((dataset, RDF.type, DCAT.Dataset))

    spatial_uri = URIRef("http://geonames/Newark")
    g.add((dataset, DCT.spatial, spatial_uri))

    location_ref = BNode()
    g.add((location_ref, RDF.type, DCT.Location))
    g.add((dataset, DCT.spatial, location_ref))
    g.add((location_ref,
           LOCN.geometry,
           Literal('{"type": "Point", "coordinates": [23, 45]}',
                   datatype=GEOJSON_IMT)))

    location_ref = BNode()
    g.add((location_ref, RDF.type, DCT.Location))
    g.add((dataset, DCT.spatial, location_ref))
    g.add((location_ref, SKOS.prefLabel, Literal("Newark")))

    p = RDFParser(profiles=["euro_dcat_ap"])
    p.g = g

    datasets = [d for d in p.datasets()]

    extras = self._extras(datasets[0])

    eq_(extras["spatial_uri"], "http://geonames/Newark")
    eq_(extras["spatial_text"], "Newark")
    eq_(extras["spatial"], '{"type": "Point", "coordinates": [23, 45]}')
def test_parse_subcatalog(self):
    publisher = {'name': 'Publisher',
                 'email': '*****@*****.**',
                 'type': 'Publisher',
                 'uri': 'http://pub.lish.er'}
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'test dataset',
        'extras': [
            {'key': 'source_catalog_title', 'value': 'Subcatalog example'},
            {'key': 'source_catalog_homepage', 'value': 'http://subcatalog.example'},
            {'key': 'source_catalog_description', 'value': 'Subcatalog example description'},
            {'key': 'source_catalog_language', 'value': 'http://publications.europa.eu/resource/authority/language/ITA'},
            {'key': 'source_catalog_modified', 'value': '2000-01-01'},
            {'key': 'source_catalog_publisher', 'value': json.dumps(publisher)}
        ]
    }
    catalog_dict = {
        'title': 'My Catalog',
        'description': 'An Open Data Catalog',
        'homepage': 'http://example.com',
        'language': 'de',
    }

    s = RDFSerializer()
    s.serialize_catalog(catalog_dict, dataset_dicts=[dataset])
    g = s.g

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    # at least one subcatalog with hasPart
    subcatalogs = list(p.g.objects(None, DCT.hasPart))
    assert_true(subcatalogs)

    # at least one dataset in subcatalogs
    subdatasets = []
    for subcatalog in subcatalogs:
        datasets = p.g.objects(subcatalog, DCAT.dataset)
        for dataset in datasets:
            subdatasets.append((dataset, subcatalog,))
    assert_true(subdatasets)

    datasets = dict([(d['title'], d) for d in p.datasets()])

    for subdataset, subcatalog in subdatasets:
        title = unicode(list(p.g.objects(subdataset, DCT.title))[0])
        dataset = datasets[title]
        has_subcat = False
        for ex in dataset['extras']:
            exval = ex['value']
            exkey = ex['key']
            if exkey == 'source_catalog_homepage':
                has_subcat = True
                eq_(exval, unicode(subcatalog))
        # check if we had subcatalog in extras
        assert_true(has_subcat)
def test_profiles_are_called_on_datasets(self):
    p = RDFParser()
    p._profiles = [MockRDFProfile1, MockRDFProfile2]
    p.g = _default_graph()

    for dataset in p.datasets():
        assert dataset['profile_1']
        assert dataset['profile_2']
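# MockRDFProfile1 and MockRDFProfile2 are not defined in this excerpt. A
# minimal sketch of what they presumably look like, assuming each profile
# just flags the dataset dicts it processed (class names come from the test
# above; the bodies and the RDFProfile base class usage are assumptions):
from ckanext.dcat.profiles import RDFProfile


class MockRDFProfile1(RDFProfile):

    def parse_dataset(self, dataset_dict, dataset_ref):
        dataset_dict['profile_1'] = True
        return dataset_dict


class MockRDFProfile2(RDFProfile):

    def parse_dataset(self, dataset_dict, dataset_ref):
        dataset_dict['profile_2'] = True
        return dataset_dict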
def test_tags_with_commas(self):
    g = Graph()

    dataset = URIRef('http://example.org/datasets/1')
    g.add((dataset, RDF.type, DCAT.Dataset))
    g.add((dataset, DCAT.keyword, Literal('Tree, forest, shrub')))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    eq_(len(datasets[0]['tags']), 3)
def test_datasets(self):
    p = RDFParser()
    p.g = _default_graph()

    datasets = []
    for dataset in p.datasets():
        assert 'title' in dataset
        datasets.append(dataset)

    eq_(len(datasets), 3)
def test_tags_with_commas_clean_tags_on(self):
    g = Graph()

    dataset = URIRef('http://example.org/datasets/1')
    g.add((dataset, RDF.type, DCAT.Dataset))
    g.add((dataset, DCAT.keyword, Literal(self.INVALID_TAG)))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    assert_true(self.VALID_TAG in datasets[0]['tags'])
    assert_true(self.INVALID_TAG not in datasets[0]['tags'])
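# VALID_TAG and INVALID_TAG are class attributes of the test class and are
# not shown in this excerpt. Hypothetical values consistent with how the
# tests use them (INVALID_TAG is a raw keyword string containing characters
# CKAN's tag validators reject; VALID_TAG is the cleaned tag dict expected
# to survive; the exact strings are assumptions):
INVALID_TAG = 'Som`E-in.valid tag!'
VALID_TAG = {'name': 'some-in.valid tag'}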
def test_tags_with_commas_clean_tags_off(self):
    g = Graph()

    dataset = URIRef('http://example.org/datasets/1')
    g.add((dataset, RDF.type, DCAT.Dataset))
    g.add((dataset, DCAT.keyword, Literal(self.INVALID_TAG)))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    # when config flag is set to false, bad tags can happen
    datasets = [d for d in p.datasets()]

    assert_true(self.VALID_TAG not in datasets[0]['tags'])
    assert_true({'name': self.INVALID_TAG} in datasets[0]['tags'])
def test_dataset_version_adms(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))
    g.add((dataset1, ADMS.version, Literal('2.3a')))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    dataset = [d for d in p.datasets()][0]

    assert dataset['version'] == u'2.3a'
def test_dataset_contact_point_vcard_hasFN_literal(self):
    g = Graph()

    dataset_ref = URIRef("http://example.org/datasets/1")
    g.add((dataset_ref, RDF.type, DCAT.Dataset))

    contact_point = BNode()
    g.add((contact_point, RDF.type, VCARD.Organization))
    g.add((contact_point, VCARD.hasFN, Literal('Point of Contact')))
    g.add((dataset_ref, DCAT.contactPoint, contact_point))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    dataset = [d for d in p.datasets()][0]
    extras = self._extras(dataset)

    assert extras['contact_name'] == 'Point of Contact'
def test_spatial_uri_only(self):
    g = Graph()

    dataset = URIRef("http://example.org/datasets/1")
    g.add((dataset, RDF.type, DCAT.Dataset))

    spatial_uri = URIRef("http://geonames/Newark")
    g.add((dataset, DCT.spatial, spatial_uri))

    p = RDFParser(profiles=["euro_dcat_ap"])
    p.g = g

    datasets = [d for d in p.datasets()]

    extras = self._extras(datasets[0])

    eq_(extras["spatial_uri"], "http://geonames/Newark")
    assert_true("spatial_text" not in extras)
    assert_true("spatial" not in extras)
def test_dataset_license_from_distribution_by_uri(self):
    # license_id retrieved from the URI of dcat:license object
    g = Graph()

    dataset = URIRef("http://example.org/datasets/1")
    g.add((dataset, RDF.type, DCAT.Dataset))

    distribution = URIRef("http://example.org/datasets/1/ds/1")
    g.add((dataset, DCAT.distribution, distribution))
    g.add((distribution, RDF.type, DCAT.Distribution))
    g.add((distribution, DCT.license,
           URIRef("http://www.opendefinition.org/licenses/cc-by")))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    dataset = [d for d in p.datasets()][0]
    eq_(dataset['license_id'], 'cc-by')
def test_spatial_literal_only(self):
    g = Graph()

    dataset = URIRef('http://example.org/datasets/1')
    g.add((dataset, RDF.type, DCAT.Dataset))
    g.add((dataset, DCT.spatial, Literal('Newark')))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    extras = self._extras(datasets[0])

    eq_(extras['spatial_text'], 'Newark')
    assert_true('spatial_uri' not in extras)
    assert_true('spatial' not in extras)
def test_distribution_format_format_only(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCT['format'], Literal('CSV')))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]['resources'][0]

    eq_(resource['format'], u'CSV')
def test_dataset_license_from_distribution_by_title(self):
    # license_id retrieved from dct:title of dcat:license object
    g = Graph()

    dataset = URIRef("http://example.org/datasets/1")
    g.add((dataset, RDF.type, DCAT.Dataset))

    distribution = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution, RDF.type, DCAT.Distribution))
    g.add((dataset, DCAT.distribution, distribution))

    license = BNode()
    g.add((distribution, DCT.license, license))
    g.add((license, DCT.title, Literal("Creative Commons Attribution")))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    dataset = [d for d in p.datasets()][0]
    assert dataset['license_id'] == 'cc-by'
def test_distribution_format_imt_normalized(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCAT.mediaType, Literal('text/unknown-imt')))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]['resources'][0]

    assert resource['format'] == u'text/unknown-imt'
    assert resource['mimetype'] == u'text/unknown-imt'
def test_distribution_format_format_only_with_slash_normalize_false(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCT['format'], Literal('text/csv')))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]['resources'][0]

    assert resource['format'] == u'text/csv'
    assert resource['mimetype'] == u'text/csv'
def test_dataset_contact_point_vcard_hasEmail_hasValue(self):
    g = Graph()

    dataset_ref = URIRef("http://example.org/datasets/1")
    g.add((dataset_ref, RDF.type, DCAT.Dataset))

    contact_point = BNode()
    g.add((contact_point, RDF.type, VCARD.Organization))

    hasEmail = BNode()
    g.add((hasEmail, VCARD.hasValue, Literal('mailto:[email protected]')))
    g.add((contact_point, VCARD.hasEmail, hasEmail))
    g.add((dataset_ref, DCAT.contactPoint, contact_point))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    dataset = [d for d in p.datasets()][0]
    extras = self._extras(dataset)

    # the parser is expected to strip the mailto: prefix from the literal
    assert extras['contact_email'] == '[email protected]'
def test_distribution_format_unknown_imt(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCAT.mediaType, Literal("text/unknown-imt")))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=["euro_dcat_ap"])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]["resources"][0]

    eq_(resource["format"], u"text/unknown-imt")
    eq_(resource["mimetype"], u"text/unknown-imt")
def test_spatial_rdfs_label(self):
    g = Graph()

    dataset = URIRef('http://example.org/datasets/1')
    g.add((dataset, RDF.type, DCAT.Dataset))

    spatial_uri = URIRef('http://geonames/Newark')
    g.add((dataset, DCT.spatial, spatial_uri))

    g.add((spatial_uri, RDF.type, DCT.Location))
    g.add((spatial_uri, RDFS.label, Literal('Newark')))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    extras = self._extras(datasets[0])

    assert extras['spatial_text'] == 'Newark'
def test_distribution_download_url(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCAT.downloadURL, Literal('http://download.url.org')))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]['resources'][0]

    eq_(resource['url'], u'http://download.url.org')
    eq_(resource['download_url'], u'http://download.url.org')
def test_distribution_format_format_only_normalize_false(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCT["format"], Literal("CSV")))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=["euro_dcat_ap"])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]["resources"][0]

    eq_(resource["format"], u"CSV")
    assert "mimetype" not in resource
def test_spatial_wkt_only(self):
    g = Graph()

    dataset = URIRef("http://example.org/datasets/1")
    g.add((dataset, RDF.type, DCAT.Dataset))

    spatial_uri = URIRef("http://geonames/Newark")
    g.add((dataset, DCT.spatial, spatial_uri))

    g.add((spatial_uri, RDF.type, DCT.Location))
    g.add((spatial_uri,
           LOCN.geometry,
           Literal("POINT (67 89)", datatype=GSP.wktLiteral)))

    p = RDFParser(profiles=["euro_dcat_ap"])
    p.g = g

    datasets = [d for d in p.datasets()]

    extras = self._extras(datasets[0])

    # NOTE: geomet returns floats for coordinates on WKT -> GeoJSON
    eq_(extras["spatial"], '{"type": "Point", "coordinates": [67.0, 89.0]}')
def test_distribution_access_url(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCAT.accessURL, Literal("http://access.url.org")))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=["euro_dcat_ap"])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]["resources"][0]

    eq_(resource["url"], u"http://access.url.org")
    assert "download_url" not in resource
def _build_and_parse_format_mediatype_graph(self, format_item=None,
                                            mediatype_item=None):
    # Variant of the helper below that uses the namespaces bound on the
    # test class and parses with the euro_dcat_ap + dcatap_de profile chain.
    g = Graph()

    dataset = URIRef("http://example.org/datasets/1")
    g.add((dataset, RDF.type, self.DCAT.Dataset))

    distribution = URIRef("http://example.org/datasets/1/ds/1")
    g.add((dataset, self.DCAT.distribution, distribution))
    g.add((distribution, RDF.type, self.DCAT.Distribution))
    if format_item:
        g.add((distribution, self.DCT['format'], format_item))
    if mediatype_item:
        g.add((distribution, self.DCAT.mediaType, mediatype_item))
    if format_item is None and mediatype_item is None:
        raise AssertionError('At least one of format or mediaType is required!')

    p = RDFParser(profiles=['euro_dcat_ap', 'dcatap_de'])
    p.g = g

    dataset = [d for d in p.datasets()][0]
    return dataset.get('resources')
def _build_and_parse_format_mediatype_graph(self, format_item=None,
                                            mediatype_item=None):
    """
    Creates a minimal graph with a distribution having the specified
    dct:format and dcat:mediaType nodes. At least one of those nodes has
    to be given.

    After creating the graph, it is parsed using the euro_dcat_ap profile.

    :param format_item: Literal or URIRef object for dct:format. None if
                        the node should be omitted.
    :param mediatype_item: Literal or URIRef object for dcat:mediaType.
                           None if the node should be omitted.

    :returns: The parsed resource dict
    """
    g = Graph()

    dataset = URIRef("http://example.org/datasets/1")
    g.add((dataset, RDF.type, DCAT.Dataset))

    distribution = URIRef("http://example.org/datasets/1/ds/1")
    g.add((dataset, DCAT.distribution, distribution))
    g.add((distribution, RDF.type, DCAT.Distribution))
    if format_item:
        g.add((distribution, DCT['format'], format_item))
    if mediatype_item:
        g.add((distribution, DCAT.mediaType, mediatype_item))
    if format_item is None and mediatype_item is None:
        raise AssertionError(
            'At least one of format or mediaType is required!')

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    dataset = [d for d in p.datasets()][0]
    return dataset.get('resources')
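# A hypothetical usage of the helper above (the test name and expected
# values are illustrative, not taken from the original suite; they mirror
# the format/mimetype behaviour exercised by the tests in this file):
def test_distribution_format_and_mediatype_literals(self):
    resources = self._build_and_parse_format_mediatype_graph(
        format_item=Literal('CSV'),
        mediatype_item=Literal('text/csv'))

    eq_(resources[0]['format'], u'CSV')
    eq_(resources[0]['mimetype'], u'text/csv')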
def test_distribution_format_format_only_without_slash_normalize_false(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCT['format'], Literal('Comma Separated Values')))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]['resources'][0]

    eq_(resource['format'], u'Comma Separated Values')
    assert 'mimetype' not in resource