def test_subthemes(self):
    """Round-trip a dataset whose 'theme' field is a JSON list of
    theme/subtheme dicts through the DCAT serializer and parser, and
    check the themes survive the round trip."""
    load_themes()
    # Two themes: AGRI with two EUROVOC subthemes, ENVI with none.
    subthemes = [{
        'theme': 'AGRI',
        'subthemes': [
            'http://eurovoc.europa.eu/100253',
            'http://eurovoc.europa.eu/100258'
        ]
    }, {
        'theme': 'ENVI',
        'subthemes': []
    }]
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Dataset di test DCAT_AP-IT',
        'notes': 'dcatapit dataset di test',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{
            'name': 'Tag 1'
        }, {
            'name': 'Tag 2'
        }],
        'issued': '2016-11-29',
        'modified': '2016-11-29',
        'frequency': 'UPDATE_CONT',
        'publisher_name': 'bolzano',
        'publisher_identifier': '234234234',
        'creator_name': 'test',
        'creator_identifier': '412946129',
        'holder_name': 'bolzano',
        'holder_identifier': '234234234',
        'alternate_identifier': 'ISBN,TEST',
        'theme': json.dumps(subthemes),
    }
    # Serialize to RDF, then parse it back with the DCAT-AP-IT profile.
    s = RDFSerializer()
    p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])
    serialized = s.serialize_dataset(dataset)
    p.parse(serialized)
    datasets = list(p.datasets())
    assert len(datasets) == 1
    d = datasets[0]
    # NOTE(review): 'themes' is decoded from the INPUT dataset, not from
    # the round-tripped 'd' — the loop below therefore validates the input
    # against itself; confirm whether d['theme'] was intended here.
    themes = json.loads(dataset['theme'])
    assert (len(themes) == len(subthemes) == 2)
    for t in themes:
        if t['theme'] == 'ENVI':
            assert t['subthemes'] == []
        elif t['theme'] == 'AGRI':
            # Subtheme order is not significant, so compare as sets.
            assert set(t['subthemes']) == set(subthemes[0]['subthemes'])
        else:
            assert False, "Unknown theme: {}".format(t)
def test_creators(self):
    """Round-trip a dataset carrying multiple multilingual creators in the
    JSON 'creator' field, plus the legacy flat creator_name/creator_identifier
    pair, and check all of them are present after parsing."""
    creators = [{'creator_name': {DEFAULT_LANG: 'abc', 'it': 'abc it'}, 'creator_identifier': "ABC"},
                {'creator_name': {DEFAULT_LANG: 'cde', 'it': 'cde it'}, 'creator_identifier': "CDE"},
                ]
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Dataset di test DCAT_AP-IT',
        'notes': 'dcatapit dataset di test',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'issued': '2016-11-29',
        'modified': '2016-11-29',
        'identifier': 'ISBN',
        'temporal_start': '2016-11-01',
        'temporal_end': '2016-11-30',
        'frequency': 'UPDATE_CONT',
        'publisher_name': 'bolzano',
        'publisher_identifier': '234234234',
        'creator_name': 'test',
        'creator_identifier': '412946129',
        'holder_name': 'bolzano',
        'holder_identifier': '234234234',
        'alternate_identifier': 'ISBN,TEST',
        'theme': '{ECON,ENVI}',
        'geographical_geonames_url': 'http://www.geonames.org/3181913',
        'language': '{DEU,ENG,ITA}',
        'is_version_of': 'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
        'creator': json.dumps(creators)
    }
    # Serialize to RDF and parse back with the DCAT-AP-IT profile.
    s = RDFSerializer()
    p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])
    serialized = s.serialize_dataset(dataset)
    p.parse(serialized)
    datasets = list(p.datasets())
    assert len(datasets) == 1
    d = datasets[0]
    # The flat creator_name/creator_identifier pair is expected to be merged
    # into the creators list by the serializer, so add it to the expectation.
    creators.append({'creator_identifier': dataset['creator_identifier'],
                     'creator_name': {DEFAULT_LANG: dataset['creator_name']}})
    creators_dict = dict((v['creator_identifier'], v) for v in creators)
    creators_in = json.loads(d['creator'])
    # Every parsed creator must match its expected entry by identifier.
    for c in creators_in:
        assert c['creator_identifier'] in creators_dict.keys(), "no {} key in {}".format(c['creator_identifier'], creators_dict.keys())
        assert c['creator_name'] == creators_dict[c['creator_identifier']]['creator_name'],\
            "{} vs {}".format(c['creator_name'], creators_dict[c['creator_identifier']]['creator_name'])
    # And conversely: every expected creator must appear in the parsed output.
    for c in creators_dict.keys():
        assert c in [_c['creator_identifier'] for _c in creators_in]
        cdata = creators_dict[c]
        assert cdata in creators_in
def export_package_to_rdf(package_dict, _format='xml'):
    """Serialize a package's metadata to RDF.

    :param dict package_dict: the package metadata.
    :param str _format: target serialization format (``xml`` by default).
    :returns: the RDF serialization of the package.
    """
    return RDFSerializer().serialize_dataset(package_dict, _format=_format)
def test_temporal_coverage(self):
    """Round-trip a dataset carrying multiple temporal_coverage intervals
    (plus the flat temporal_start/temporal_end pair) and check the parsed
    value passes the dcatapit temporal-coverage validator.

    Fix: the original used the Python 2-only ``except Exc, err:`` syntax,
    which is a SyntaxError on Python 3; it is now ``except Exc as err:``.
    """
    load_themes()
    temporal_coverage = [{'temporal_start': '2001-01-01T00:00:00', 'temporal_end': '2001-02-01T10:11:12'},
                         {'temporal_start': '2001-01-01T00:00:00', 'temporal_end': '2001-02-01T10:11:12'},
                         ]
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Dataset di test DCAT_AP-IT',
        'notes': 'dcatapit dataset di test',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'issued': '2016-11-29',
        'modified': '2016-11-29',
        'identifier': 'ISBN',
        'temporal_start': '2016-11-01T00:00:00',
        'temporal_end': '2016-11-30T00:00:00',
        'temporal_coverage': json.dumps(temporal_coverage),
        'frequency': 'UPDATE_CONT',
        'publisher_name': 'bolzano',
        'publisher_identifier': '234234234',
        'creator_name': 'test',
        'creator_identifier': '412946129',
        'holder_name': 'bolzano',
        'holder_identifier': '234234234',
        'alternate_identifier': 'ISBN,TEST',
        'theme': '{ECON,ENVI}',
        'geographical_geonames_url': 'http://www.geonames.org/3181913',
        'language': '{DEU,ENG,ITA}',
        'is_version_of': 'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
    }
    # Serialize to RDF and parse back with the DCAT-AP-IT profile.
    s = RDFSerializer()
    p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])
    serialized = s.serialize_dataset(dataset)
    p.parse(serialized)
    datasets = list(p.datasets())
    assert len(datasets) == 1
    d = datasets[0]
    # The flat temporal_start/temporal_end pair becomes one more interval
    # in the serialized output, so add it to the expectation.
    temporal_coverage.append({'temporal_start': dataset['temporal_start'],
                              'temporal_end': dataset['temporal_end']})
    try:
        # Should not raise: the round-tripped value must still validate.
        validators.dcatapit_temporal_coverage(d['temporal_coverage'], {})
    except validators.Invalid as err:
        assert False, "Temporal coverage should be valid: {}".format(err)
def dcat_dataset_show(context, data_dict):
    """Return a single dataset serialized as RDF.

    Checks access, fetches the package via ``package_show`` and serializes
    it in the format given by ``data_dict['format']`` (if any).
    """
    toolkit.check_access('dcat_dataset_show', context, data_dict)
    pkg = toolkit.get_action('package_show')(context, data_dict)
    return RDFSerializer().serialize_dataset(pkg, _format=data_dict.get('format'))
def dcat_dataset_show(context, data_dict):
    """Return a single dataset serialized as RDF.

    Checks access, fetches the package via ``package_show`` and serializes
    it using the profiles and format requested in ``data_dict`` (both
    optional).
    """
    toolkit.check_access('dcat_dataset_show', context, data_dict)
    pkg = toolkit.get_action('package_show')(context, data_dict)
    serializer = RDFSerializer(profiles=data_dict.get('profiles'))
    return serializer.serialize_dataset(pkg, _format=data_dict.get('format'))
def dcat_markup_dataset_show(context, data_dict):
    """Return a dataset serialized as RDF, with its description ('notes')
    replaced by the dataset title before serialization.

    Uses the profiles and format requested in ``data_dict`` (both optional).
    """
    p.toolkit.check_access('dcat_dataset_show', context, data_dict)
    pkg = p.toolkit.get_action('package_show')(context, data_dict)
    # Markup variant: force the description to the dataset title.
    pkg['notes'] = pkg['title']
    serializer = RDFSerializer(profiles=data_dict.get('profiles'))
    return serializer.serialize_dataset(pkg, _format=data_dict.get('format'))
def test_conforms_to(self):
    """Round-trip a dataset carrying a JSON 'conforms_to' list and check
    every entry survives in both directions (expected -> parsed and
    parsed -> expected).

    Fixes: the length-mismatch assert message measured
    ``len(d['conforms_to'])`` — the length of the JSON *string*, not of the
    parsed list — so it reported a misleading count; and the dict-building
    generator expression shadowed the parsed dataset variable ``d``.
    """
    conforms_to_in = [{'identifier': 'CONF1',
                       'uri': 'http://conf01/abc',
                       'title': {'en': 'title', 'it': 'title'},
                       'referenceDocumentation': ['http://abc.efg/'],
                       },
                      {'identifier': 'CONF2',
                       'title': {'en': 'title', 'it': 'title'},
                       'description': {'en': 'descen', 'it': 'descit'},
                       'referenceDocumentation': ['http://abc.efg/'],
                       },
                      ]
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Dataset di test DCAT_AP-IT',
        'notes': 'dcatapit dataset di test',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'issued': '2016-11-29',
        'modified': '2016-11-29',
        'identifier': 'ISBN',
        'temporal_start': '2016-11-01',
        'temporal_end': '2016-11-30',
        'frequency': 'UPDATE_CONT',
        'publisher_name': 'bolzano',
        'publisher_identifier': '234234234',
        'creator_name': 'test',
        'creator_identifier': '412946129',
        'holder_name': 'bolzano',
        'holder_identifier': '234234234',
        'alternate_identifier': 'ISBN,TEST',
        'theme': '{ECON,ENVI}',
        'geographical_geonames_url': 'http://www.geonames.org/3181913',
        'language': '{DEU,ENG,ITA}',
        'is_version_of': 'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
        'conforms_to': json.dumps(conforms_to_in)
    }
    # Serialize to RDF and parse back with the DCAT-AP-IT profile.
    s = RDFSerializer()
    p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])
    serialized = s.serialize_dataset(dataset)
    p.parse(serialized)
    datasets = list(p.datasets())
    assert len(datasets) == 1
    d = datasets[0]
    # Index the expected entries by identifier (do not shadow 'd').
    conforms_to = dict((c['identifier'], c) for c in conforms_to_in)
    dataset_conforms_to = json.loads(d['conforms_to'])
    assert len(dataset_conforms_to) == len(conforms_to_in),\
        "got {}, should be {}".format(len(dataset_conforms_to), len(conforms_to_in))
    for conf in dataset_conforms_to:
        check = conforms_to[conf['identifier']]
        for k, v in check.items():
            # there should be no empty uri
            if k == 'uri' and not v:
                assert conf.get(k) is None
            else:
                assert conf.get(k) == v
        for k, v in conf.items():
            src_v = check.get(k)
            # ref may be extracted from rdf, but it can be
            # generated by serializer
            if not src_v and k == 'uri':
                continue
            # no value, may be missing key in source
            elif not src_v:
                assert not check.get(k)
            else:
                assert check[k] == v
def test_license(self):
    """Check license handling on resources: an invalid license_type falls
    back to the default ('unknown') license in the graph, while a valid
    vocabulary URI is serialized with its document URI and license type,
    and both survive a parse round trip."""

    def get_path(fname):
        # Resolve a file under the repository's 'examples' directory.
        return os.path.join(os.path.dirname(__file__), '..', '..', '..', 'examples', fname)
    # Load the licenses vocabulary into the db before serializing.
    licenses = get_path('licenses.rdf')
    load_from_graph(path=licenses)
    Session.flush()
    dataset = {'title': 'some title',
               'id': 'sometitle',
               'resources': [
                   {
                       'id': 'resource/1111',
                       'uri': 'http://resource/1111',
                       'license_type': 'invalid',
                   },
                   {
                       'id': 'resource/2222',
                       'uri': 'http://resource/2222',
                       'license_type': 'https://w3id.org/italia/controlled-vocabulary/licences/A311_GFDL13'
                   }
               ]
               }
    p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])
    s = RDFSerializer()
    dataset_ref = s.graph_from_dataset(dataset)
    g = s.g
    r1 = URIRef(dataset['resources'][0]['uri'])
    r2 = URIRef(dataset['resources'][1]['uri'])
    # The invalid license_type must resolve to the default license.
    unknown = License.get(License.DEFAULT_LICENSE)
    license_ref = g.value(r1, DCT.license)
    assert license_ref is not None
    assert str(license_ref) == unknown.uri,\
        "got license {}, instead of {}".format(license_ref, unknown.license_type)
    # The valid vocabulary URI must keep its document URI and type.
    gpl = License.get(dataset['resources'][1]['license_type'])
    assert gpl is not None
    license_ref = g.value(r2, DCT.license)
    license_type = g.value(license_ref, DCT.type)
    assert license_ref is not None
    assert str(license_ref) == gpl.document_uri
    assert str(license_type) == gpl.license_type
    # Now round-trip through the parser and re-check on the dataset dict.
    serialized = s.serialize_dataset(dataset)
    p.parse(serialized)
    datasets = list(p.datasets())
    assert len(datasets) == 1
    new_dataset = datasets[0]
    resources = new_dataset['resources']

    def _find_res(res_uri):
        # Locate a parsed resource by its uri; fail loudly if absent.
        for res in resources:
            if res_uri == res['uri']:
                return res
        raise ValueError("No resource for {}".format(res_uri))

    new_res_unknown = _find_res(str(r1))
    new_res_gpl = _find_res(str(r2))
    assert new_res_unknown['license_type'] == unknown.uri, (new_res_unknown['license_type'], unknown.uri,)
    assert new_res_gpl['license_type'] == dataset['resources'][1]['license_type']
def test_holder(self):
    """Check the dct:rightsHolder node in the serialized graph: a non-local
    dataset uses its own holder_name/holder_identifier, while a local dataset
    (no holder fields) falls back to its owner organization's identifier."""
    org = {'name': 'org-test',
           'title': 'Test org',
           'identifier': "abc"}
    # pkg1: remote dataset with explicit holder fields.
    pkg1 = {
        'id': '2b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset-1',
        'title': 'Dataset di test DCAT_AP-IT',
        'notes': 'dcatapit dataset di test',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'modified': '2016-11-29',
        'identifier': 'ISBNabc',
        'frequency': 'UPDATE_CONT',
        'publisher_name': 'bolzano',
        'publisher_identifier': '234234234',
        'creator_name': 'test',
        'creator_identifier': '412946129',
        'holder_name': 'bolzano',
        'holder_identifier': '234234234',
        'theme': '{ECON,ENVI}',
        'dataset_is_local': False,
        'language': '{DEU,ENG,ITA}',
    }
    # pkg2: local dataset without holder fields, owned by the org above.
    pkg2 = {
        'id': 'eb6fe9ca-dc77-4cec-92a4-55c6624a5b00',
        'name': 'test-dataset-2',
        'title': 'Dataset di test DCAT_AP-IT 2',
        'notes': 'dcatapit dataset di test',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'modified': '2016-11-29',
        'identifier': 'ISBNcde',
        'frequency': 'UPDATE_CONT',
        'publisher_name': 'bolzano',
        'publisher_identifier': '234234234',
        'creator_name': 'test',
        'creator_identifier': '412946129',
        'theme': '{ECON,ENVI}',
        'dataset_is_local': True,
        'language': '{DEU,ENG,ITA}',
        'owner_org': org['name'],
    }
    packages = [pkg1, pkg2]
    ctx = {'ignore_auth': True, 'user': self._get_user()['name']}
    org_dict = helpers.call_action('organization_create', context=ctx, **org)
    for pkg in packages:
        helpers.call_action('package_create', context=ctx, **pkg)
    for pkg in packages:
        s = RDFSerializer()
        g = s.g
        dataset_ref = s.graph_from_dataset(pkg)
        has_identifier = False
        rights_holders = list(g.objects(dataset_ref, DCT.rightsHolder))
        assert len(rights_holders), "There should be one rights holder for\n {}:\n {}".format(pkg, s.serialize_dataset(pkg))
        for holder_ref in rights_holders:
            _holder_names = list(g.objects(holder_ref, FOAF.name))
            _holder_ids = list((str(ob) for ob in g.objects(holder_ref, DCT.identifier)))
            # local dataset will use organization name only
            # while remote will have at least two names - one with lang, one default without lang
            if pkg['dataset_is_local']:
                num_holder_names = 1
            else:
                num_holder_names = 2
            assert len(_holder_names) == num_holder_names, _holder_names
            assert len(_holder_ids) == 1
            # Fall back to the org identifier when the pkg has no holder.
            test_id = pkg.get('holder_identifier') or org_dict['identifier']
            has_identifier = _holder_ids[0] == test_id
            assert has_identifier, "No identifier in {} (expected {}) for\n {}\n{}".format(_holder_ids, test_id, pkg, s.serialize_dataset(pkg))
def test_holder(self):
    """Check the dct:rightsHolder node in the serialized graph: a non-local
    dataset uses its own holder_name/holder_identifier, while a local dataset
    (no holder fields) falls back to its owner organization's identifier.
    The serializer runs on the *created* package dicts, not the source dicts."""
    org = {'name': 'org-test', 'title': 'Test org', 'identifier': 'abc'}
    # pkg1: remote dataset with explicit holder fields.
    pkg1 = {
        # 'id': '2b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset-1',
        'title': 'Dataset di test DCAT_AP-IT',
        'notes': 'dcatapit dataset di test',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'modified': '2016-11-29',
        'identifier': str(uuid.uuid4()),
        'frequency': 'UPDATE_CONT',
        'publisher_name': 'bolzano',
        'publisher_identifier': '234234234',
        'creator_name': 'test',
        'creator_identifier': '789789789',
        'holder_name': 'bolzano',
        'holder_identifier': '234234234',
        FIELD_THEMES_AGGREGATE: themes_to_aggr_json(('ECON', )),
        'theme': json.dumps([theme_name_to_uri(name) for name in ('ECON', )]),
        'dataset_is_local': False,
        'language': '{DEU,ENG,ITA}',
    }
    # pkg2: local dataset without holder fields, owned by the org above.
    pkg2 = {
        # 'id': 'eb6fe9ca-dc77-4cec-92a4-55c6624a5b00',
        'name': 'test-dataset-2',
        'title': 'Dataset di test DCAT_AP-IT 2',
        'notes': 'dcatapit dataset di test',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'modified': '2016-11-29',
        'identifier': str(uuid.uuid4()),
        'frequency': 'UPDATE_CONT',
        'publisher_name': 'bolzano',
        'publisher_identifier': '234234234',
        'creator_name': 'test',
        'creator_identifier': '123123123123',
        FIELD_THEMES_AGGREGATE: themes_to_aggr_json(('ENVI', )),
        'theme': json.dumps([theme_name_to_uri(name) for name in ('ENVI', )]),
        'dataset_is_local': True,
        'language': '{DEU,ENG,ITA}',
        'owner_org': org['name'],
    }
    src_packages = [pkg1, pkg2]
    ctx = {'ignore_auth': True, 'user': self._get_user()['name']}
    # Reuse the org if it already exists (test isolation may vary).
    org_loaded = Group.by_name(org['name'])
    if org_loaded:
        org_dict = org_loaded.__dict__
    else:
        org_dict = helpers.call_action('organization_create', context=ctx, **org)
    pkg1['owner_org'] = org_dict['id']
    pkg2['owner_org'] = org_dict['id']
    created_packages = [
        helpers.call_action('package_create', context=ctx, **pkg)
        for pkg in src_packages
    ]
    for pkg in created_packages:
        s = RDFSerializer()
        g = s.g
        dataset_ref = s.graph_from_dataset(pkg)
        has_identifier = False
        rights_holders = list(g.objects(dataset_ref, DCT.rightsHolder))
        assert len(rights_holders), 'There should be one rights holder for\n {}:\n {}'.\
            format(pkg, s.serialize_dataset(pkg))
        for holder_ref in rights_holders:
            _holder_names = list(g.objects(holder_ref, FOAF.name))
            _holder_ids = list(
                (str(ob) for ob in g.objects(holder_ref, DCT.identifier)))
            # local dataset will use organization name only
            # while remote will have at least two names - one with lang, one default without lang
            if pkg['dataset_is_local']:
                num_holder_names = 1
            else:
                num_holder_names = 2
            assert len(_holder_names) == num_holder_names, _holder_names
            assert len(_holder_ids) == 1
            # Fall back to the org identifier when the pkg has no holder.
            test_id = pkg.get(
                'holder_identifier') or org_dict['identifier']
            has_identifier = _holder_ids[0] == test_id
            assert has_identifier, \
                f'No identifier in {_holder_ids} (expected {test_id}) for\n {pkg}\n{s.serialize_dataset(pkg)}'
def test_subthemes(self):
    """Round-trip a dataset carrying aggregated themes/subthemes through the
    DCAT serializer and parser, then check both the plain theme URIs (in
    extras) and the aggregated theme structure on the parsed dataset.

    Fix: ``self.assertEquals`` is a deprecated unittest alias (removed in
    Python 3.12); replaced with ``assertEqual``.
    """
    load_themes()
    # Two themes: AGRI with two EUROVOC subthemes, ENVI with none.
    subthemes = [{
        'theme': 'AGRI',
        'subthemes': [
            'http://eurovoc.europa.eu/100253',
            'http://eurovoc.europa.eu/100258'
        ]
    }, {
        'theme': 'ENVI',
        'subthemes': []
    }]
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Dataset di test DCAT_AP-IT',
        'notes': 'dcatapit dataset di test',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'issued': '2016-11-29',
        'modified': '2016-11-29',
        'frequency': 'UPDATE_CONT',
        'publisher_name': 'bolzano',
        'publisher_identifier': '234234234',
        'creator_name': 'test',
        'creator_identifier': '412946129',
        'holder_name': 'bolzano',
        'holder_identifier': '234234234',
        'alternate_identifier': 'ISBN,TEST',
        FIELD_THEMES_AGGREGATE: json.dumps(subthemes),
        # 'theme' is added dynamically when retrieving datasets from the db
        'theme': theme_aggr_to_theme_uris(subthemes)
    }
    # Serialize to RDF and parse back with the DCAT-AP-IT profile.
    s = RDFSerializer()
    p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])
    serialized = s.serialize_dataset(dataset)
    p.parse(serialized)
    datasets = list(p.datasets())
    assert len(datasets) == 1
    parsed_dataset = datasets[0]
    # test themes: the plain theme URIs end up in the dataset extras
    parsed_themes_raw = _get_extra_value(parsed_dataset.get('extras'), 'theme')
    self.assertIsNotNone(
        parsed_themes_raw,
        f'Themes not found in parsed dataset {parsed_dataset}')
    parsed_themes = json.loads(parsed_themes_raw)
    self.assertEqual(2, len(parsed_themes))
    self.assertSetEqual(set(theme_names_to_uris(['AGRI', 'ENVI'])),
                        set(parsed_themes))
    # test aggregated themes: theme + subthemes structure survives the trip
    parsed_aggr_raw = parsed_dataset.get(FIELD_THEMES_AGGREGATE, None)
    self.assertIsNotNone(
        parsed_aggr_raw,
        f'Aggregated themes not found in parsed dataset {parsed_dataset}')
    parsed_aggr = json.loads(parsed_aggr_raw)
    self.assertIsNotNone(parsed_aggr, 'Aggregate is None')
    self.assertEqual(2, len(parsed_aggr))
    for t in parsed_aggr:
        if t['theme'] == 'ENVI':
            self.assertSetEqual(set([]), set(t['subthemes']))
        elif t['theme'] == 'AGRI':
            self.assertSetEqual(set(subthemes[0]['subthemes']),
                                set(t['subthemes']))
        else:
            self.fail(f'Unknown theme: {t}')