def dataontosearch_dataset_delete(context, data_dict):
    '''
    Remove a dataset, together with all of its concept associations, from
    DataOntoSearch's data store.

    :param id: Name or ID of the dataset to remove from DataOntoSearch
    :type id: string
    :return: The ``success`` value of DataOntoSearch's JSON response: True if
        the dataset was removed, or False if the dataset was not found.
    :rtype: bool
    '''
    toolkit.check_access(u'dataontosearch_dataset_delete', context, data_dict)

    # The only parameter is the CKAN name/ID of the dataset
    name_or_id = toolkit.get_or_bust(data_dict, u'id')

    # Resolve it to the full dataset dict, so its RDF URI can be derived
    package_show = toolkit.get_action(u'package_show')
    package = package_show(None, {u'id': name_or_id})
    rdf_uri = dataset_uri(package)

    # Ask the tagger to delete the dataset; HTTP errors are raised as-is
    response = make_tagger_delete_request(u'/dataset', {u'dataset_id': rdf_uri})
    response.raise_for_status()
    return response.json()[u'success']
def graph_from_dataset(self, dataset_dict):
    '''
    Given a CKAN dataset dict, creates a graph using the loaded profiles

    The class RDFLib graph (accessible via `serializer.g`) will be updated
    by the loaded profiles. When the serializer has a `validation_mode`
    attribute, its value is copied onto every profile instance before the
    profile is run.

    :param dataset_dict: CKAN dataset dict to serialize
    :return: the reference to the dataset, which will be an rdflib URIRef.
    '''
    # dataset_uri() alone decides the dataset URI. An earlier manual scan of
    # the `uri` field / `uri` extra only filled a local that was never read
    # (and could raise on malformed extras), so it has been dropped.
    dataset_ref = URIRef(dataset_uri(dataset_dict))

    for profile_class in self._profiles:
        profile = profile_class(self.g, self.compatibility_mode)
        # validation_mode is optional on the serializer
        if hasattr(self, 'validation_mode'):
            profile.validation_mode = self.validation_mode
        profile.graph_from_dataset(dataset_dict, dataset_ref)

    return dataset_ref
def dataontosearch_tag_delete(context, data_dict):
    '''
    Remove one existing association between a dataset and a concept.

    :param dataset: Name or ID of the dataset to disassociate with a concept
    :type dataset: string
    :param concept: RDF URI or human-readable label for the concept to no
        longer associate with the dataset
    :type concept: string
    :return: The ``success`` value of DataOntoSearch's JSON response
    :rtype: bool
    '''
    toolkit.check_access(u'dataontosearch_tag_delete', context, data_dict)

    # Both parameters are mandatory
    name_or_id = toolkit.get_or_bust(data_dict, u'dataset')
    concept = toolkit.get_or_bust(data_dict, u'concept')

    # Resolve the dataset, so its RDF URI can be derived
    package_show = toolkit.get_action(u'package_show')
    package = package_show(None, {u'id': name_or_id})
    rdf_uri = dataset_uri(package)

    # Ask the tagger to delete the tag; HTTP errors are raised as-is
    payload = {
        u'dataset_id': rdf_uri,
        u'concept': concept,
    }
    response = make_tagger_delete_request(u'/tag', payload)
    response.raise_for_status()
    return response.json()[u'success']
def _csc_dataset_uri(self, dataset_dict):
    '''
    Returns an URI for the dataset

    This will be used to uniquely reference the dataset on the RDF
    serializations.

    The value will be the first found of:

        1. `catalog_uri()` + '/catalogo/' + `name` field, when the dataset
           has a non-empty `name`
        2. Whatever `dataset_uri(dataset_dict)` returns (presumably based on
           the `uri` field, a `uri` extra or the dataset `id` -- check the
           documentation of `dataset_uri()`)

    Check the documentation for `catalog_uri()` for the recommended ways of
    setting it.

    Returns a string with the dataset URI.
    '''
    name = dataset_dict.get('name')
    if name:
        return '{0}/catalogo/{1}'.format(catalog_uri().rstrip('/'), name)

    # No usable name. Previously `uri` was never bound on this path, so the
    # fallback below was unreachable and an UnboundLocalError was raised.
    return dataset_uri(dataset_dict)
def test_graph_from_dataset(self):
    '''
    Serialize a dataset dict with RDFSerializer and check the triples that
    end up in the graph for the dataset reference.
    '''
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'notes': 'Lorem ipsum',
        'url': 'http://example.com/ds1',
        'version': '1.0b',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'extras': [
            {'key': 'alternate_identifier', 'value': 'xyz'},
            {'key': 'version_notes', 'value': 'This is a beta version'},
            {'key': 'frequency', 'value': 'monthly'},
            {'key': 'language', 'value': '[\"en\"]'},
            {'key': 'theme', 'value': '[\"http://eurovoc.europa.eu/100142\", \"http://eurovoc.europa.eu/100152\"]'},
            {'key': 'conforms_to', 'value': '[\"Standard 1\", \"Standard 2\"]'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset)

    # The returned reference must match the URI computed by the utils
    eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

    # Single-valued properties
    single_valued = [
        (RDF.type, DCAT.Dataset),
        (DCT.title, dataset['title']),
        (DCT.description, dataset['notes']),
        (OWL.versionInfo, dataset['version']),
        (ADMS.versionNotes, extras['version_notes']),
        (ADMS.identifier, extras['alternate_identifier']),
        (DCT.accrualPeriodicity, extras['frequency']),
    ]
    for predicate, expected in single_valued:
        assert self._triple(graph, dataset_ref, predicate, expected)

    # Tags: one keyword triple per tag
    keyword_triples = list(graph.triples((dataset_ref, DCAT.keyword, None)))
    eq_(len(keyword_triples), 2)
    for tag in dataset['tags']:
        assert self._triple(graph, dataset_ref, DCAT.keyword, tag['name'])

    # Dates are typed as xsd:dateTime
    for predicate, field in [(DCT.issued, 'metadata_created'),
                             (DCT.modified, 'metadata_modified')]:
        assert self._triple(graph, dataset_ref, predicate, dataset[field],
                            XSD.dateTime)

    # JSON-encoded list extras: one triple per list item
    for key, predicate in [
        ('language', DCT.language),
        ('theme', DCAT.theme),
        ('conforms_to', DCAT.conformsTo),
    ]:
        values = json.loads(extras[key])
        found = list(graph.triples((dataset_ref, predicate, None)))
        eq_(len(found), len(values))
        for value in values:
            assert self._triple(graph, dataset_ref, predicate, value)
def dataontosearch_tag_list(context, data_dict):
    '''
    List the concepts associated with the specified dataset.

    :param id: id or name of the dataset to fetch tags for
    :type id: string
    :rtype: list of concepts. Each concept is a dict, with 'label' being
        human-readable label and 'uri' being the URI identifying this
        concept. An empty list is returned when DataOntoSearch answers with
        a JSON null.
    '''
    toolkit.check_access(u'dataontosearch_tag_list', context, data_dict)

    # Resolve the requested dataset
    name_or_id = toolkit.get_or_bust(data_dict, u'id')
    package_show = toolkit.get_action(u'package_show')
    package = package_show(None, {u'id': name_or_id})

    # The RDF URI is generated with the very same code ckanext-dcat uses, so
    # it stays consistent with what DataOntoSearch found when it retrieved
    # the dataset RDF. Hence the use of the internal DCAT API.
    rdf_uri = dataset_uri(package)

    response = make_tagger_get_request(u'/tag', {u'dataset_id': rdf_uri})
    response.raise_for_status()

    payload = response.json()
    return [] if payload is None else payload[u'concepts']
def test_graph_from_dataset(self):
    '''
    Serialize a DCAT_AP-IT flavoured dataset dict and check the basic
    fields and keywords found in the resulting graph.
    '''
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Dataset di test DCAT_AP-IT',
        'notes': 'dcatapit dataset di test',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'issued': '2016-11-29',
        'modified': '2016-11-29',
        'identifier': 'ISBN',
        'temporal_start': '2016-11-01',
        'temporal_end': '2016-11-30',
        'frequency': 'UPDATE_CONT',
        'publisher_name': 'bolzano',
        'publisher_identifier': '234234234',
        'creator_name': 'test',
        'creator_identifier': '412946129',
        'holder_name': 'bolzano',
        'holder_identifier': '234234234',
        'alternate_identifier': 'ISBN,TEST',
        'theme': '{ECON,ENVI}',
        'geographical_geonames_url': 'http://www.geonames.org/3181913',
        'language': '{DEU,ENG,ITA}',
        'is_version_of': 'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
        'conforms_to': '{CONF1,CONF2,CONF3}',
    }

    serializer = RDFSerializer()
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset)

    # The returned reference must match the URI computed by the utils
    eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

    # Basic fields
    basic_fields = [
        (RDF.type, DCATAPIT.Dataset),
        (DCT.title, dataset['title']),
        (DCT.description, dataset['notes']),
        (DCT.identifier, dataset['identifier']),
    ]
    for predicate, expected in basic_fields:
        assert self._triple(graph, dataset_ref, predicate, expected)

    # Tags: one keyword triple per tag
    keyword_triples = list(graph.triples((dataset_ref, DCAT.keyword, None)))
    eq_(len(keyword_triples), 2)
    for tag in dataset['tags']:
        assert self._triple(graph, dataset_ref, DCAT.keyword, tag['name'])
def _extras(self):
    '''
    Generate the leftover triples that do not belong to any of the other
    method groupings: the record's department, its division and the dataset
    it is part of.

    :return: yields triples
    '''
    # Department, derived from the record's collection code
    department = get_department(self.record[u'collectionCode'])
    yield (self.record_ref, self.namespaces.aiiso.Department,
           Literal(department))

    # Division, taken from the sub department value
    division = self._get_value(u'subDepartment')
    yield (self.record_ref, self.namespaces.aiiso.Division, division)

    # Link to the owning dataset: the package URI plus a "#dataset" fragment
    package_id = self.resource.get_package_id()
    in_dataset = URIRef(dataset_uri({u'id': package_id}) + u'#dataset')
    yield (self.record_ref, self.namespaces.void.inDataset, in_dataset)
def graph_from_dataset(self, dataset_dict):
    '''
    Given a CKAN dataset dict, creates a graph using the loaded profiles

    The class RDFLib graph (accessible via `serializer.g`) will be updated
    by the loaded profiles.

    :param dataset_dict: CKAN dataset dict to serialize
    :return: the reference to the dataset, which will be an rdflib URIRef.
    '''
    # dataset_uri() alone decides the dataset URI. An earlier manual scan of
    # the `uri` field / `uri` extra only filled a local that was never read
    # (and could raise on malformed extras), so it has been dropped.
    dataset_ref = URIRef(dataset_uri(dataset_dict))

    # Every profile adds its triples to the shared graph, in order
    for profile_class in self._profiles:
        profile = profile_class(self.g, self.compatibility_mode)
        profile.graph_from_dataset(dataset_dict, dataset_ref)

    return dataset_ref
def graph_from_dataset(self, dataset_dict, dataset_ref):
    '''
    Add the DCAT_AP-IT specific triples for a dataset to the profile graph.

    Working on `self.g`, this binds the `it_namespaces` prefixes, adds the
    DCATAPIT.Dataset type to the dataset node and then adds or replaces:
    themes, languages, spatial coverage, periodicity, landing page,
    conformsTo standards, alternate identifiers, temporal coverage,
    publisher and creators. Finally it removes any existing
    dcat:contactPoint and loads the owner organization dict.

    :param dataset_dict: CKAN dataset dict the values are read from
    :param dataset_ref: graph node used as the subject for the dataset

    NOTE(review): the visible body ends right after the organization
    lookup; `title`, `publisher_ref` and `org_dict` are assigned but not
    read here -- confirm whether the function continues in the full file.
    NOTE(review): `iteritems()` and `except Exception, err` are Python 2
    only constructs.
    '''

    title = dataset_dict.get('title')

    g = self.g

    # Make the Italian profile prefixes available on the graph
    for prefix, namespace in it_namespaces.iteritems():
        g.bind(prefix, namespace)

    ### add a further type for the Dataset node
    g.add((dataset_ref, RDF.type, DCATAPIT.Dataset))

    ### replace themes
    value = self._get_dict_value(dataset_dict, 'theme')
    self._add_themes(dataset_ref, value)

    ### replace languages
    value = self._get_dict_value(dataset_dict, 'language')
    if value:
        for lang in value.split(','):
            # NOTE(review): the literal is removed before the braces are
            # stripped from `lang` -- confirm this is the intended order
            self.g.remove((dataset_ref, DCT.language, Literal(lang)))
            lang = lang.replace('{', '').replace('}', '')
            self.g.add(
                (dataset_ref, DCT.language, URIRef(LANG_BASE_URI + lang)))
            # self._add_concept(LANG_CONCEPTS, lang)

    ### add spatial (EU URI)
    value = self._get_dict_value(dataset_dict, 'geographical_name')
    if value:
        for gname in value.split(','):
            gname = gname.replace('{', '').replace('}', '')

            # One blank dct:Location node per geographical name
            dct_location = BNode()
            self.g.add((dataset_ref, DCT.spatial, dct_location))

            self.g.add((dct_location, RDF['type'], DCT.Location))

            # Try and add a Concept from the spatial vocabulary
            if self._add_concept(GEO_CONCEPTS, gname):
                self.g.add((dct_location, DCATAPIT.geographicalIdentifier,
                            Literal(GEO_BASE_URI + gname)))

                # geo concept is not really required, but may be a useful adding
                self.g.add((dct_location, LOCN.geographicalName,
                            URIRef(GEO_BASE_URI + gname)))
            else:
                # The dataset field is not a controlled tag, let's create a Concept out of the label we have
                concept = BNode()
                self.g.add((concept, RDF['type'], SKOS.Concept))
                self.g.add((concept, SKOS.prefLabel, Literal(gname)))
                self.g.add((dct_location, LOCN.geographicalName, concept))

    ### add spatial (GeoNames)
    value = self._get_dict_value(dataset_dict, 'geographical_geonames_url')
    if value:
        dct_location = BNode()
        self.g.add((dataset_ref, DCT.spatial, dct_location))

        self.g.add((dct_location, RDF['type'], DCT.Location))
        self.g.add((dct_location, DCATAPIT.geographicalIdentifier,
                    Literal(value)))

    ### replace periodicity
    self._remove_node(dataset_dict, dataset_ref,
                      ('frequency', DCT.accrualPeriodicity, None, Literal))
    self._add_uri_node(
        dataset_dict, dataset_ref,
        ('frequency', DCT.accrualPeriodicity, DEFAULT_FREQ_CODE, URIRef),
        FREQ_BASE_URI)
    # self._add_concept(FREQ_CONCEPTS, dataset_dict.get('frequency', DEFAULT_VOCABULARY_KEY))

    ### replace landing page
    self._remove_node(dataset_dict, dataset_ref,
                      ('url', DCAT.landingPage, None, URIRef))
    landing_page_uri = None
    # Prefer the catalog URL built from the dataset name; otherwise use
    # whatever dataset_uri() gives
    if dataset_dict.get('name'):
        landing_page_uri = '{0}/dataset/{1}'.format(
            catalog_uri().rstrip('/'), dataset_dict['name'])
    else:
        landing_page_uri = dataset_uri(
            dataset_dict)  # TODO: preserve original URI if harvested

    self.g.add((dataset_ref, DCAT.landingPage, URIRef(landing_page_uri)))

    ### conformsTo
    self.g.remove((dataset_ref, DCT.conformsTo, None))
    value = self._get_dict_value(dataset_dict, 'conforms_to')
    if value:
        # The value is expected to be a JSON list of dicts; anything that
        # cannot be decoded is logged and treated as an empty list
        try:
            conforms_to = json.loads(value)
        except (
                TypeError,
                ValueError,
        ):
            log.warn("Cannot deserialize DCATAPIT:conformsTo value: %s",
                     value)
            conforms_to = []

        for item in conforms_to:
            # Use the given URI for the standard node, else a blank node
            standard = URIRef(item['uri']) if item.get('uri') else BNode()

            self.g.add((dataset_ref, DCT.conformsTo, standard))
            self.g.add((standard, RDF['type'], DCT.Standard))
            self.g.add((standard, RDF['type'], DCATAPIT.Standard))

            self.g.add(
                (standard, DCT.identifier, Literal(item['identifier'])))

            # Localized titles/descriptions, only for the offered languages
            for lang, val in (item.get('title') or {}).items():
                if lang in OFFERED_LANGS:
                    self.g.add(
                        (standard, DCT.title,
                         Literal(val,
                                 lang=lang_mapping_ckan_to_xmllang.get(
                                     lang, lang))))

            for lang, val in (item.get('description') or {}).items():
                if lang in OFFERED_LANGS:
                    self.g.add(
                        (standard, DCT.description,
                         Literal(val,
                                 lang=lang_mapping_ckan_to_xmllang.get(
                                     lang, lang))))

            for reference_document in (item.get('referenceDocumentation')
                                       or []):
                self.g.add((standard, DCATAPIT.referenceDocumentation,
                            URIRef(reference_document)))

    ### ADMS:identifier alternative identifiers
    self.g.remove((
        dataset_ref,
        ADMS.identifier,
        None,
    ))
    # A missing or undecodable value simply means no alternate identifiers
    try:
        alt_ids = json.loads(dataset_dict['alternate_identifier'])
    except (
            KeyError,
            TypeError,
            ValueError,
    ):
        alt_ids = []

    for alt_identifier in alt_ids:
        node = BNode()
        self.g.add((dataset_ref, ADMS.identifier, node))

        identifier = Literal(alt_identifier['identifier'])
        self.g.add((node, SKOS.notation, identifier))

        # Optional agent attached to the identifier node as dct:creator
        if alt_identifier.get('agent'):
            adata = alt_identifier['agent']
            agent = BNode()

            self.g.add((agent, RDF['type'], DCATAPIT.Agent))
            self.g.add((agent, RDF['type'], FOAF.Agent))
            self.g.add((node, DCT.creator, agent))
            if adata.get('agent_name'):
                for alang, aname in adata['agent_name'].items():
                    self.g.add((agent, FOAF.name,
                                Literal(aname, lang=alang)))

            if adata.get('agent_identifier'):
                self.g.add((agent, DCT.identifier,
                            Literal(adata['agent_identifier'])))

    self._set_temporal_coverage(self.g, dataset_dict, dataset_ref)

    ### publisher

    # DCAT by default creates this node
    # <dct:publisher>
    #   <foaf:Organization rdf:about="http://10.10.100.75/organization/55535226-f82a-4cf7-903a-3e10afeaa79a">
    #     <foaf:name>orga2_test</foaf:name>
    #   </foaf:Organization>
    # </dct:publisher>

    # Drop every existing publisher triple before adding our own agent
    for s, p, o in g.triples((dataset_ref, DCT.publisher, None)):
        #log.info("Removing publisher %r", o)
        g.remove((s, p, o))

    publisher_ref = self._add_agent(dataset_dict,
                                    dataset_ref,
                                    'publisher',
                                    DCT.publisher,
                                    use_default_lang=True)

    ### Author : Agent
    self._add_creators(dataset_dict, dataset_ref)

    ### Point of Contact

    # <dcat:contactPoint rdf:resource="http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri"/>

    # <!-- http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri -->
    # <dcatapit:Organization rdf:about="http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri">
    #    <rdf:type rdf:resource="&vcard;Kind"/>
    #    <rdf:type rdf:resource="&vcard;Organization"/>
    #    <vcard:hasEmail rdf:resource="mailto:[email protected]"/>
    #    <vcard:fn>Regione Liguria - Sportello Cartografico</vcard:fn>
    # </dcatapit:Organization>

    # TODO: preserve original info if harvested

    # retrieve the contactPoint added by the euro serializer
    euro_poc = g.value(subject=dataset_ref,
                       predicate=DCAT.contactPoint,
                       object=None,
                       any=False)

    # euro poc has this format:
    # <dcat:contactPoint>
    #    <vcard:Organization rdf:nodeID="Nfcd06f452bcd41f48f33c45b0c95979e">
    #       <vcard:fn>THE ORGANIZATION NAME</vcard:fn>
    #       <vcard:hasEmail>THE ORGANIZATION EMAIL</vcard:hasEmail>
    #    </vcard:Organization>
    # </dcat:contactPoint>

    if euro_poc:
        g.remove((dataset_ref, DCAT.contactPoint, euro_poc))

    org_id = dataset_dict.get('owner_org')

    # get orga info
    org_show = logic.get_action('organization_show')

    # Best effort: a failing lookup is logged and leaves org_dict empty
    org_dict = {}
    if org_id:
        try:
            org_dict = org_show({'ignore_auth': True}, {
                'id': org_id,
                'include_datasets': False,
                'include_tags': False,
                'include_users': False,
                'include_groups': False,
                'include_extras': True,
                'include_followers': False
            })
        except Exception, err:
            log.warning("Cannot get org for %s: %s",
                        org_id,
                        err,
                        exc_info=err)
def test_graph_from_dataset(self):
    """
    Serialize a dataset dict with RDFSerializer and check single-valued
    properties, keywords, dates and JSON list extras in the graph.
    """
    dataset = {
        "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "test-dataset",
        "title": "Test DCAT dataset",
        "notes": "Lorem ipsum",
        "url": "http://example.com/ds1",
        "version": "1.0b",
        "metadata_created": "2015-06-26T15:21:09.034694",
        "metadata_modified": "2015-06-26T15:21:09.075774",
        "tags": [{"name": "Tag 1"}, {"name": "Tag 2"}],
        "extras": [
            {"key": "alternate_identifier", "value": '["xyz", "abc"]'},
            {"key": "version_notes", "value": "This is a beta version"},
            {"key": "frequency", "value": "monthly"},
            {"key": "language", "value": '["en"]'},
            {"key": "theme", "value": '["http://eurovoc.europa.eu/100142", "http://eurovoc.europa.eu/100152"]'},
            {"key": "conforms_to", "value": '["Standard 1", "Standard 2"]'},
            {"key": "access_rights", "value": "public"},
            {"key": "documentation", "value": '["http://dataset.info.org/doc1", "http://dataset.info.org/doc2"]'},
            {"key": "provenance", "value": "Some statement about provenance"},
            {"key": "dcat_type", "value": "test-type"},
            {"key": "related_resource", "value": '["http://dataset.info.org/related1", "http://dataset.info.org/related2"]'},
            {"key": "has_version", "value": '["https://data.some.org/catalog/datasets/derived-dataset-1", "https://data.some.org/catalog/datasets/derived-dataset-2"]'},
            {"key": "is_version_of", "value": '["https://data.some.org/catalog/datasets/original-dataset"]'},
            {"key": "source", "value": '["https://data.some.org/catalog/datasets/source-dataset-1", "https://data.some.org/catalog/datasets/source-dataset-2"]'},
            {"key": "sample", "value": '["https://data.some.org/catalog/datasets/9df8df51-63db-37a8-e044-0003ba9b0d98/sample"]'},
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset)

    # The returned reference must match the URI computed by the utils
    eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

    # Single-valued properties
    single_valued = [
        (RDF.type, DCAT.Dataset),
        (DCT.title, dataset["title"]),
        (DCT.description, dataset["notes"]),
        (OWL.versionInfo, dataset["version"]),
        (ADMS.versionNotes, extras["version_notes"]),
        (DCT.accrualPeriodicity, extras["frequency"]),
        (DCT.accessRights, extras["access_rights"]),
        (DCT.provenance, extras["provenance"]),
        (DCT.type, extras["dcat_type"]),
    ]
    for predicate, expected in single_valued:
        assert self._triple(graph, dataset_ref, predicate, expected)

    # Tags: one keyword triple per tag
    keyword_triples = list(graph.triples((dataset_ref, DCAT.keyword, None)))
    eq_(len(keyword_triples), 2)
    for tag in dataset["tags"]:
        assert self._triple(graph, dataset_ref, DCAT.keyword, tag["name"])

    # Dates are typed as xsd:dateTime
    for predicate, field in [(DCT.issued, "metadata_created"),
                             (DCT.modified, "metadata_modified")]:
        assert self._triple(graph, dataset_ref, predicate, dataset[field],
                            XSD.dateTime)

    # JSON-encoded list extras: one triple per item, with each value
    # wrapped in the given rdflib term class before the lookup
    list_valued = [
        ("language", DCT.language, Literal),
        ("theme", DCAT.theme, URIRef),
        ("conforms_to", DCT.conformsTo, Literal),
        ("alternate_identifier", ADMS.identifier, Literal),
        ("documentation", FOAF.page, Literal),
        ("related_resource", DCT.relation, Literal),
        ("has_version", DCT.hasVersion, Literal),
        ("is_version_of", DCT.isVersionOf, Literal),
        ("source", DCT.source, Literal),
        ("sample", ADMS.sample, Literal),
    ]
    for key, predicate, term_class in list_valued:
        values = json.loads(extras[key])
        found = list(graph.triples((dataset_ref, predicate, None)))
        eq_(len(found), len(values))
        for value in values:
            assert self._triple(graph, dataset_ref, predicate,
                                term_class(value))
def test_graph_from_dataset(self):
    '''
    Serialize a multilingual dataset dict with the 'swiss_schemaorg'
    profile and check the schema.org triples found in the graph.
    '''
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'url': 'http://example.com/ds1',
        'version': '1.0b',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'keywords': {
            'fr': [],
            'de': ['alter', 'sozialhilfe'],
            'en': ['age'],
            'it': [],
        },
        'groups': [
            {
                'display_name': {
                    'fr': 'Economie nationale',
                    'de': 'Volkswirtschaft',
                    'en': 'National economy',
                    'it': 'Economia',
                },
                'description': {
                    'fr': '',
                    'de': '',
                    'en': 'some descriptiom',
                    'it': '',
                },
                'image_display_url': '',
                'title': {
                    'fr': 'Economie nationale',
                    'de': 'Volkswirtschaft',
                    'en': 'National economy',
                    'it': 'Economia',
                },
                'id': '5389c3f2-2f64-436b-9fac-2d1fc342f7b5',
                'name': 'national-economy',
            },
            {
                'display_name': {
                    'fr': 'Education, science',
                    'de': 'Bildung, Wissenschaft',
                    'en': 'Education and science',
                    'it': 'Formazione e scienza',
                },
                'description': {'fr': '', 'de': '', 'en': '', 'it': ''},
                'image_display_url': '',
                'title': {
                    'fr': 'Education, science',
                    'de': 'Bildung, Wissenschaft',
                    'en': 'Education and science',
                    'it': 'Formazione e scienza',
                },
                'id': 'afcb4a2a-b4b0-4d7c-984a-9078e964be49',
                'name': 'education',
            },
            {
                'display_name': {
                    'fr': 'Finances',
                    'de': 'Finanzen',
                    'en': 'Finances',
                    'it': 'Finanze',
                },
                'description': {'fr': '', 'de': '', 'en': '', 'it': ''},
                'image_display_url': '',
                'title': {
                    'fr': 'Finances',
                    'de': 'Finanzen',
                    'en': 'Finances',
                    'it': 'Finanze',
                },
                'id': '79cbe120-e9c6-4249-b934-58ca980606d7',
                'name': 'finances',
            },
        ],
        'description': {
            'fr': '',
            'de': 'Deutsche Beschreibung',
            'en': 'English Description',
            'it': '',
        },
        'extras': [
            {'key': 'alternate_identifier', 'value': '[\"xyz\", \"abc\"]'},
            {'key': 'identifier', 'value': '26be5452-fc5c-11e7-8450-fea9aa178066'},
            {'key': 'version_notes', 'value': 'This is a beta version'},
            {'key': 'frequency', 'value': 'monthly'},
            {'key': 'language', 'value': '[\"en\"]'},
            {'key': 'theme', 'value': '[\"http://eurovoc.europa.eu/100142\", \"http://eurovoc.europa.eu/100152\"]'},
            {'key': 'conforms_to', 'value': '[\"Standard 1\", \"Standard 2\"]'},
            {'key': 'access_rights', 'value': 'public'},
            {'key': 'documentation', 'value': '[\"http://dataset.info.org/doc1\", \"http://dataset.info.org/doc2\"]'},
            {'key': 'provenance', 'value': 'Some statement about provenance'},
            {'key': 'dcat_type', 'value': 'test-type'},
            {'key': 'related_resource', 'value': '[\"http://dataset.info.org/related1\", \"http://dataset.info.org/related2\"]'},
            {'key': 'has_version', 'value': '[\"https://data.some.org/catalog/datasets/derived-dataset-1\", \"https://data.some.org/catalog/datasets/derived-dataset-2\"]'},
            {'key': 'is_version_of', 'value': '[\"https://data.some.org/catalog/datasets/original-dataset\"]'},
            {'key': 'source', 'value': '[\"https://data.some.org/catalog/datasets/source-dataset-1\", \"https://data.some.org/catalog/datasets/source-dataset-2\"]'},
            {'key': 'sample', 'value': '[\"https://data.some.org/catalog/datasets/9df8df51-63db-37a8-e044-0003ba9b0d98/sample\"]'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer(profiles=['swiss_schemaorg'])
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset)

    # The returned reference must match the URI computed by the utils
    eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

    # Basic fields and dates (dates are compared without a datatype)
    single_valued = [
        (RDF.type, SCHEMA.Dataset),
        (SCHEMA.name, dataset['title']),
        (SCHEMA.version, dataset['version']),
        (SCHEMA.identifier, extras['identifier']),
        (SCHEMA.datePublished, dataset['metadata_created']),
        (SCHEMA.dateModified, dataset['metadata_modified']),
    ]
    for predicate, expected in single_valued:
        assert self._triple(graph, dataset_ref, predicate, expected)

    # Descriptions: only the non-empty translations are expected, each as
    # a language-tagged literal
    for lang, text in dataset['description'].items():
        if not text:
            continue
        assert self._triple(graph, dataset_ref, SCHEMA.description,
                            Literal(text, lang=lang))
    description_triples = list(
        graph.triples((dataset_ref, SCHEMA.description, None)))
    eq_(len(description_triples), 2)

    # Tags: one language-tagged literal per keyword
    keyword_triples = list(
        graph.triples((dataset_ref, SCHEMA.keywords, None)))
    eq_(len(keyword_triples), 3)
    for lang, keywords in dataset['keywords'].items():
        if not keywords:
            continue
        for keyword in keywords:
            assert self._triple(graph, dataset_ref, SCHEMA.keywords,
                                Literal(keyword, lang=lang))

    # JSON-encoded list extras: one triple per list item
    for key, predicate, term_class in [
        ('language', SCHEMA.inLanguage, Literal),
    ]:
        values = json.loads(extras[key])
        found = list(graph.triples((dataset_ref, predicate, None)))
        eq_(len(found), len(values))
        for value in values:
            assert self._triple(graph, dataset_ref, predicate,
                                term_class(value))
def test_graph_from_dataset(self):
    '''
    Build a graph from a dataset dict and verify that the expected triples
    exist for the returned dataset reference.
    '''
    extras_list = [
        {'key': 'alternate_identifier', 'value': 'xyz'},
        {'key': 'version_notes', 'value': 'This is a beta version'},
        {'key': 'frequency', 'value': 'monthly'},
        {'key': 'language', 'value': '[\"en\"]'},
        {'key': 'theme', 'value': '[\"http://eurovoc.europa.eu/100142\", \"http://eurovoc.europa.eu/100152\"]'},
        {'key': 'conforms_to', 'value': '[\"Standard 1\", \"Standard 2\"]'},
    ]
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'notes': 'Lorem ipsum',
        'url': 'http://example.com/ds1',
        'version': '1.0b',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'extras': extras_list,
    }
    extras = self._extras(dataset)

    rdf_serializer = RDFSerializer()
    rdf_graph = rdf_serializer.g

    ref = rdf_serializer.graph_from_dataset(dataset)

    # The returned reference must match the URI computed by the utils
    eq_(unicode(ref), utils.dataset_uri(dataset))

    def has_triple(predicate, *expected):
        # Thin wrapper so each check below only names what varies
        return self._triple(rdf_graph, ref, predicate, *expected)

    def count_triples(predicate):
        return len(list(rdf_graph.triples((ref, predicate, None))))

    # Basic fields
    assert has_triple(RDF.type, DCAT.Dataset)
    assert has_triple(DCT.title, dataset['title'])
    assert has_triple(DCT.description, dataset['notes'])
    assert has_triple(OWL.versionInfo, dataset['version'])
    assert has_triple(ADMS.versionNotes, extras['version_notes'])
    assert has_triple(ADMS.identifier, extras['alternate_identifier'])
    assert has_triple(DCT.accrualPeriodicity, extras['frequency'])

    # Tags
    eq_(count_triples(DCAT.keyword), 2)
    for tag in dataset['tags']:
        assert has_triple(DCAT.keyword, tag['name'])

    # Dates
    assert has_triple(DCT.issued, dataset['metadata_created'],
                      XSD.dateTime)
    assert has_triple(DCT.modified, dataset['metadata_modified'],
                      XSD.dateTime)

    # List: each JSON list extra yields one triple per item
    list_valued = (
        ('language', DCT.language),
        ('theme', DCAT.theme),
        ('conforms_to', DCAT.conformsTo),
    )
    for key, predicate in list_valued:
        values = json.loads(extras[key])
        eq_(count_triples(predicate), len(values))
        for value in values:
            assert has_triple(predicate, value)
def test_graph_from_dataset(self):
    '''
    Serialize a dataset dict with RDFSerializer and check single-valued
    properties, keywords, dates and JSON list extras in the graph. List
    items are wrapped in Literal or URIRef as given per property.
    '''
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'notes': 'Lorem ipsum',
        'url': 'http://example.com/ds1',
        'version': '1.0b',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'extras': [
            {'key': 'alternate_identifier', 'value': '[\"xyz\", \"abc\"]'},
            {'key': 'version_notes', 'value': 'This is a beta version'},
            {'key': 'frequency', 'value': 'monthly'},
            {'key': 'language', 'value': '[\"en\", \"http://publications.europa.eu/resource/authority/language/ITA\"]'},
            {'key': 'theme', 'value': '[\"http://eurovoc.europa.eu/100142\", \"http://eurovoc.europa.eu/100152\"]'},
            {'key': 'conforms_to', 'value': '[\"Standard 1\", \"Standard 2\"]'},
            {'key': 'access_rights', 'value': 'public'},
            {'key': 'documentation', 'value': '[\"http://dataset.info.org/doc1\", \"http://dataset.info.org/doc2\"]'},
            {'key': 'provenance', 'value': 'Some statement about provenance'},
            {'key': 'dcat_type', 'value': 'test-type'},
            {'key': 'related_resource', 'value': '[\"http://dataset.info.org/related1\", \"http://dataset.info.org/related2\"]'},
            {'key': 'has_version', 'value': '[\"https://data.some.org/catalog/datasets/derived-dataset-1\", \"https://data.some.org/catalog/datasets/derived-dataset-2\"]'},
            {'key': 'is_version_of', 'value': '[\"https://data.some.org/catalog/datasets/original-dataset\"]'},
            {'key': 'source', 'value': '[\"https://data.some.org/catalog/datasets/source-dataset-1\", \"https://data.some.org/catalog/datasets/source-dataset-2\"]'},
            {'key': 'sample', 'value': '[\"https://data.some.org/catalog/datasets/9df8df51-63db-37a8-e044-0003ba9b0d98/sample\"]'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset)

    # The returned reference must match the URI computed by the utils
    eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

    # Single-valued properties
    single_valued = [
        (RDF.type, DCAT.Dataset),
        (DCT.title, dataset['title']),
        (DCT.description, dataset['notes']),
        (OWL.versionInfo, dataset['version']),
        (ADMS.versionNotes, extras['version_notes']),
        (DCT.accrualPeriodicity, extras['frequency']),
        (DCT.accessRights, extras['access_rights']),
        (DCT.provenance, extras['provenance']),
        (DCT.type, extras['dcat_type']),
    ]
    for predicate, expected in single_valued:
        assert self._triple(graph, dataset_ref, predicate, expected)

    # Tags: one keyword triple per tag
    keyword_triples = list(graph.triples((dataset_ref, DCAT.keyword, None)))
    eq_(len(keyword_triples), 2)
    for tag in dataset['tags']:
        assert self._triple(graph, dataset_ref, DCAT.keyword, tag['name'])

    # Dates are typed as xsd:dateTime
    for predicate, field in [(DCT.issued, 'metadata_created'),
                             (DCT.modified, 'metadata_modified')]:
        assert self._triple(graph, dataset_ref, predicate, dataset[field],
                            XSD.dateTime)

    # JSON-encoded list extras. The third element is either one term class
    # used for every item, or a list giving the class for each position.
    list_valued = [
        ('language', DCT.language, [Literal, URIRef]),
        ('theme', DCAT.theme, URIRef),
        ('conforms_to', DCT.conformsTo, Literal),
        ('alternate_identifier', ADMS.identifier, Literal),
        ('documentation', FOAF.page, URIRef),
        ('related_resource', DCT.relation, URIRef),
        ('has_version', DCT.hasVersion, URIRef),
        ('is_version_of', DCT.isVersionOf, URIRef),
        ('source', DCT.source, Literal),
        ('sample', ADMS.sample, Literal),
    ]
    for key, predicate, term_spec in list_valued:
        values = json.loads(extras[key])
        found = list(graph.triples((dataset_ref, predicate, None)))
        eq_(len(found), len(values))
        for position, value in enumerate(values):
            if isinstance(term_spec, list):
                # A per-position spec must cover every value
                eq_(len(term_spec), len(values))
                term_class = term_spec[position]
            else:
                term_class = term_spec
            assert self._triple(graph, dataset_ref, predicate,
                                term_class(value))
def test_graph_from_dataset(self):
    '''
    Create a DCAT-AP_IT package, serialize it and check the produced graph.

    The test builds structured fixtures (conformsTo, alternate identifiers,
    creators, temporal coverage, sub-themes), stores them JSON-encoded in
    the dataset dict, creates the package through ``package_create``,
    registers localized publisher/holder names and then serializes the
    original ``dataset`` dict with ``RDFSerializer``. Assertions cover the
    basic fields, keywords, conformsTo nodes, alternate identifiers,
    creators, temporal coverage and the localized publisher/holder names.

    Note that ``creators`` and ``temporal_coverage`` are extended in place
    *after* they were JSON-serialized into the dataset, so statement order
    matters here.
    '''

    # Expected dct:conformsTo entries; only the first one carries a 'uri'
    # and only the second one carries a 'description'
    conforms_to_in = [{'identifier': 'CONF1',
                       'uri': 'conf01',
                       'title': {'en': 'title', 'it': 'title'},
                       'referenceDocumentation': ['http://abc.efg/'],},
                      {'identifier': 'CONF2',
                       'title': {'en': 'title', 'it': 'title'},
                       'description': {'en': 'descen', 'it': 'descit'},
                       'referenceDocumentation': ['http://abc.efg/'],},
                      ]

    # Second entry has an empty agent, so the agent checks below skip it
    alternate_identifiers = [{'identifier': 'aaaabc',
                              'agent': {'agent_identifier': 'agent01',
                                        'agent_name': {'en': 'Agent en 01', 'it': 'Agent it 01'}},
                              },
                             {'identifier': 'other identifier', 'agent': {}}]
    creators = [{'creator_name': {'en': 'abc'}, 'creator_identifier': "ABC"},
                {'creator_name': {'en': 'cde'}, 'creator_identifier': "CDE"},
                ]

    temporal_coverage = [{'temporal_start': '2001-01-01', 'temporal_end': '2001-02-01 10:11:12'},
                         {'temporal_start': '2001-01-01', 'temporal_end': '2001-02-01 11:12:13'},
                         ]

    subthemes = [{'theme': 'AGRI', 'subthemes': ['http://eurovoc.europa.eu/100253',
                                                  'http://eurovoc.europa.eu/100258']},
                 {'theme': 'ENVI', 'subthemes': []}]

    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Dataset di test DCAT_AP-IT',
        'notes': 'dcatapit dataset di test',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'issued':'2016-11-29',
        'modified':'2016-11-29',
        'identifier':'ISBN',
        'temporal_start':'2016-11-01',
        'temporal_end':'2016-11-30',
        'frequency':'UPDATE_CONT',
        'publisher_name':'bolzano',
        'publisher_identifier':'234234234',
        'creator_name':'test',
        'creator_identifier':'412946129',
        'holder_name':'bolzano',
        'holder_identifier':'234234234',
        'alternate_identifier':json.dumps(alternate_identifiers),
        'temporal_coverage': json.dumps(temporal_coverage),
        #'theme':'ECON',
        'geographical_geonames_url':'http://www.geonames.org/3181913',
        'language':'{DEU,ENG,ITA}',
        'is_version_of':'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
        'conforms_to':json.dumps(conforms_to_in),
        'creator': json.dumps(creators),
        'theme': json.dumps(subthemes),
    }

    # NOTE(review): this value is overwritten with pkg['id'] after package
    # creation and is not read in between
    pkg_id = dataset['id']

    # Localized names to register for the package, keyed by language code
    pub_names = {'it': 'IT publisher',
                 'es': 'EN publisher'}
    holder_names = {'it': 'IT holder name',
                    'es': 'EN holder name'}

    # One (field, type, lang, text) tuple per localized value
    multilang_fields = [('publisher_name', 'package', k, v) for k, v in pub_names.items()] +\
                       [('holder_name', 'package', k, v) for k, v in holder_names.items()]

    pkg = helpers.call_action('package_create', {'defer_commit': True}, **dataset)
    # NOTE(review): the getattr default is evaluated eagerly, so
    # repo.new_revision() is called even when Session already has a
    # 'revision' attribute - confirm this is intended
    rev = getattr(Session, 'revision', repo.new_revision())
    Session.flush()
    Session.revision = rev
    pkg_id = pkg['id']

    for field_name, field_type, lang, text in multilang_fields:
        interfaces.upsert_package_multilang(pkg_id, field_name, field_type, lang, text)

    # The result is only referenced by the commented-out assertions below
    loc_dict = interfaces.get_for_package(pkg_id)
    #assert loc_dict['publisher_name'] == pub_names
    #assert loc_dict['holder_name'] == holder_names

    # temporary bug for compatibility with interfaces.get_language(),
    # which will return lang[0]
    pub_names.update({DEFAULT_LANG: dataset['publisher_name']})
    # pub_names.update({DEFAULT_LANG[0]: dataset['publisher_name']})
    holder_names.update({DEFAULT_LANG: dataset['holder_name']})
    # holder_names.update({DEFAULT_LANG[0]: dataset['holder_name']})

    s = RDFSerializer()
    g = s.g

    # The original input dict is serialized, not the 'pkg' dict returned
    # by package_create
    dataset_ref = s.graph_from_dataset(dataset)

    eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

    # Basic fields
    assert self._triple(g, dataset_ref, RDF.type, DCATAPIT.Dataset)
    assert self._triple(g, dataset_ref, DCT.title, dataset['title'])
    assert self._triple(g, dataset_ref, DCT.description, dataset['notes'])

    assert self._triple(g, dataset_ref, DCT.identifier, dataset['identifier'])

    # Tags
    eq_(len([t for t in g.triples((dataset_ref, DCAT.keyword, None))]), 2)
    for tag in dataset['tags']:
        assert self._triple(g, dataset_ref, DCAT.keyword, tag['name'])

    # conformsTo
    # Every dct:conformsTo triple in the graph (any subject) is matched back
    # to its fixture entry through the node's dct:identifier
    conforms_to = list(g.triples((None, DCT.conformsTo, None)))
    assert conforms_to
    conforms_to_dict = dict((d['identifier'], d) for d in conforms_to_in)
    for conf in conforms_to:
        # Object of the triple: the node describing the standard
        conf_id = conf[-1]

        identifier = g.value(conf_id, DCT.identifier)
        titles = list(g.objects(conf_id, DCT.title))
        descs = list(g.objects(conf_id, DCT.description))
        references = list(g.objects(conf_id, DCATAPIT.referenceDocumentation))

        check = conforms_to_dict.get(str(identifier))

        assert isinstance(check, dict)

        # The node URI is only compared when the fixture declares one
        if check.get('uri'):
            assert check['uri'] == str(conf_id)
        assert len(titles), "missing titles"

        # Descriptions must be present exactly when the fixture has them
        assert (len(descs)> 0) == bool(check.get('description')), "missing descriptions"

        for title in titles:
            tlang = title.language
            tval = str(title)
            assert tval == check['title'][tlang], (tlang, tval, check['title'])

        for desc in descs:
            tlang = desc.language
            tval = str(desc)
            assert tval == check['description'][tlang], (tlang, str(tval), check['description'])

        # Reference documentation must match in both directions
        ref_docs = check.get('referenceDocumentation')
        assert len(references) == len(ref_docs), "missing reference documentation"

        for dref in references:
            assert str(dref) in ref_docs, "{} not in {}".format(dref, ref_docs)

        for ref in ref_docs:
            assert URIRef(ref) in references

    # alternate identifiers
    alt_ids = [a[-1] for a in g.triples((None, ADMS.identifier, None))]
    alt_ids_dict = dict((a['identifier'], a) for a in alternate_identifiers)

    for alt_id in alt_ids:
        # The fixture entry is looked up by the node's skos:notation
        identifier = g.value(alt_id, SKOS.notation)
        check = alt_ids_dict[str(identifier)]
        assert str(identifier) == check['identifier']
        # Agent details are only verified for fixtures with a non-empty agent
        if check.get('agent'):
            agent_ref = g.value(alt_id, DCT.creator)
            assert agent_ref is not None

            agent_identifier = g.value(agent_ref, DCT.identifier)

            # language tag -> name, as found in the graph
            agent_name = dict((v.language, str(v)) for v in g.objects(agent_ref, FOAF.name))

            assert set(agent_name.items()) == set(check['agent']['agent_name'].items()),\
                "expected {}, got {} for {}".format(check['agent']['agent_name'], agent_name, agent_ref)

            assert str(agent_identifier) == check['agent']['agent_identifier'],\
                "expected {}, got {}".format(check['agent']['agent_identifier'], agent_identifier)

    # creators
    # The expected list gains one entry built from the same values as the
    # dataset's flat creator_name / creator_identifier fields
    creators.append({'creator_name':{'en': 'test'}, 'creator_identifier':'412946129'})
    creators_in = list(g.objects(dataset_ref, DCT.creator))
    assert len(creators) == len(creators_in)

    for cref in creators_in:
        # Names without a language tag are filed under DEFAULT_LANG
        cnames = dict((str(c.language) if c.language else DEFAULT_LANG, str(c)) for c in g.objects(cref, FOAF.name))
        c_identifier = g.value(cref, DCT.identifier)
        c_dict = {'creator_name': cnames,
                  'creator_identifier': str(c_identifier)}
        assert c_dict in creators, "no {} in {}".format(c_dict, creators)

    # temporal coverage
    # The dataset's flat temporal_start / temporal_end pair is expected as
    # an additional interval
    temporal_coverage.append({'temporal_start': dataset['temporal_start'],
                              'temporal_end': dataset['temporal_end']})
    temp_exts = list(g.triples((dataset_ref, DCT.temporal, None)))
    assert len(temp_exts) == len(temporal_coverage)

    # normalize values
    # (expected values are passed through pdate, like the graph values
    # below, so both sides are compared in the same form)
    for item in temporal_coverage:
        for k, v in item.items():
            item[k] = pdate(v)

    temp_ext = []
    for interval_t in temp_exts:
        interval = interval_t[-1]
        start = g.value(interval, SCHEMA.startDate)
        end = g.value(interval, SCHEMA.endDate)
        assert start is not None
        assert end is not None
        temp_ext.append({'temporal_start': pdate(str(start)),
                         'temporal_end': pdate(str(end))})

    # Compare as sets of item tuples so interval order does not matter
    set1 = set([tuple(d.items()) for d in temp_ext])
    set2 = set([tuple(d.items()) for d in temporal_coverage])

    assert set1 == set2, "Got different temporal coverage sets: \n{}\n vs\n {}".format(set1, set2)

    # Publisher names: only language-tagged names are compared
    for pub_ref in g.objects(dataset_ref, DCT.publisher):
        _pub_names = list(g.objects(pub_ref, FOAF.name))

        assert len(_pub_names)

        for pub_name in _pub_names:
            if pub_name.language:
                assert str(pub_name.language) in pub_names, "no {} in {}".format(pub_name.language, pub_names)
                assert pub_names[str(pub_name.language)] == str(pub_name), "{} vs {}".format(pub_name, pub_names)

    # Rights holder names: same rule as for the publisher
    for holder_ref in g.objects(dataset_ref, DCT.rightsHolder):
        _holder_names = list(g.objects(holder_ref, FOAF.name))

        assert len(_holder_names)

        for holder_name in _holder_names:
            if holder_name.language:
                assert str(holder_name.language) in holder_names, "no {} in {}".format(holder_name.language, holder_names)
                assert holder_names[str(holder_name.language)] == str(holder_name), "{} vs {}".format(holder_name, holder_names)
def test_graph_from_dataset(self):
    '''
    Create a DCAT-AP_IT package and verify the graph serialized from it.

    Fixtures for conformsTo, alternate identifiers, creators, temporal
    coverage and aggregated themes are stored JSON-encoded in the source
    dataset. The package is created through ``package_create``, localized
    publisher/holder names are registered, and the package dict returned
    by the action is serialized. Source values are then looked up in the
    graph (rather than the other way round) because the graph may hold
    more information than the source.
    '''
    src_conforms_to = [
        {
            'identifier': 'CONF1',
            'uri': 'conf01',
            'title': {'en': 'title1EN', 'it': 'title1IT'},
            'referenceDocumentation': ['http://abc.efg/'],
        },
        {
            'identifier': 'CONF2',
            'title': {'en': 'title2EN', 'it': 'title2IT'},
            'description': {'en': 'desc2EN', 'it': 'desc2IT'},
            'referenceDocumentation': ['http://abc.efg/'],
        },
    ]
    src_alt_identifiers = [
        {
            'identifier': 'aaaabc',
            'agent': {
                'agent_identifier': 'agent01',
                'agent_name': {'en': 'Agent en 01', 'it': 'Agent it 01'},
            },
        },
        {'identifier': 'other identifier', 'agent': {}},
    ]
    src_creators = [
        {'creator_name': {'en': 'abcEN', 'it': 'abcIT'}, 'creator_identifier': 'ABC'},
        {'creator_name': {'en': 'cde'}, 'creator_identifier': 'CDE'},
    ]
    src_temporal_coverage = [
        {'temporal_start': '2001-01-01', 'temporal_end': '2001-02-01 10:11:12'},
        {'temporal_start': '2001-01-01', 'temporal_end': '2001-02-01 11:12:13'},
    ]
    subthemes = [
        {
            'theme': 'AGRI',
            'subthemes': [
                'http://eurovoc.europa.eu/100253',
                'http://eurovoc.europa.eu/100258',
            ],
        },
        {'theme': 'ENVI', 'subthemes': []},
    ]

    pub_it = 'IT publisher'
    holder_it = 'IT holder'

    org = factories.Organization(identifier=uuid.uuid4(),
                                 is_org=True,
                                 name=uuid.uuid4())

    src_dataset = {
        'owner_org': org['id'],
        'name': str(uuid.uuid4()),
        'title': 'Dataset di test DCAT_AP-IT',
        'notes': 'dcatapit dataset di test',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'issued': '2016-11-29',
        'modified': '2016-11-29',
        'identifier': str(uuid.uuid4()),
        'temporal_start': '2016-11-01',
        'temporal_end': '2016-11-30',
        'frequency': 'UPDATE_CONT',
        'publisher_name': pub_it,
        'publisher_identifier': '234234234',
        'creator_name': 'test',
        'creator_identifier': '412946129',
        'holder_name': holder_it,
        'holder_identifier': '234234234',
        'alternate_identifier': json.dumps(src_alt_identifiers),
        'temporal_coverage': json.dumps(src_temporal_coverage),
        'geographical_geonames_url': 'http://www.geonames.org/3181913',
        'language': '{DEU,ENG,ITA}',
        'is_version_of': 'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
        'conforms_to': json.dumps(src_conforms_to),
        'creator': json.dumps(src_creators),
        FIELD_THEMES_AGGREGATE: json.dumps(subthemes),
        'theme': theme_aggr_to_theme_uris(subthemes),
    }

    src_pub_names = {'it': pub_it, 'en': 'EN publisher'}
    src_holder_names = {'it': holder_it, 'en': 'EN holder name'}

    # (field, type, lang, text) tuples: publisher names first, then holder
    multilang_fields = []
    for lang_code, text in src_pub_names.items():
        multilang_fields.append(('publisher_name', 'package', lang_code, text))
    for lang_code, text in src_holder_names.items():
        multilang_fields.append(('holder_name', 'package', lang_code, text))

    pkg = helpers.call_action('package_create', {'defer_commit': True},
                              **src_dataset)
    Session.flush()
    pkg_id = pkg['id']
    # The id is assigned on creation; record it so the expected dataset URI
    # is computed for the stored package
    src_dataset['id'] = pkg_id

    for field_name, field_type, lang_code, text in multilang_fields:
        interfaces.upsert_package_multilang(pkg_id, field_name, field_type,
                                            lang_code, text)

    serializer = RDFSerializer()
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(pkg)

    self.assertEqual(str(dataset_ref), str(utils.dataset_uri(src_dataset)),
                     'Dataset URI changes')

    # Basic fields
    basic_checks = [
        (RDF.type, DCATAPIT.Dataset),
        (DCT.title, src_dataset['title']),
        (DCT.description, src_dataset['notes']),
        (DCT.identifier, src_dataset['identifier']),
    ]
    for predicate, expected in basic_checks:
        self.assertIsNotNone(
            self._triple(graph, dataset_ref, predicate, expected))

    # Tags
    keyword_triples = list(graph.triples((dataset_ref, DCAT.keyword, None)))
    self.assertEqual(2, len(keyword_triples))
    for tag in src_dataset['tags']:
        self.assertIsNotNone(
            self._triple(graph, dataset_ref, DCAT.keyword, tag['name']))

    # conformsTo
    conforms_to_nodes = list(graph.objects(dataset_ref, DCT.conformsTo))
    self.assertEqual(2, len(conforms_to_nodes))

    src_conforms_dict = dict((entry['identifier'], entry)
                             for entry in src_conforms_to)
    for conf_node in conforms_to_nodes:
        node_text = str(conf_node)

        identifier = graph.value(conf_node, DCT.identifier)
        titles = list(graph.objects(conf_node, DCT.title))
        descs = list(graph.objects(conf_node, DCT.description))
        references = list(
            graph.objects(conf_node, DCATAPIT.referenceDocumentation))

        src_conforms = src_conforms_dict.get(str(identifier))

        assert isinstance(src_conforms, dict)

        # The node URI is only compared when the source declares one
        if src_conforms.get('uri'):
            assert src_conforms['uri'] == node_text
        assert len(titles), 'missing titles'

        has_descriptions = len(descs) > 0
        assert has_descriptions == bool(
            src_conforms.get('description')), 'missing descriptions'

        # looping on the source items bc graph info may have been augmented
        titles_dict = dict((title.language, str(title)) for title in titles)
        for lang, src_value in src_conforms['title'].items():
            self.assertEqual(src_value, titles_dict[lang],
                             f'Titles do not match for lang:{lang}')

        # looping on the source items bc graph info may have been augmented
        descr_dict = dict((descr.language, str(descr)) for descr in descs)
        for lang, src_value in src_conforms.get('description', {}).items():
            self.assertEqual(src_value, descr_dict[lang],
                             f'descriptions do not match for lang:{lang}')

        # Reference documentation must match in both directions
        ref_docs = src_conforms.get('referenceDocumentation')
        assert len(references) == len(
            ref_docs), 'missing reference documentation'

        for dref in references:
            assert str(dref) in ref_docs, '{} not in {}'.format(
                dref, ref_docs)

        for ref in ref_docs:
            assert URIRef(ref) in references

    # alternate identifiers
    alt_ids = [triple[-1]
               for triple in graph.triples((None, ADMS.identifier, None))]
    alt_ids_dict = {entry['identifier']: entry
                    for entry in src_alt_identifiers}

    for alt_id in alt_ids:
        identifier = graph.value(alt_id, SKOS.notation)
        src_alt = alt_ids_dict[str(identifier)]
        assert str(identifier) == src_alt['identifier']
        # Entries with an empty agent have nothing more to verify
        if not src_alt.get('agent'):
            continue

        agent_ref = graph.value(alt_id, DCT.creator)
        assert agent_ref is not None

        agent_name = dict((name.language, str(name))
                          for name in graph.objects(agent_ref, FOAF.name))

        for name_item in set(src_alt['agent']['agent_name'].items()):
            self.assertIn(name_item, set(agent_name.items()),
                          "Agents name not found")

        self.assertEqual(src_alt['agent']['agent_identifier'],
                         str(graph.value(agent_ref, DCT.identifier)),
                         "Agents identifier mismatch")

    # creators
    creators_in = list(graph.objects(dataset_ref, DCT.creator))
    assert len(src_creators) == len(creators_in)

    for cref in creators_in:
        c_identifier = str(graph.value(cref, DCT.identifier))
        # Names without a language tag are filed under DEFAULT_LANG
        cnames = {
            (str(c.language) if c.language else DEFAULT_LANG): str(c)
            for c in graph.objects(cref, FOAF.name)
        }
        matching = [
            candidate for candidate in src_creators
            if candidate['creator_identifier'] == c_identifier
        ]
        self.assertEqual(1, len(matching))
        for lang, name in matching[0]['creator_name'].items():
            self.assertEqual(name, cnames[lang])

    # temporal coverage
    temp_exts = list(graph.triples((dataset_ref, DCT.temporal, None)))
    assert len(temp_exts) == len(src_temporal_coverage)

    # normalize values (in place, so both sides go through pdate)
    for item in src_temporal_coverage:
        for field in list(item):
            item[field] = pdate(item[field])

    temp_ext = []
    for interval_t in temp_exts:
        interval = interval_t[-1]
        start = graph.value(interval, SCHEMA.startDate)
        end = graph.value(interval, SCHEMA.endDate)
        assert start is not None
        assert end is not None
        temp_ext.append({
            'temporal_start': pdate(str(start)),
            'temporal_end': pdate(str(end)),
        })

    # Compare as sets of item tuples so interval order does not matter
    set1 = {tuple(d.items()) for d in temp_ext}
    set2 = {tuple(d.items()) for d in src_temporal_coverage}

    assert set1 == set2, 'Got different temporal coverage sets: \n{}\n vs\n {}'.format(
        set1, set2)

    # Publisher names: only language-tagged names are compared
    for pub_ref in graph.objects(dataset_ref, DCT.publisher):
        _pub_names = list(graph.objects(pub_ref, FOAF.name))

        assert len(_pub_names)

        for pub_name in _pub_names:
            if not pub_name.language:
                continue
            self.assertIn(
                str(pub_name.language), src_pub_names.keys(),
                f'Missing publisher lang:{pub_name.language}')
            self.assertEqual(
                src_pub_names[str(pub_name.language)], str(pub_name),
                f'Mismatching publisger name lang:{pub_name.language} '
            )
def graph_from_record(self, record_dict, resource, record_ref):
    """
    RDF for an individual record - currently this is a specimen record

    Similar approach to:
        curl -L -H "Accept: application/rdf+ttl" http://data.rbge.org.uk/herb/E00321910

    Two nodes are described in ``self.g``: the metadata document
    (``record_ref``) and the specimen object (``record_ref`` + '#object').
    When the ``gbif_record_show`` action finds a matching GBIF record, the
    object is also linked to the corresponding GBIF URIs.

    Note: a 'subDepartment' key, if present, is popped from the
    ``record_dict`` passed in by the caller.

    :param record_dict: dict of record fields; '_id' and 'collectionCode'
        are required, 'occurrenceID', 'subDepartment', 'created',
        'modified', 'associatedMedia' and 'uuid' are read when present
    :param resource: object providing ``get_package_id()`` and ``id``
    :param record_ref: reference of the record document; it is used as a
        triple subject and '#object' is appended to it to build the
        object URI
    :return: None; triples are added to ``self.g``
    """
    context = self.get_context()

    namespaces = {
        'dc': DC,
        'dcat': DCAT,
        'dwc': DWC,
        'sdwc': SDWC,
        'void': VOID,
        'cc': CC,
        'foaf': FOAF,
        'dqv': DQV,
        'aiiso': AIISO,
        'tdwgi': TDWGI,
        'owl': OWL
    }

    g = self.g

    # Add some more namespaces
    for prefix, namespace in namespaces.items():
        g.bind(prefix, namespace)

    # Identifier used to look up the GBIF record further down
    occurrence_id = record_dict.get('occurrenceID')

    package_id = resource.get_package_id()

    # The specimen itself, as opposed to the document describing it
    object_uri = URIRef(record_ref + '#object')

    # Add publisher - as per BBC we don't need the full org description here
    nhm_uri = URIRef('http://nhm.ac.uk')

    # Add object description - the metadata and license
    g.add((record_ref, RDF.type, FOAF.Document))
    g.add((record_ref, CC.license, URIRef(METADATA_LICENCE)))

    # This metadata describes #dataset
    g.add((record_ref, FOAF.primaryTopic, object_uri))

    # Add the de-referenced link to record
    record_link = url_for('record', action='view', package_name=package_id,
                          resource_id=resource.id, record_id=record_dict['_id'],
                          qualified=True)
    g.add((record_ref, DC.hasVersion, URIRef(record_link)))

    # Add institution properties
    g.add((record_ref, FOAF.organization, nhm_uri))
    g.add((record_ref, AIISO.Department,
           Literal(get_department(record_dict['collectionCode']))))

    try:
        sub_dept = record_dict.pop('subDepartment')
    except KeyError:
        pass
    else:
        g.add((record_ref, AIISO.Division, Literal(sub_dept)))

    # Created and modified belong to the metadata record, not the specimen
    for term in ['created', 'modified']:
        value = record_dict.get(term)
        if value is None:
            # dict.get never raises KeyError, so a missing timestamp shows
            # up as None; skip it instead of handing None to parse_date
            continue
        # Parse into data format, and add as dates
        _date = parse_date(value)
        g.add((record_ref, getattr(DWC, term),
               Literal(_date.isoformat(), datatype=XSD.dateTime)))

    try:
        gbif_record = toolkit.get_action('gbif_record_show')(context, {
            'occurrence_id': occurrence_id
        })
    except NotFound:
        # No GBIF record: every GBIF key lookup below falls back to literals
        gbif_record = {}
    else:
        # Assert equivalence with the GBIF record
        # (URLs are built by formatting, not os.path.join, so the separator
        # is always '/' and non-string ids are accepted)
        gbif_uri = '%s/%s' % ('http://www.gbif.org/occurrence',
                              gbif_record['gbifID'])
        g.add((object_uri, OWL.sameAs, URIRef(gbif_uri)))
        # If we have a GBIF country code, add it
        # Annoyingly, this seems to be the only geographic element on GBIF with URI
        country_code = gbif_record.get('gbifCountryCode')
        if country_code:
            country_uri = '%s/%s' % ('http://www.gbif.org/country',
                                     country_code)
            g.add((object_uri, DWC.countryCode, URIRef(country_uri)))

    # Now, create the specimen object
    # Remove nulls and hidden fields from record_dict
    # (this rebinds the local name; the caller's dict is not filtered)
    record_dict = dict((k, v) for k, v in record_dict.items() if v)

    # Now add the actual specimen object
    g.add((object_uri, RDF.type, FOAF.Document))
    g.add((object_uri, RDF.type, SDWC.SimpleDarwinRecordSet))

    # Make sure decimal latitude and longitude are strings
    for d in ['decimalLatitude', 'decimalLongitude']:
        try:
            record_dict[d] = str(record_dict[d])
        except KeyError:
            pass

    # Adding images as JSON is rubbish! So lets try and do it properly
    try:
        associated_media = record_dict.pop('associatedMedia')
    except KeyError:
        pass
    else:
        images = json.loads(associated_media)
        for image in images:
            image_uri = URIRef(image['identifier'])
            g.set((image_uri, RDF.type, FOAF.Image))
            title = image.get('title', None)
            if title:
                g.set((image_uri, DC.title, Literal(title)))
            g.set((image_uri, CC.license, URIRef(image['license'])))
            g.set((image_uri, DC.RightsStatement, Literal(image['rightsHolder'])))
            g.set((image_uri, DC.Format, Literal(image['format'])))
            # Add link from image to object...
            g.set((image_uri, FOAF.depicts, object_uri))
            # And object to image
            g.add((object_uri, FOAF.depiction, image_uri))

    # This record belongs in X dataset
    dataset_ref = URIRef(dataset_uri({'id': package_id}) + '#dataset')
    g.add((object_uri, VOID.inDataset, dataset_ref))

    dwc_terms_dict = dwc_terms(record_dict.keys())

    # Handle dynamic properties separately
    dynamic_properties = dwc_terms_dict.pop('dynamicProperties')

    for terms in dwc_terms_dict.values():
        for term in terms.values():
            # Do we have a GBIF key value?
            # Uppercase first letter of term, and convert to GBIF key format => gbifGenusKey
            uc_term = term[0].upper() + term[1:]
            gbif_term_key = 'gbif%sKey' % uc_term
            gbif_key = gbif_record.get(gbif_term_key)

            # Do we have a GBIF key value? If we do, we can provide a URI to GBIF
            if gbif_key:
                gbif_uri = URIRef(
                    '%s/%s' % ('http://www.gbif.org/species', gbif_key))
                # Add the GBIF species URI with label
                g.add((gbif_uri, RDFS.label, Literal(record_dict.get(term))))
                # And associated our specimen object's DWC term with the GBIF URI
                g.add((object_uri, getattr(DWC, term), gbif_uri))
            else:
                # We do not have a GBIF key, so no URI: Add the term value as a literal
                g.add((object_uri, getattr(DWC, term),
                       Literal(record_dict.get(term))))

    g.add((object_uri, DC.identifier, Literal(record_dict.get('uuid'))))

    # Collect all dynamic properties into one JSON-encoded literal
    dynamic_properties_dict = {}
    for properties in dynamic_properties.values():
        for prop in properties:
            dynamic_properties_dict[prop] = record_dict.get(prop)
    if dynamic_properties_dict:
        g.add((object_uri, DWC.dynamicProperties,
               Literal(json.dumps(dynamic_properties_dict))))
def test_graph_from_dataset(self):
    '''
    Serialize a dataset with the ``schemaorg`` profile and check the graph.

    Verifies the dataset URI, the scalar schema.org properties, the
    published/modified dates, the keywords and the list-valued
    ``language`` and ``theme`` extras.
    '''
    # (key, value) pairs turned into CKAN-style extras below
    extra_values = [
        ('alternate_identifier', '[\"xyz\", \"abc\"]'),
        ('identifier', '26be5452-fc5c-11e7-8450-fea9aa178066'),
        ('version_notes', 'This is a beta version'),
        ('frequency', 'monthly'),
        ('language', '[\"en\"]'),
        ('theme', '[\"http://eurovoc.europa.eu/100142\", \"http://eurovoc.europa.eu/100152\"]'),
        ('conforms_to', '[\"Standard 1\", \"Standard 2\"]'),
        ('access_rights', 'public'),
        ('documentation', '[\"http://dataset.info.org/doc1\", \"http://dataset.info.org/doc2\"]'),
        ('provenance', 'Some statement about provenance'),
        ('dcat_type', 'test-type'),
        ('related_resource', '[\"http://dataset.info.org/related1\", \"http://dataset.info.org/related2\"]'),
        ('has_version', '[\"https://data.some.org/catalog/datasets/derived-dataset-1\", \"https://data.some.org/catalog/datasets/derived-dataset-2\"]'),
        ('is_version_of', '[\"https://data.some.org/catalog/datasets/original-dataset\"]'),
        ('source', '[\"https://data.some.org/catalog/datasets/source-dataset-1\", \"https://data.some.org/catalog/datasets/source-dataset-2\"]'),
        ('sample', '[\"https://data.some.org/catalog/datasets/9df8df51-63db-37a8-e044-0003ba9b0d98/sample\"]'),
    ]
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'notes': 'Lorem ipsum',
        'url': 'http://example.com/ds1',
        'version': '1.0b',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'extras': [
            {'key': extra_key, 'value': extra_value}
            for extra_key, extra_value in extra_values
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset)

    eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

    # Basic fields, followed by the dates (same order as they are listed)
    scalar_checks = [
        (RDF.type, SCHEMA.Dataset),
        (SCHEMA.name, dataset['title']),
        (SCHEMA.description, dataset['notes']),
        (SCHEMA.version, dataset['version']),
        (SCHEMA.identifier, extras['identifier']),
        (SCHEMA.datePublished, dataset['metadata_created']),
        (SCHEMA.dateModified, dataset['metadata_modified']),
    ]
    for predicate, expected in scalar_checks:
        assert self._triple(graph, dataset_ref, predicate, expected)

    # Tags
    keyword_triples = list(graph.triples((dataset_ref, SCHEMA.keywords, None)))
    eq_(len(keyword_triples), 2)
    for tag in dataset['tags']:
        assert self._triple(graph, dataset_ref, SCHEMA.keywords, tag['name'])

    # List: one triple per item of the JSON-encoded extra
    list_checks = [
        ('language', SCHEMA.inLanguage, Literal),
        ('theme', SCHEMA.about, URIRef),
    ]
    for extra_name, predicate, build_term in list_checks:
        values = json.loads(extras[extra_name])
        found = list(graph.triples((dataset_ref, predicate, None)))
        eq_(len(found), len(values))
        for raw in values:
            assert self._triple(graph, dataset_ref, predicate, build_term(raw))
def test_graph_from_dataset(self):
    '''
    Serialize a licensed dataset with the ``schemaorg`` profile.

    On top of the scalar properties, dates and keywords, this variant
    checks that the license URL is exposed as ``schema:license`` and that
    ``schema:url`` is the dataset page on the test site.
    '''
    # (key, value) pairs turned into CKAN-style extras below
    extra_values = [
        ('alternate_identifier', '[\"xyz\", \"abc\"]'),
        ('identifier', '26be5452-fc5c-11e7-8450-fea9aa178066'),
        ('version_notes', 'This is a beta version'),
        ('frequency', 'monthly'),
        ('language', '[\"en\"]'),
        ('theme', '[\"http://eurovoc.europa.eu/100142\", \"http://eurovoc.europa.eu/100152\"]'),
        ('conforms_to', '[\"Standard 1\", \"Standard 2\"]'),
        ('access_rights', 'public'),
        ('documentation', '[\"http://dataset.info.org/doc1\", \"http://dataset.info.org/doc2\"]'),
        ('provenance', 'Some statement about provenance'),
        ('dcat_type', 'test-type'),
        ('related_resource', '[\"http://dataset.info.org/related1\", \"http://dataset.info.org/related2\"]'),
        ('has_version', '[\"https://data.some.org/catalog/datasets/derived-dataset-1\", \"https://data.some.org/catalog/datasets/derived-dataset-2\"]'),
        ('is_version_of', '[\"https://data.some.org/catalog/datasets/original-dataset\"]'),
        ('source', '[\"https://data.some.org/catalog/datasets/source-dataset-1\", \"https://data.some.org/catalog/datasets/source-dataset-2\"]'),
        ('sample', '[\"https://data.some.org/catalog/datasets/9df8df51-63db-37a8-e044-0003ba9b0d98/sample\"]'),
    ]
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'notes': 'Lorem ipsum',
        'url': 'http://example.com/ds1',
        'version': '1.0b',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'license_title': 'CC-BY 3.0',
        'license_url': 'http://creativecommons.org/licenses/by/3.0/',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'extras': [
            {'key': extra_key, 'value': extra_value}
            for extra_key, extra_value in extra_values
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset)

    eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

    # Basic fields
    scalar_checks = [
        (RDF.type, SCHEMA.Dataset),
        (SCHEMA.name, dataset['title']),
        (SCHEMA.description, dataset['notes']),
        (SCHEMA.version, dataset['version']),
        (SCHEMA.license, dataset['license_url']),
        (SCHEMA.identifier, extras['identifier']),
    ]
    for predicate, expected in scalar_checks:
        assert self._triple(graph, dataset_ref, predicate, expected)

    # The object of the schema:url triple must be the dataset page URL
    url_triple = self._triple(graph, dataset_ref, SCHEMA.url, None)
    landing_url = url_triple[2]
    assert landing_url
    eq_(landing_url, Literal('http://test.ckan.net/dataset/%s' % dataset['name']))

    # Dates
    date_checks = [
        (SCHEMA.datePublished, 'metadata_created'),
        (SCHEMA.dateModified, 'metadata_modified'),
    ]
    for predicate, field in date_checks:
        assert self._triple(graph, dataset_ref, predicate, dataset[field])

    # Tags
    keyword_triples = list(graph.triples((dataset_ref, SCHEMA.keywords, None)))
    eq_(len(keyword_triples), 2)
    for tag in dataset['tags']:
        assert self._triple(graph, dataset_ref, SCHEMA.keywords, tag['name'])

    # List: one triple per item of the JSON-encoded extra
    list_checks = [
        ('language', SCHEMA.inLanguage, Literal),
    ]
    for extra_name, predicate, build_term in list_checks:
        values = json.loads(extras[extra_name])
        found = list(graph.triples((dataset_ref, predicate, None)))
        eq_(len(found), len(values))
        for raw in values:
            assert self._triple(graph, dataset_ref, predicate, build_term(raw))
def test_graph_from_dataset(self):
    '''
    Serialize a plain CKAN dataset dict and verify the resulting DCAT graph.

    Checks the dataset URI, the scalar fields, the keywords, the
    issued/modified dates (typed as ``xsd:dateTime``) and every
    list-valued extra, where each list item must appear as one triple
    built with the term type given for that extra.
    '''
    # (key, value) pairs turned into CKAN-style extras below
    extra_values = [
        ('alternate_identifier', '[\"xyz\", \"abc\"]'),
        ('version_notes', 'This is a beta version'),
        ('frequency', 'monthly'),
        ('language', '[\"en\"]'),
        ('theme', '[\"http://eurovoc.europa.eu/100142\", \"http://eurovoc.europa.eu/100152\"]'),
        ('conforms_to', '[\"Standard 1\", \"Standard 2\"]'),
        ('access_rights', 'public'),
        ('documentation', '[\"http://dataset.info.org/doc1\", \"http://dataset.info.org/doc2\"]'),
        ('provenance', 'Some statement about provenance'),
        ('dcat_type', 'test-type'),
        ('related_resource', '[\"http://dataset.info.org/related1\", \"http://dataset.info.org/related2\"]'),
        ('has_version', '[\"https://data.some.org/catalog/datasets/derived-dataset-1\", \"https://data.some.org/catalog/datasets/derived-dataset-2\"]'),
        ('is_version_of', '[\"https://data.some.org/catalog/datasets/original-dataset\"]'),
        ('source', '[\"https://data.some.org/catalog/datasets/source-dataset-1\", \"https://data.some.org/catalog/datasets/source-dataset-2\"]'),
        ('sample', '[\"https://data.some.org/catalog/datasets/9df8df51-63db-37a8-e044-0003ba9b0d98/sample\"]'),
    ]
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'notes': 'Lorem ipsum',
        'url': 'http://example.com/ds1',
        'version': '1.0b',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'extras': [
            {'key': extra_key, 'value': extra_value}
            for extra_key, extra_value in extra_values
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset)

    eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

    # Basic fields
    scalar_checks = [
        (RDF.type, DCAT.Dataset),
        (DCT.title, dataset['title']),
        (DCT.description, dataset['notes']),
        (OWL.versionInfo, dataset['version']),
        (ADMS.versionNotes, extras['version_notes']),
        (DCT.accrualPeriodicity, extras['frequency']),
        (DCT.accessRights, extras['access_rights']),
        (DCT.provenance, extras['provenance']),
        (DCT.type, extras['dcat_type']),
    ]
    for predicate, expected in scalar_checks:
        assert self._triple(graph, dataset_ref, predicate, expected)

    # Tags
    keyword_triples = list(graph.triples((dataset_ref, DCAT.keyword, None)))
    eq_(len(keyword_triples), 2)
    for tag in dataset['tags']:
        assert self._triple(graph, dataset_ref, DCAT.keyword, tag['name'])

    # Dates
    date_checks = [
        (DCT.issued, 'metadata_created'),
        (DCT.modified, 'metadata_modified'),
    ]
    for predicate, field in date_checks:
        assert self._triple(graph, dataset_ref, predicate, dataset[field],
                            XSD.dateTime)

    # List: one triple per item of the JSON-encoded extra
    list_checks = [
        ('language', DCT.language, Literal),
        ('theme', DCAT.theme, URIRef),
        ('conforms_to', DCT.conformsTo, Literal),
        ('alternate_identifier', ADMS.identifier, Literal),
        ('documentation', FOAF.page, Literal),
        ('related_resource', DCT.relation, Literal),
        ('has_version', DCT.hasVersion, Literal),
        ('is_version_of', DCT.isVersionOf, Literal),
        ('source', DCT.source, Literal),
        ('sample', ADMS.sample, Literal),
    ]
    for extra_name, predicate, build_term in list_checks:
        values = json.loads(extras[extra_name])
        found = list(graph.triples((dataset_ref, predicate, None)))
        eq_(len(found), len(values))
        for raw in values:
            assert self._triple(graph, dataset_ref, predicate, build_term(raw))
def graph_from_dataset(self, dataset_dict, dataset_ref): title = dataset_dict.get('title') g = self.g for prefix, namespace in it_namespaces.iteritems(): g.bind(prefix, namespace) ### add a further type for the Dataset node g.add((dataset_ref, RDF.type, DCATAPIT.Dataset)) ### replace themes value = self._get_dict_value(dataset_dict, 'theme') if value: for theme in value.split(','): self.g.remove((dataset_ref, DCAT.theme, URIRef(theme))) theme = theme.replace('{', '').replace('}', '') self.g.add( (dataset_ref, DCAT.theme, URIRef(THEME_BASE_URI + theme))) self._add_concept(THEME_CONCEPTS, theme) else: self.g.add((dataset_ref, DCAT.theme, URIRef(THEME_BASE_URI + DEFAULT_THEME_KEY))) self._add_concept(THEME_CONCEPTS, DEFAULT_THEME_KEY) ### replace languages value = self._get_dict_value(dataset_dict, 'language') if value: for lang in value.split(','): self.g.remove((dataset_ref, DCT.language, Literal(lang))) lang = lang.replace('{', '').replace('}', '') self.g.add( (dataset_ref, DCT.language, URIRef(LANG_BASE_URI + lang))) # self._add_concept(LANG_CONCEPTS, lang) ### add spatial (EU URI) value = self._get_dict_value(dataset_dict, 'geographical_name') if value: for gname in value.split(','): gname = gname.replace('{', '').replace('}', '') dct_location = BNode() self.g.add((dataset_ref, DCT.spatial, dct_location)) self.g.add((dct_location, RDF['type'], DCT.Location)) # Try and add a Concept from the spatial vocabulary if self._add_concept(GEO_CONCEPTS, gname): self.g.add((dct_location, DCATAPIT.geographicalIdentifier, Literal(GEO_BASE_URI + gname))) # geo concept is not really required, but may be a useful adding self.g.add((dct_location, LOCN.geographicalName, URIRef(GEO_BASE_URI + gname))) else: # The dataset field is not a controlled tag, let's create a Concept out of the label we have concept = BNode() self.g.add((concept, RDF['type'], SKOS.Concept)) self.g.add((concept, SKOS.prefLabel, Literal(gname))) self.g.add((dct_location, LOCN.geographicalName, concept)) ### 
add spatial (GeoNames) value = self._get_dict_value(dataset_dict, 'geographical_geonames_url') if value: dct_location = BNode() self.g.add((dataset_ref, DCT.spatial, dct_location)) self.g.add((dct_location, RDF['type'], DCT.Location)) self.g.add((dct_location, DCATAPIT.geographicalIdentifier, Literal(value))) ### replace periodicity self._remove_node(dataset_dict, dataset_ref, ('frequency', DCT.accrualPeriodicity, None, Literal)) self._add_uri_node( dataset_dict, dataset_ref, ('frequency', DCT.accrualPeriodicity, DEFAULT_FREQ_CODE, URIRef), FREQ_BASE_URI) # self._add_concept(FREQ_CONCEPTS, dataset_dict.get('frequency', DEFAULT_VOCABULARY_KEY)) ### replace landing page self._remove_node(dataset_dict, dataset_ref, ('url', DCAT.landingPage, None, URIRef)) landing_page_uri = None if dataset_dict.get('name'): landing_page_uri = '{0}/dataset/{1}'.format( catalog_uri().rstrip('/'), dataset_dict['name']) else: landing_page_uri = dataset_uri( dataset_dict) # TODO: preserve original URI if harvested self.g.add((dataset_ref, DCAT.landingPage, URIRef(landing_page_uri))) ### conformsTo self.g.remove((dataset_ref, DCT.conformsTo, None)) value = self._get_dict_value(dataset_dict, 'conforms_to') if value: for item in value.split(','): standard = BNode() self.g.add((dataset_ref, DCT.conformsTo, standard)) self.g.add((standard, RDF['type'], DCT.Standard)) self.g.add((standard, RDF['type'], DCATAPIT.Standard)) self.g.add((standard, DCT.identifier, Literal(item))) ### publisher # DCAT by default creates this node # <dct:publisher> # <foaf:Organization rdf:about="http://10.10.100.75/organization/55535226-f82a-4cf7-903a-3e10afeaa79a"> # <foaf:name>orga2_test</foaf:name> # </foaf:Organization> # </dct:publisher> for s, p, o in g.triples((dataset_ref, DCT.publisher, None)): #log.info("Removing publisher %r", o) g.remove((s, p, o)) self._add_agent(dataset_dict, dataset_ref, 'publisher', DCT.publisher) ### Rights holder : Agent holder_ref = self._add_agent(dataset_dict, dataset_ref, 
'holder', DCT.rightsHolder) ### Autore : Agent self._add_agent(dataset_dict, dataset_ref, 'creator', DCT.creator) ### Point of Contact # <dcat:contactPoint rdf:resource="http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri"/> # <!-- http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri --> # <dcatapit:Organization rdf:about="http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri"> # <rdf:type rdf:resource="&vcard;Kind"/> # <rdf:type rdf:resource="&vcard;Organization"/> # <vcard:hasEmail rdf:resource="mailto:[email protected]"/> # <vcard:fn>Regione Liguria - Sportello Cartografico</vcard:fn> # </dcatapit:Organization> # TODO: preserve original info if harvested # retrieve the contactPoint added by the euro serializer euro_poc = g.value(subject=dataset_ref, predicate=DCAT.contactPoint, object=None, any=False) # euro poc has this format: # <dcat:contactPoint> # <vcard:Organization rdf:nodeID="Nfcd06f452bcd41f48f33c45b0c95979e"> # <vcard:fn>THE ORGANIZATION NAME</vcard:fn> # <vcard:hasEmail>THE ORGANIZATION EMAIL</vcard:hasEmail> # </vcard:Organization> # </dcat:contactPoint> if euro_poc: g.remove((dataset_ref, DCAT.contactPoint, euro_poc)) org_id = dataset_dict.get('organization', {}).get('id') # get orga info org_show = logic.get_action('organization_show') try: org_dict = org_show({}, { 'id': org_id, 'include_datasets': False, 'include_tags': False, 'include_users': False, 'include_groups': False, 'include_extras': True, 'include_followers': False }) except Exception, e: org_dict = {}