def test_access_via_attribute():
    """License fields are readable as attributes (v3-style access)."""
    # Renamed local: `license` shadows the `license` builtin.
    lic = LicenseRegister()["cc-by"]
    assert lic.od_conformance == "approved"
def test_access_via_dict_2():
    """as_dict() exposes the legacy boolean compliance flags."""
    # Renamed local: `license` shadows the `license` builtin.
    lic = LicenseRegister()["cc-by"]
    lic_dict = lic.as_dict()
    assert lic_dict["is_okd_compliant"]
    assert not lic_dict["is_osi_compliant"]
def test_import_v2_style_register():
    """A v2-style SPDX id ("CC-BY-4.0") resolves to a full License object."""
    reg = LicenseRegister()
    # Renamed local: `license` shadows the `license` builtin.
    lic = reg["CC-BY-4.0"]
    assert lic.url == "https://creativecommons.org/licenses/by/4.0/"
    assert lic.isopen()
    assert lic.title == "Creative Commons Attribution 4.0"
def test_access_via_attribute_2():
    """Boolean compliance flags are readable as attributes."""
    # Renamed local: `license` shadows the `license` builtin.
    lic = LicenseRegister()["cc-by"]
    assert lic.is_okd_compliant
    assert not lic.is_osi_compliant
def test_access_via_key_2():
    """Boolean compliance flags are readable via dict-style key access."""
    # Renamed local: `license` shadows the `license` builtin.
    lic = LicenseRegister()["cc-by"]
    assert lic["is_okd_compliant"]
    assert not lic["is_osi_compliant"]
def resource_to_xml(identifier, pkg_dict, res_dict):
    """Build DataCite XML metadata for a single CKAN resource.

    @param identifier: a DOI identifier
    @param pkg_dict: the parent CKAN package dict
    @param res_dict: the CKAN resource dict
    @return: XML-formatted metadata ready to send to the DataCite API
    """
    _validate_package(pkg_dict)
    _validate_resource(res_dict)
    package_doi = CeonPackageDOI.get(pkg_dict['id'])
    title = res_dict['name'].encode('unicode-escape')
    creators = _get_creators(pkg_dict['id'])
    publisher = pkg_dict['publisher'].encode('unicode-escape')
    # Publication year: explicit field wins; otherwise derive it from
    # metadata_created, which may already be a datetime or a string.
    if 'publication_year' in pkg_dict:
        publication_year = pkg_dict['publication_year']
    elif isinstance(pkg_dict['metadata_created'], datetime):
        publication_year = pkg_dict['metadata_created'].year
    else:
        publication_year = parser.parse(pkg_dict['metadata_created']).year
    description = res_dict.get('description', '').encode('unicode-escape')
    license_id = CeonResourceLicense.get(res_dict['id']).license_id
    # BUG FIX: license_url/license_title were unbound (NameError) further
    # down when the register returned a falsy license while license_id was
    # truthy; default them before the lookup.
    license_url = None
    license_title = None
    lic = LicenseRegister()[license_id]
    if lic:
        license_url = lic.url
        license_title = lic.title.encode('unicode-escape')
    file_format = res_dict.get('format', '')
    if file_format:
        file_format = file_format.encode('unicode-escape')
    file_size = res_dict.get('size', '')
    if file_size:
        file_size = file_size.encode('unicode-escape')
    date_available = parser.parse(res_dict.get('created')).strftime(
        '%Y-%m-%d') if 'created' in res_dict else None
    if 'last_modified' in res_dict and res_dict['last_modified']:
        date_updated = parser.parse(
            res_dict.get('last_modified')).strftime('%Y-%m-%d')
    else:
        date_updated = date_available
    # Prepare metadata
    metadata = etree.Element('{%s}resource' % (METADATA_NAMESPACE),
                             nsmap={None: METADATA_NAMESPACE,
                                    'xsi': XSI_NAMESPACE})
    metadata.set('{%s}schemaLocation' % (XSI_NAMESPACE), XSI_SCHEMALOCATION)
    e_identifier = etree.Element('identifier', identifierType='DOI')
    e_identifier.text = identifier
    metadata.append(e_identifier)
    e_titles = etree.SubElement(metadata, 'titles')
    etree.SubElement(e_titles, 'title').text = title
    e_creators = etree.SubElement(metadata, 'creators')
    for c in creators:
        e_creators.append(c)
    etree.SubElement(metadata, 'publisher').text = publisher
    etree.SubElement(metadata, 'publicationYear').text = \
        '{}'.format(publication_year)
    # Link the resource DOI back to its parent package DOI.
    e_related_identifiers = etree.SubElement(metadata, 'relatedIdentifiers')
    e_related_identifier = etree.Element('relatedIdentifier',
                                         relatedIdentifierType='DOI',
                                         relationType='IsPartOf')
    e_related_identifier.text = package_doi.identifier
    e_related_identifiers.append(e_related_identifier)
    if description:
        e_descriptions = etree.SubElement(metadata, 'descriptions')
        e_description = etree.Element('description', descriptionType='Other')
        e_description.text = description
        e_descriptions.append(e_description)
    # Every record is marked open access; the concrete license follows.
    e_rights_list = etree.SubElement(metadata, 'rightsList')
    e_rights = etree.Element('rights',
                             rightsURI='info:eu-repo/semantics/openAccess')
    e_rights_list.append(e_rights)
    if license_id:
        if license_url:
            e_rights = etree.Element('rights', rightsURI=license_url)
        else:
            e_rights = etree.Element('rights')
        e_rights.text = license_title if license_title else license_id
        e_rights_list.append(e_rights)
    if file_format:
        e_formats = etree.SubElement(metadata, 'formats')
        etree.SubElement(e_formats, 'format').text = file_format
    if file_size:
        e_sizes = etree.SubElement(metadata, 'sizes')
        etree.SubElement(e_sizes, 'size').text = \
            '{}'.format(file_size)
    # BUG FIX: e_dates was unbound (NameError) when only date_updated was
    # set; also replaced `if not e_dates:` which relied on lxml's
    # deprecated element truth-testing.
    e_dates = None
    if date_available:
        e_dates = etree.SubElement(metadata, 'dates')
        e_date = etree.Element('date', dateType='Available')
        e_date.text = date_available
        e_dates.append(e_date)
    if date_updated:
        if e_dates is None:
            e_dates = etree.SubElement(metadata, 'dates')
        e_date = etree.Element('date', dateType='Updated')
        e_date.text = date_updated
        e_dates.append(e_date)
    return etree.tostring(metadata, pretty_print=True)
def package_to_xml(identifier, pkg_dict):
    """Build DataCite XML metadata for a CKAN package (dataset).

    @param identifier: a DOI identifier
    @param pkg_dict: a CKAN Package dict
    @return: XML-formatted metadata ready to send to the DataCite API
    """
    _validate_package(pkg_dict)
    title = pkg_dict['title'].encode('unicode-escape')
    creators = _get_creators(pkg_dict['id'])
    resource_identifiers = _get_resource_dois(pkg_dict['id'])
    publisher = pkg_dict['publisher'].encode('unicode-escape')
    # Publication year: explicit field wins; otherwise derive it from
    # metadata_created, which may already be a datetime or a string.
    if 'publication_year' in pkg_dict:
        publication_year = pkg_dict['publication_year']
    elif isinstance(pkg_dict['metadata_created'], datetime):
        publication_year = pkg_dict['metadata_created'].year
    else:
        publication_year = parser.parse(pkg_dict['metadata_created']).year
    # Packages always carry the fixed PKG_LICENSE_ID license.
    license_title = LicenseRegister()[PKG_LICENSE_ID].title.encode(
        'unicode-escape')
    subject = _ensure_list(pkg_dict.get('tag_string', '').split(','))
    subject.sort()
    description = pkg_dict.get('notes', '').encode('unicode-escape')
    # Extension fields arrive as "CODE - label"; keep only the code part.
    oa_funder = _get_first_elem(pkg_dict, 'oa_funder')
    if oa_funder and '-' in oa_funder:
        oa_funder = oa_funder[0:oa_funder.find('-')].strip()
    oa_funding_program = _get_first_elem(pkg_dict, 'oa_funding_program')
    res_type = _get_first_elem(pkg_dict, 'res_type')
    if res_type and '-' in res_type:
        res_type = res_type[0:res_type.find('-')].strip()
    sci_discipline = _get_first_elem(pkg_dict, 'sci_discipline')
    if sci_discipline and '-' in sci_discipline:
        sci_discipline = sci_discipline[0:sci_discipline.find('-')].strip()
    oa_grant_number = pkg_dict.get('oa_grant_number',
                                   '').encode('unicode-escape')
    rel_citation = pkg_dict.get('rel_citation', '').encode('unicode-escape')
    version = None
    if 'version' in pkg_dict:
        version = pkg_dict.get('version')
        if version:
            version = version.encode('unicode-escape')
    # The scientific discipline is exported as an extra subject keyword.
    if sci_discipline:
        if subject:
            subject.append(sci_discipline)
        else:
            subject = [sci_discipline]
    # Prepare metadata
    metadata = etree.Element('{%s}resource' % (METADATA_NAMESPACE),
                             nsmap={None: METADATA_NAMESPACE,
                                    'xsi': XSI_NAMESPACE})
    metadata.set('{%s}schemaLocation' % (XSI_NAMESPACE), XSI_SCHEMALOCATION)
    e_identifier = etree.Element('identifier', identifierType='DOI')
    e_identifier.text = identifier
    metadata.append(e_identifier)
    e_titles = etree.SubElement(metadata, 'titles')
    etree.SubElement(e_titles, 'title').text = title
    e_creators = etree.SubElement(metadata, 'creators')
    for c in creators:
        e_creators.append(c)
    etree.SubElement(metadata, 'publisher').text = publisher
    etree.SubElement(metadata, 'publicationYear').text = \
        '{}'.format(publication_year)
    e_rights_list = etree.SubElement(metadata, 'rightsList')
    etree.SubElement(e_rights_list, 'rights').text = license_title
    if subject:
        e_subjects = etree.SubElement(metadata, 'subjects')
        for s in _ensure_list(subject):
            etree.SubElement(e_subjects, 'subject').text = s
    if description:
        e_descriptions = etree.SubElement(metadata, 'descriptions')
        e_description = etree.Element('description',
                                      descriptionType='Abstract')
        e_description.text = description
        e_descriptions.append(e_description)
    # Related identifiers: an optional citation URL plus DOIs of every
    # resource belonging to this package.
    if rel_citation or len(resource_identifiers) > 0:
        e_rel_identifiers = etree.SubElement(metadata, 'relatedIdentifiers')
        if rel_citation:
            e_rel_identifier = etree.Element('relatedIdentifier',
                                             relatedIdentifierType='URL',
                                             relationType='IsReferencedBy')
            e_rel_identifier.text = rel_citation
            e_rel_identifiers.append(e_rel_identifier)
        for e_related_identifier in resource_identifiers:
            e_rel_identifiers.append(e_related_identifier)
    # The funder is recorded as a DataCite contributor; when program and
    # grant number are both known, an OpenAIRE-style grant agreement
    # identifier is attached as well.
    if oa_funder:
        e_contributors = etree.SubElement(metadata, 'contributors')
        if oa_funding_program and oa_grant_number:
            oa_funding_id = oa_funding_program.replace('-', '/')
            project_info = 'info:eu-repo/grantAgreement/{0}/{1}///'
            project_info = project_info.format(oa_funding_id, oa_grant_number)
            e_contributor = etree.Element('contributor',
                                          contributorType='Funder')
            etree.SubElement(e_contributor,
                             'contributorName').text = oa_funder
            e_name_identifier = etree.Element('nameIdentifier',
                                              nameIdentifierScheme='info')
            e_name_identifier.text = project_info
            e_contributor.append(e_name_identifier)
        else:
            e_contributor = etree.Element('contributor',
                                          contributorType='Funder')
            etree.SubElement(e_contributor,
                             'contributorName').text = oa_funder
        e_contributors.append(e_contributor)
    if res_type:
        e_resource_type = etree.Element('resourceType',
                                        resourceTypeGeneral=res_type)
        metadata.append(e_resource_type)
    if version:
        etree.SubElement(metadata, 'version').text = version
    return etree.tostring(metadata, pretty_print=True)
def export_resource_to_rdf(resource_dict, dataset_dict, _format='xml'):
    """Export the resource in RDF format.

    Builds an RDF Graph containing only the selected resource and exports
    it to the selected format (default ``xml``).

    :param dict resource_dict: resource metadata.
    :param dict dataset_dict: dataset metadata.
    :param str _format: export format. Default is ``xml``.

    :returns: the serialized RDF graph of the resource.
    :rtype: str
    """
    g = Graph()
    distribution = URIRef(resource_uri(resource_dict))
    g.add((distribution, RDF.type, DCAT.Distribution))
    # Fall back to the dataset's license when the resource has none.
    if 'license' not in resource_dict and 'license_id' in dataset_dict:
        lr = LicenseRegister()
        _license = lr.get(dataset_dict['license_id'])
        # BUG FIX: lr.get() returns None for an unknown license id; guard
        # before dereferencing .url to avoid an AttributeError.
        if _license is not None:
            resource_dict['license'] = _license.url
    # Simple values
    items = [
        ('name', DCT.title, None, Literal),
        ('description', DCT.description, None, Literal),
        ('status', ADMS.status, None, Literal),
        ('rights', DCT.rights, None, Literal),
        ('license', DCT.license, None, URIRef),
    ]
    for itm in items:
        key, rdf_prop, def_value, rdf_type = itm
        value = resource_dict.get(key, def_value)
        if value:
            g.add((distribution, rdf_prop, rdf_type(value)))
    # Lists: each key may hold a single value or a list of values.
    items = [
        ('documentation', FOAF.page, None, URIRef),
        ('language', DCT.language, None, URIRef),
        ('conforms_to', DCT.conformsTo, None, URIRef),
    ]
    # Inlined equivalent of profile's _add_list_triples_from_dict().
    for itm in items:
        key, rdf_prop, def_value, rdf_type = itm
        value = resource_dict.get(key, def_value)
        if value:
            if isinstance(value, list):
                for val in value:
                    g.add((distribution, rdf_prop, rdf_type(val)))
            else:
                g.add((distribution, rdf_prop, rdf_type(value)))
    # Format: a value containing '/' is treated as an IANA media type.
    if '/' in resource_dict.get('format', ''):
        g.add((distribution, DCAT.mediaType,
               Literal(resource_dict['format'])))
    else:
        if resource_dict.get('format'):
            g.add((distribution, DCT['format'],
                   Literal(resource_dict['format'])))
        if resource_dict.get('mimetype'):
            g.add((distribution, DCAT.mediaType,
                   Literal(resource_dict['mimetype'])))
    # URL
    url = resource_dict.get('url')
    download_url = resource_dict.get('download_url')
    if download_url:
        g.add((distribution, DCAT.downloadURL, URIRef(download_url)))
    # Emit accessURL only when it adds information beyond downloadURL.
    if (url and not download_url) or (url and url != download_url):
        g.add((distribution, DCAT.accessURL, URIRef(url)))
    # Dates
    items = [
        ('issued', DCT.issued, None, Literal),
        ('modified', DCT.modified, None, Literal),
    ]
    # Inlined equivalent of profile's _add_date_triples_from_dict().
    for itm in items:
        key, rdf_prop, def_value, rdf_type = itm
        value = resource_dict.get(key, def_value)
        if value:
            g.add((distribution, rdf_prop, rdf_type(value)))
    # Numbers: prefer a typed decimal; fall back to the raw string when
    # the size is not numeric.
    if resource_dict.get('size'):
        try:
            g.add((distribution, DCAT.byteSize,
                   Literal(float(resource_dict['size']),
                           datatype=XSD.decimal)))
        except (ValueError, TypeError):
            g.add((distribution, DCAT.byteSize,
                   Literal(resource_dict['size'])))
    # Checksum (SPDX vocabulary); algorithm may be a URI or plain text.
    if resource_dict.get('hash'):
        checksum = BNode()
        g.add((checksum, SPDX.checksumValue,
               Literal(resource_dict['hash'], datatype=XSD.hexBinary)))
        if resource_dict.get('hash_algorithm'):
            if resource_dict['hash_algorithm'].startswith('http'):
                g.add((checksum, SPDX.algorithm,
                       URIRef(resource_dict['hash_algorithm'])))
            else:
                g.add((checksum, SPDX.algorithm,
                       Literal(resource_dict['hash_algorithm'])))
        g.add((distribution, SPDX.checksum, checksum))
    return g.serialize(format=_format)
def graph_from_dataset(self, dataset_dict, dataset_ref):
    """Serialize a CKAN dataset dict as DCAT RDF triples on self.g.

    Emits the dataset node plus contact point, publisher, temporal
    extent and one dcat:Distribution node per resource.

    :param dict dataset_dict: the CKAN dataset dict to serialize.
    :param dataset_ref: RDF node (URIRef/BNode) representing the dataset.
    """
    g = self.g
    # iteritems(): this module targets Python 2.
    for prefix, namespace in namespaces.iteritems():
        g.bind(prefix, namespace)
    g.add((dataset_ref, RDF.type, DCAT.Dataset))
    # Basic fields (key, predicate, fallback keys, node type)
    items = [
        ('title', DCT.title, None, Literal),
        ('notes', DCT.description, None, Literal),
        ('url', DCAT.landingPage, None, URIRef),
        # FIXME: Should use the global unique identifer
        ('identifier', DCT.identifier, ['guid', 'id'], Literal),
        ('version', OWL.versionInfo, ['dcat_version'], Literal),
        ('version_notes', ADMS.versionNotes, None, Literal),
        ('frequency', DCT.accrualPeriodicity, None, URIRef),
        ('subject', DCT.subject, None, URIRef),
        ('provenance', DCT.provenance, None, URIRef),
        ('creator', DCT.creator, None, URIRef),
        ('is_part_of', DCT.ispartof, None, URIRef)
    ]
    self._add_triples_from_dict(dataset_dict, dataset_ref, items)
    # Tags
    for tag in dataset_dict.get('tags', []):
        g.add((dataset_ref, DCAT.keyword, Literal(tag['name'])))
    # Dates
    items = [
        ('issued', DCT.issued, ['metadata_created'], Literal),
        ('modified', DCT.modified, ['metadata_modified'], Literal),
    ]
    self._add_date_triples_from_dict(dataset_dict, dataset_ref, items)
    # Lists
    items = [
        ('language', DCT.language, None, URIRef),
        ('theme', DCAT.theme, None, URIRef),
        ('spatial_uri', DCT.spatial, None, URIRef),
        ('conforms_to', DCT.conformsTo, None, URIRef),
        ('alternate_identifier', ADMS.identifier, None, Literal),
        ('documentation', FOAF.page, None, URIRef),
        ('access_rights', DCT.accessRights, None, URIRef),
        ('access_rights_comment', DCT.accessRightsComment, None, URIRef),
        ('related_resource', DCT.relation, None, URIRef),
        ('has_version', DCT.hasVersion, None, Literal),
        ('is_version_of', DCT.isVersionOf, None, Literal),
        ('source', DCT.source, None, Literal),
        ('sample', ADMS.sample, None, Literal),
    ]
    self._add_list_triples_from_dict(dataset_dict, dataset_ref, items)
    # Contact details: emitted when any contact-ish field is present;
    # maintainer/author fields serve as fallbacks for name and email.
    if any([
        self._get_dataset_value(dataset_dict, 'contact_uri'),
        self._get_dataset_value(dataset_dict, 'contact_name'),
        self._get_dataset_value(dataset_dict, 'contact_email'),
        self._get_dataset_value(dataset_dict, 'maintainer'),
        self._get_dataset_value(dataset_dict, 'maintainer_email'),
        self._get_dataset_value(dataset_dict, 'author'),
        self._get_dataset_value(dataset_dict, 'author_email'),
    ]):
        contact_uri = self._get_dataset_value(dataset_dict, 'contact_uri')
        if contact_uri:
            contact_details = URIRef(contact_uri)
        else:
            contact_details = BNode()
        # FIXME: DIFI doesn't like VCARD.Organization
        g.add((contact_details, RDF.type, VCARD.Kind))
        g.add((dataset_ref, DCAT.contactPoint, contact_details))
        items = [
            ('contact_name', VCARD.fn, ['maintainer', 'author'], Literal),
            ('contact_email', VCARD.hasEmail,
             ['maintainer_email', 'author_email'], Literal),
        ]
        self._add_triples_from_dict(dataset_dict, contact_details, items)
    # Publisher
    if any([
        self._get_dataset_value(dataset_dict, 'publisher_uri'),
        self._get_dataset_value(dataset_dict, 'publisher_name'),
        self._get_dataset_value(dataset_dict, 'publisher_identifier'),
        dataset_dict.get('organization'),
    ]):
        publisher_uri = publisher_uri_from_dataset_dict(dataset_dict)
        if publisher_uri:
            publisher_details = URIRef(publisher_uri)
        else:
            # No organization nor publisher_uri
            publisher_details = BNode()
        # FIXME: DIFI doesn't like FOAF.Organization
        g.add((publisher_details, RDF.type, FOAF.Agent))
        g.add((dataset_ref, DCT.publisher, publisher_details))
        publisher_name = self._get_dataset_value(dataset_dict,
                                                 'publisher_name')
        if not publisher_name and dataset_dict.get('organization'):
            publisher_name = dataset_dict['organization']['title']
        g.add((publisher_details, FOAF.name, Literal(publisher_name)))
        # TODO: It would make sense to fallback these to organization
        # fields but they are not in the default schema and the
        # `organization` object in the dataset_dict does not include
        # custom fields
        items = [('publisher_email', FOAF.mbox, None, Literal),
                 ('publisher_identifier', DCT.identifier, None, Literal),
                 ('publisher_url', FOAF.homepage, None, URIRef),
                 ('publisher_type', DCT.type, None, Literal)]
        self._add_triples_from_dict(dataset_dict, publisher_details, items)
    # Temporal
    start = self._get_dataset_value(dataset_dict, 'temporal_start')
    end = self._get_dataset_value(dataset_dict, 'temporal_end')
    if start or end:
        temporal_extent = BNode()
        g.add((temporal_extent, RDF.type, DCT.PeriodOfTime))
        if start:
            self._add_date_triple(temporal_extent, SCHEMA.startDate, start)
        if end:
            self._add_date_triple(temporal_extent, SCHEMA.endDate, end)
        g.add((dataset_ref, DCT.temporal, temporal_extent))
    # Spatial
    # -----------------------------------------
    # When I use this code, I get
    # dct:spatial <[u'http://sws.geonames.org/3144096/']> ;
    # when I want
    # dct:spatial <http://sws.geonames.org/3144096/> ;
    # So for now I'll just comment out this section, and treat spatial
    # like language and theme
    # -----------------------------------------
    # spatial_uri = self._get_dataset_value(dataset_dict, 'spatial_uri')
    # spatial_text = self._get_dataset_value(dataset_dict, 'spatial_text')
    # spatial_geom = self._get_dataset_value(dataset_dict, 'spatial')
    # if spatial_uri or spatial_text or spatial_geom:
    #     if spatial_uri:
    #         spatial_ref = URIRef(spatial_uri)
    #     else:
    #         spatial_ref = BNode()
    #     g.add((spatial_ref, RDF.type, DCT.Location))
    #     g.add((dataset_ref, DCT.spatial, spatial_ref))
    #     if spatial_text:
    #         g.add((spatial_ref, SKOS.prefLabel, Literal(spatial_text)))
    #     if spatial_geom:
    #         # GeoJSON
    #         g.add((spatial_ref,
    #                LOCN.geometry,
    #                Literal(spatial_geom, datatype=GEOJSON_IMT)))
    #         # WKT, because GeoDCAT-AP says so
    #         try:
    #             g.add((spatial_ref,
    #                    LOCN.geometry,
    #                    Literal(wkt.dumps(json.loads(spatial_geom),
    #                                      decimals=4),
    #                            datatype=GSP.wktLiteral)))
    #         except (TypeError, ValueError, InvalidGeoJSONException):
    #             pass
    # Resources: one dcat:Distribution per resource dict.
    for resource_dict in dataset_dict.get('resources', []):
        distribution = URIRef(resource_uri(resource_dict))
        g.add((dataset_ref, DCAT.distribution, distribution))
        g.add((distribution, RDF.type, DCAT.Distribution))
        # Fall back to the dataset's license when the resource has none.
        # NOTE(review): lr.get() presumably returns None for an unknown
        # license id, which would make .url raise — confirm upstream.
        if 'license' not in resource_dict and 'license_id' in dataset_dict:
            lr = LicenseRegister()
            _license = lr.get(dataset_dict['license_id'])
            resource_dict['license'] = _license.url
        # Simple values
        items = [
            ('name', DCT.title, None, Literal),
            ('description', DCT.description, None, Literal),
            ('status', ADMS.status, None, Literal),
            ('rights', DCT.rights, None, Literal),
            ('license', DCT.license, None, URIRef),
        ]
        self._add_triples_from_dict(resource_dict, distribution, items)
        # Lists
        items = [
            ('documentation', FOAF.page, None, URIRef),
            ('language', DCT.language, None, URIRef),
            ('conforms_to', DCT.conformsTo, None, URIRef),
        ]
        self._add_list_triples_from_dict(resource_dict, distribution, items)
        # Format: a value containing '/' is treated as an IANA media type.
        if '/' in resource_dict.get('format', ''):
            g.add((distribution, DCAT.mediaType,
                   Literal(resource_dict['format'])))
        else:
            if resource_dict.get('format'):
                g.add((distribution, DCT['format'],
                       Literal(resource_dict['format'])))
            if resource_dict.get('mimetype'):
                g.add((distribution, DCAT.mediaType,
                       Literal(resource_dict['mimetype'])))
        # URL
        url = resource_dict.get('url')
        download_url = resource_dict.get('download_url')
        if download_url:
            g.add((distribution, DCAT.downloadURL, URIRef(download_url)))
        # accessURL only when it adds information beyond downloadURL.
        if (url and not download_url) or (url and url != download_url):
            g.add((distribution, DCAT.accessURL, URIRef(url)))
        # Dates
        items = [
            ('issued', DCT.issued, None, Literal),
            ('modified', DCT.modified, None, Literal),
        ]
        self._add_date_triples_from_dict(resource_dict, distribution, items)
        # Numbers: typed decimal when numeric, raw string otherwise.
        if resource_dict.get('size'):
            try:
                g.add((distribution, DCAT.byteSize,
                       Literal(float(resource_dict['size']),
                               datatype=XSD.decimal)))
            except (ValueError, TypeError):
                g.add((distribution, DCAT.byteSize,
                       Literal(resource_dict['size'])))
        # Checksum (SPDX); algorithm may be a URI or plain text.
        if resource_dict.get('hash'):
            checksum = BNode()
            g.add((checksum, SPDX.checksumValue,
                   Literal(resource_dict['hash'], datatype=XSD.hexBinary)))
            if resource_dict.get('hash_algorithm'):
                if resource_dict['hash_algorithm'].startswith('http'):
                    g.add((checksum, SPDX.algorithm,
                           URIRef(resource_dict['hash_algorithm'])))
                else:
                    g.add((checksum, SPDX.algorithm,
                           Literal(resource_dict['hash_algorithm'])))
            g.add((distribution, SPDX.checksum, checksum))
def test_access_via_key(self):
    """Boolean compliance flags are readable via dict-style key access."""
    # Renamed local: `license` shadows the `license` builtin.
    lic = LicenseRegister()['cc-by']
    assert_equal(lic['is_okd_compliant'], True)
    assert_equal(lic['is_osi_compliant'], False)
def test_access_via_dict(self):
    """as_dict() exposes the legacy boolean compliance flags."""
    # Renamed local: `license` shadows the `license` builtin.
    lic = LicenseRegister()['cc-by']
    lic_dict = lic.as_dict()
    assert_equal(lic_dict['is_okd_compliant'], True)
    assert_equal(lic_dict['is_osi_compliant'], False)
def test_access_via_attribute(self):
    """Boolean compliance flags are readable as attributes."""
    # Renamed local: `license` shadows the `license` builtin.
    lic = LicenseRegister()['cc-by']
    assert_equal(lic.is_okd_compliant, True)
    assert_equal(lic.is_osi_compliant, False)
def test_access_via_dict(self):
    """as_dict() exposes the v3-style conformance fields."""
    # Renamed local: `license` shadows the `license` builtin.
    lic = LicenseRegister()['cc-by']
    lic_dict = lic.as_dict()
    assert_equal(lic_dict['od_conformance'], 'approved')
    assert_equal(lic_dict['osd_conformance'], 'not reviewed')
def test_access_via_key(self):
    """od_conformance is readable via dict-style key access."""
    # Renamed local: `license` shadows the `license` builtin.
    lic = LicenseRegister()['cc-by']
    assert_equal(lic['od_conformance'], 'approved')
def test_access_via_attribute(self):
    """od_conformance is readable as an attribute."""
    # Renamed local: `license` shadows the `license` builtin.
    lic = LicenseRegister()['cc-by']
    assert_equal(lic.od_conformance, 'approved')
def setup(self):
    # Build a fresh LicenseRegister for each test — presumably run by the
    # test framework before every test method (verify against runner).
    self.licenses = LicenseRegister()