Beispiel #1
0
class TestLicense(TestCase):
    """Exercises the mapping-style interface of ``LicenseRegister``."""

    def setup(self):
        """Bind a newly constructed ``LicenseRegister`` to ``self.licenses``."""
        register = LicenseRegister()
        self.licenses = register

    def teardown(self):
        """Release the register reference held by the test instance."""
        self.licenses = None

    def test_keys(self):
        """Each key from ``keys()`` must pass ``assert_unicode``."""
        register_keys = self.licenses.keys()
        for key in register_keys:
            self.assert_unicode(key)

    def test_values(self):
        """Each value from ``values()`` must carry a unicode ``id``."""
        register_values = self.licenses.values()
        for entry in register_values:
            self.assert_unicode(entry.id)

    def test_iter(self):
        """Iterating the register directly yields unicode items."""
        for key in self.licenses:
            self.assert_unicode(key)

    def test_getitem(self):
        """Item lookup by key returns an entry with the expected field types."""
        register = self.licenses
        for key in register.keys():
            entry = register[key]
            self.assert_unicode(entry.id)
            self.assert_unicode(entry.title)
            self.assert_datetime(entry.date_created)
            self.assert_unicode(entry.url)
Beispiel #2
0
class TestLicense(TestCase):
    """Exercises the mapping-style interface of ``LicenseRegister``."""

    def setup(self):
        """Bind a newly constructed ``LicenseRegister`` to ``self.licenses``."""
        register = LicenseRegister()
        self.licenses = register

    def teardown(self):
        """Release the register reference held by the test instance."""
        self.licenses = None

    def test_keys(self):
        """Each key from ``keys()`` must pass ``assert_unicode``."""
        register_keys = self.licenses.keys()
        for key in register_keys:
            self.assert_unicode(key)

    def test_values(self):
        """Each value from ``values()`` must carry a unicode ``id``."""
        register_values = self.licenses.values()
        for entry in register_values:
            self.assert_unicode(entry.id)

    def test_iter(self):
        """Iterating the register directly yields unicode items."""
        for key in self.licenses:
            self.assert_unicode(key)

    def test_getitem(self):
        """Item lookup by key returns an entry with unicode id, title and url."""
        register = self.licenses
        for key in register.keys():
            entry = register[key]
            self.assert_unicode(entry.id)
            self.assert_unicode(entry.title)
            self.assert_unicode(entry.url)
Beispiel #3
0
    def license_2_license_id(self, license_title, logger=None):
        """Return the id of the license whose title is ``license_title``.

        The title is resolved with ``LicenseRegister.get_by_title``.

        :param license_title: title to look up in the license register.
        :param logger: optional callable taking one message string; it is
            invoked with a warning when no license matches.
        :returns: the license id as a unicode string, or ``None`` when no
            license matches.
        """
        # import is here, as it creates a dependency on ckan, which
        # many importers won't want
        from ckan.model.license import LicenseRegister

        license_obj = LicenseRegister().get_by_title(license_title)
        if license_obj:
            return u"%s" % license_obj.id

        # ``logger`` defaults to None; calling it unconditionally would raise
        # TypeError instead of reporting the unmatched title.
        if logger is not None:
            logger("Warning: No license name matches '%s'. Ignoring license." % license_title)
        return None
 def get_by_title(self, title, default=None):
     """Return the id of the first registered license titled ``title``.

     :param title: title compared against ``unicode(license.title)``.
     :param default: value returned when no license title matches.
     :returns: the matching license's ``id``, otherwise ``default``.
     """
     from ckan.model.license import LicenseRegister

     # Iterate the (key, license) pairs directly instead of indexing by
     # position; only the license object of each pair is needed.
     for _key, license in LicenseRegister().items():
         if title == unicode(license.title):
             return license.id
     return default
Beispiel #5
0
    def license_2_license_id(self, license_title, logger=None):
        """
        Look up ``license_title`` in the CKAN license register and return the
        id of the entry found.

        :param license_title: value passed to ``LicenseRegister.get``.
        :param logger: accepted for interface compatibility; not used here.
        :returns: the license id as a unicode string, or an empty unicode
            string when the register has no such entry.
        """

        # import is here, as it creates a dependency on ckan, which many importers won't want
        from ckan.model.license import LicenseRegister
        license_obj = LicenseRegister().get(license_title)

        if license_obj:
            return u'%s' % license_obj.id

        # NOTE(review): assumes ``log`` is a stdlib logging.Logger, where
        # ``warn`` is a deprecated alias of ``warning`` - confirm.
        log.warning('Warning: No license name matches %s. Ignoring license.',
                    license_title)
        return u''
    def _license(self, dataset_ref):
        '''
        Looks up the dataset's ``DCT.license`` value in the CKAN license
        registry.

        Returns a ``(license_id, license_title)`` tuple taken from the
        registry entry whose id equals that value. If no entry matches, both
        members of the tuple are empty strings.

        The registry is scanned in order and the scan stops at the first
        matching id.
        '''
        log.debug('Obteniendo licencias')
        rdf_license = self._object_value(dataset_ref, DCT.license)
        log.debug('Licencia Obtenida: %s ', rdf_license)

        matched_id, matched_title = '', ''
        for candidate_id, candidate in LicenseRegister().items():
            log.debug('Tratando licencia: %s ', candidate_id)
            if candidate_id != rdf_license:
                continue
            log.debug('Encontrada licencia')
            matched_id, matched_title = candidate_id, candidate.title
            break

        log.debug('Licencias que se insertan en el dataset: %s, %s ',
                  matched_id, matched_title)

        return matched_id, matched_title
Beispiel #7
0
def test_import_v1_style_register():
    """Check url, openness and title of the ``cc-by`` register entry."""
    cc_by = LicenseRegister()["cc-by"]

    expected_url = "http://www.opendefinition.org/licenses/cc-by"
    assert cc_by.url == expected_url
    assert cc_by.isopen()
    expected_title = "Creative Commons Attribution"
    assert cc_by.title == expected_title
Beispiel #8
0
    def _license(self, dataset_ref):
        '''
        Returns the CKAN license id for the first distribution of the dataset
        whose license is known to the CKAN license registry, or None when no
        distribution license matches.

        A distribution license is matched first by URI against the registry
        URLs and, failing that, by its ``DCT.title`` against the registry
        titles. The two lookup tables are built once and kept in
        ``self._licenceregister_cache``.
        '''
        cached = self._licenceregister_cache
        if cached is None:
            # First call: index the registry by URL and by title.
            uri_index = {}
            title_index = {}
            for register_id, entry in LicenseRegister().items():
                uri_index[entry.url] = register_id
                title_index[entry.title] = register_id
            cached = uri_index, title_index
            self._licenceregister_cache = cached
        uri_index, title_index = cached

        for distribution in self._distributions(dataset_ref):
            dist_license = self._object(distribution, DCT.license)
            if not dist_license:
                continue
            # URI match takes precedence; the title is only a fallback.
            matched_id = uri_index.get(dist_license.toPython())
            if matched_id is None:
                dist_title = self._object_value(dist_license, DCT.title)
                matched_id = title_index.get(dist_title)
            if matched_id is not None:
                return matched_id
        return None
Beispiel #9
0
    def license_2_license_id(self, license_title, logger=None):
        """
        Look up ``license_title`` in the CKAN license register and return the
        id of the entry found.

        :param license_title: value passed to ``LicenseRegister.get``.
        :param logger: accepted for interface compatibility; not used here.
        :returns: the license id as a unicode string, or an empty unicode
            string when the register has no such entry.
        """

        # import is here, as it creates a dependency on ckan, which many importers won't want
        from ckan.model.license import LicenseRegister
        license_obj = LicenseRegister().get(license_title)

        if license_obj:
            return u'%s' % license_obj.id

        # NOTE(review): assumes ``log`` is a stdlib logging.Logger, where
        # ``warn`` is a deprecated alias of ``warning`` - confirm.
        log.warning('Warning: No license name matches %s. Ignoring license.',
                    license_title)
        return u''
Beispiel #10
0
def test_default_register_has_basic_properties_of_a_license():
    """With ``licenses_group_url`` unset, ``cc-by`` has the expected fields."""
    config["licenses_group_url"] = None
    cc_by = LicenseRegister()["cc-by"]

    expected_url = "http://www.opendefinition.org/licenses/cc-by"
    assert cc_by.url == expected_url
    assert cc_by.isopen()
    expected_title = "Creative Commons Attribution"
    assert cc_by.title == expected_title
Beispiel #11
0
    def test_import_v2_style_register(self):
        """Check url, openness and title of the ``CC-BY-4.0`` entry."""
        cc_by_4 = LicenseRegister()['CC-BY-4.0']

        expected_url = 'https://creativecommons.org/licenses/by/4.0/'
        assert_equal(cc_by_4.url, expected_url)
        assert_equal(cc_by_4.isopen(), True)
        expected_title = 'Creative Commons Attribution 4.0'
        assert_equal(cc_by_4.title, expected_title)
Beispiel #12
0
    def test_import_v1_style_register(self):
        """Check url, openness and title of the ``cc-by`` entry."""
        cc_by = LicenseRegister()['cc-by']

        expected_url = 'http://www.opendefinition.org/licenses/cc-by'
        assert_equal(cc_by.url, expected_url)
        assert_equal(cc_by.isopen(), True)
        expected_title = 'Creative Commons Attribution'
        assert_equal(cc_by.title, expected_title)
Beispiel #13
0
    def test_default_register_has_basic_properties_of_a_license(self):
        """With ``licenses_group_url`` unset, ``cc-by`` has the expected fields."""
        config['licenses_group_url'] = None
        cc_by = LicenseRegister()['cc-by']

        expected_url = 'http://www.opendefinition.org/licenses/cc-by'
        assert_equal(cc_by.url, expected_url)
        assert_equal(cc_by.isopen(), True)
        expected_title = 'Creative Commons Attribution'
        assert_equal(cc_by.title, expected_title)
    def test_import_v1_style_register(self):
        """Load ``licenses.v1`` from this directory and check ``cc-by``."""
        # v1 is used by CKAN so far
        here = os.path.dirname(os.path.realpath(__file__))
        v1_path = '%s/licenses.v1' % here
        # Point the register at the local fixture through a file URL.
        config['licenses_group_url'] = 'file:///%s' % v1_path

        cc_by = LicenseRegister()['cc-by']
        expected_url = 'http://www.opendefinition.org/licenses/cc-by'
        assert_equal(cc_by.url, expected_url)
        assert_equal(cc_by.isopen(), True)
        expected_title = 'Creative Commons Attribution'
        assert_equal(cc_by.title, expected_title)
    def test_import_v2_style_register(self):
        """Load ``licenses.v2`` from this directory and check ``CC-BY-4.0``."""
        # v2 is used by http://licenses.opendefinition.org in recent times
        here = os.path.dirname(os.path.realpath(__file__))
        v2_path = '%s/licenses.v2' % here
        # Point the register at the local fixture through a file URL.
        config['licenses_group_url'] = 'file:///%s' % v2_path

        cc_by_4 = LicenseRegister()['CC-BY-4.0']
        expected_url = 'https://creativecommons.org/licenses/by/4.0/'
        assert_equal(cc_by_4.url, expected_url)
        assert_equal(cc_by_4.isopen(), True)
        expected_title = 'Creative Commons Attribution 4.0'
        assert_equal(cc_by_4.title, expected_title)
Beispiel #16
0
 def test_register_get_ok(self):
     """GET ``/rest/licenses`` must mirror the contents of ``LicenseRegister``.

     Checks that the response status is 200, that the response lists as many
     licenses as the register holds, and that the title and url of every
     listed license equal those of the register entry with the same id.
     """
     from ckan.model.license import LicenseRegister
     register = LicenseRegister()
     assert len(register), "No licenses found in model."
     offset = self.offset('/rest/licenses')
     res = self.app.get(offset, status=[200])
     licenses_data = self.data_from_res(res)
     assert len(licenses_data) == len(register), (len(licenses_data), len(register))
     for license_data in licenses_data:
         license = register[license_data['id']]
         # Compare the API payload with the register entry. Comparing the
         # entry with itself (as before) could never fail.
         assert license_data['title'] == license.title
         assert license_data['url'] == license.url
Beispiel #17
0
def test_access_via_dict():
    """``as_dict()`` of ``cc-by`` reports the two compliance flags."""
    cc_by_dict = LicenseRegister()["cc-by"].as_dict()
    okd_flag = cc_by_dict["is_okd_compliant"]
    assert okd_flag
    osi_flag = cc_by_dict["is_osi_compliant"]
    assert not osi_flag
Beispiel #18
0
def test_access_via_attribute_2():
    """Conformance fields of ``cc-by`` are readable as attributes."""
    cc_by = LicenseRegister()["cc-by"]
    od_value = cc_by.od_conformance
    assert od_value
    osd_value = cc_by.osd_conformance
    assert osd_value == "not reviewed"
Beispiel #19
0
 def test_access_via_attribute(self):
     """Compliance flags of ``cc-by`` are readable as attributes."""
     cc_by = LicenseRegister()['cc-by']
     okd_flag = cc_by.is_okd_compliant
     assert_equal(okd_flag, True)
     osi_flag = cc_by.is_osi_compliant
     assert_equal(osi_flag, False)
Beispiel #20
0
def test_access_via_key():
    """Compliance flags of ``cc-by`` are readable by item lookup."""
    cc_by = LicenseRegister()["cc-by"]
    okd_flag = cc_by["is_okd_compliant"]
    assert okd_flag
    osi_flag = cc_by["is_osi_compliant"]
    assert not osi_flag
Beispiel #21
0
 def test_access_via_key(self):
     """``od_conformance`` of ``cc-by`` is readable by item lookup."""
     cc_by = LicenseRegister()['cc-by']
     od_value = cc_by['od_conformance']
     assert_equal(od_value, 'approved')
Beispiel #22
0
 def test_access_via_dict(self):
     """``as_dict()`` of ``cc-by`` reports both conformance values."""
     cc_by_dict = LicenseRegister()['cc-by'].as_dict()
     od_value = cc_by_dict['od_conformance']
     assert_equal(od_value, 'approved')
     osd_value = cc_by_dict['osd_conformance']
     assert_equal(osd_value, 'not reviewed')
Beispiel #23
0
 def setup(self):
     """Bind a newly constructed ``LicenseRegister`` to ``self.licenses``."""
     register = LicenseRegister()
     self.licenses = register
Beispiel #24
0
 def test_access_via_attribute(self):
     """``od_conformance`` of ``cc-by`` is readable as an attribute."""
     cc_by = LicenseRegister()['cc-by']
     od_value = cc_by.od_conformance
     assert_equal(od_value, 'approved')
Beispiel #25
0
def test_access_via_attribute():
    """``od_conformance`` of ``cc-by`` is readable as an attribute."""
    cc_by = LicenseRegister()["cc-by"]
    od_value = cc_by.od_conformance
    assert od_value == "approved"
Beispiel #26
0
 def setup(self):
     """Bind a newly constructed ``LicenseRegister`` to ``self.licenses``."""
     register = LicenseRegister()
     self.licenses = register
Beispiel #27
0
    def resource_to_xml(identifier, pkg_dict, res_dict):
        """
        Build the DataCite metadata XML for a single resource.

        The document contains, in this order: identifier, titles, creators,
        publisher, publicationYear, relatedIdentifiers (the package DOI as
        ``IsPartOf``), optional descriptions, rightsList, and optional
        formats, sizes and dates.

        @param identifier: a DOI identifier, used as the text of the
            ``identifier`` element
        @param pkg_dict: dict describing the CKAN package owning the resource
        @param res_dict: dict describing the CKAN resource
        @return: XML-formatted metadata as produced by ``etree.tostring``
        """
        _validate_package(pkg_dict)
        _validate_resource(res_dict)
        package_doi = CeonPackageDOI.get(pkg_dict['id'])
        title = res_dict['name'].encode('unicode-escape')
        creators = _get_creators(pkg_dict['id'])
        # NOTE(review): the result is never used below; the call is kept in
        # case the helper is relied on for validation or side effects.
        resource_identifiers = _get_resource_dois(pkg_dict['id'])
        publisher = pkg_dict['publisher'].encode('unicode-escape')
        if 'publication_year' in pkg_dict:
            publication_year = pkg_dict['publication_year']
        elif isinstance(pkg_dict['metadata_created'], datetime):
            publication_year = pkg_dict['metadata_created'].year
        else:
            publication_year = parser.parse(pkg_dict['metadata_created']).year
        description = res_dict.get('description', '').encode('unicode-escape')
        license_id = CeonResourceLicense.get(res_dict['id']).license_id
        license = LicenseRegister()[license_id]
        # Pre-bind both names so the rights section below cannot hit an
        # unbound local when the register entry is falsy.
        license_url = None
        license_title = None
        if license:
            license_url = license.url
            license_title = license.title.encode('unicode-escape')
        file_format = res_dict.get('format', '')
        if file_format:
            file_format = file_format.encode('unicode-escape')
        file_size = res_dict.get('size', '')
        if file_size:
            file_size = file_size.encode('unicode-escape')
        date_available = parser.parse(res_dict.get('created')).strftime(
            '%Y-%m-%d') if 'created' in res_dict else None
        if 'last_modified' in res_dict and res_dict['last_modified']:
            date_updated = parser.parse(
                res_dict.get('last_modified')).strftime('%Y-%m-%d')
        else:
            date_updated = date_available
        # Prepare metadata
        metadata = etree.Element('{%s}resource' % (METADATA_NAMESPACE),
                                 nsmap={
                                     None: METADATA_NAMESPACE,
                                     'xsi': XSI_NAMESPACE
                                 })
        metadata.set('{%s}schemaLocation' % (XSI_NAMESPACE),
                     XSI_SCHEMALOCATION)
        e_identifier = etree.Element('identifier', identifierType='DOI')
        e_identifier.text = identifier
        metadata.append(e_identifier)
        e_titles = etree.SubElement(metadata, 'titles')
        etree.SubElement(e_titles, 'title').text = title
        e_creators = etree.SubElement(metadata, 'creators')
        for c in creators:
            e_creators.append(c)
        etree.SubElement(metadata, 'publisher').text = publisher
        etree.SubElement(metadata, 'publicationYear').text = \
                '{}'.format(publication_year)
        e_related_identifiers = etree.SubElement(metadata,
                                                 'relatedIdentifiers')
        e_related_identifier = etree.Element('relatedIdentifier',
                                             relatedIdentifierType='DOI',
                                             relationType='IsPartOf')
        e_related_identifier.text = package_doi.identifier
        e_related_identifiers.append(e_related_identifier)
        if description:
            e_descriptions = etree.SubElement(metadata, 'descriptions')
            e_description = etree.Element('description',
                                          descriptionType='Other')
            e_description.text = description
            e_descriptions.append(e_description)
        e_rights_list = etree.SubElement(metadata, 'rightsList')
        e_rights = etree.Element('rights',
                                 rightsURI='info:eu-repo/semantics/openAccess')
        e_rights_list.append(e_rights)
        if license_id:
            if license_url:
                e_rights = etree.Element('rights', rightsURI=license_url)
            else:
                e_rights = etree.Element('rights')
            e_rights.text = license_title if license_title else license_id
            e_rights_list.append(e_rights)
        if file_format:
            e_formats = etree.SubElement(metadata, 'formats')
            etree.SubElement(e_formats, 'format').text = file_format
        if file_size:
            e_sizes = etree.SubElement(metadata, 'sizes')
            etree.SubElement(e_sizes, 'size').text = \
                    '{}'.format(file_size)
        # ``e_dates`` is created lazily by whichever date is present first;
        # start from None so the "Updated" branch can test for it safely.
        e_dates = None
        if date_available:
            e_dates = etree.SubElement(metadata, 'dates')
            e_date = etree.Element('date', dateType='Available')
            e_date.text = date_available
            e_dates.append(e_date)
        if date_updated:
            # Identity check: element truthiness depends on its child count,
            # not on whether the element exists.
            if e_dates is None:
                e_dates = etree.SubElement(metadata, 'dates')
            e_date = etree.Element('date', dateType='Updated')
            e_date.text = date_updated
            e_dates.append(e_date)
        return etree.tostring(metadata, pretty_print=True)
Beispiel #28
0
    def package_to_xml(identifier, pkg_dict):
        """
        Build the DataCite metadata XML for a package.

        The document contains, in this order: identifier, titles, creators,
        publisher, publicationYear, rightsList, and then the optional
        subjects, descriptions, relatedIdentifiers, contributors,
        resourceType and version elements.

        @param identifier: a DOI identifier, used as the text of the
            ``identifier`` element
        @param pkg_dict: dict describing the CKAN package
        @return: XML-formatted metadata as produced by ``etree.tostring``
        """
        _validate_package(pkg_dict)
        title = pkg_dict['title'].encode('unicode-escape')
        creators = _get_creators(pkg_dict['id'])
        resource_identifiers = _get_resource_dois(pkg_dict['id'])
        publisher = pkg_dict['publisher'].encode('unicode-escape')
        # Publication year: explicit field first, otherwise the year of
        # ``metadata_created`` (parsed when it is not already a datetime).
        if 'publication_year' in pkg_dict:
            publication_year = pkg_dict['publication_year']
        elif isinstance(pkg_dict['metadata_created'], datetime):
            publication_year = pkg_dict['metadata_created'].year
        else:
            publication_year = parser.parse(pkg_dict['metadata_created']).year
        # The rights text always comes from the PKG_LICENSE_ID register entry,
        # not from a license stored on the package dict.
        license_title = LicenseRegister()[PKG_LICENSE_ID].title.encode(
            'unicode-escape')
        # NOTE(review): ''.split(',') yields [''], so a missing tag_string may
        # produce one empty subject unless _ensure_list filters it - confirm.
        subject = _ensure_list(pkg_dict.get('tag_string', '').split(','))
        subject.sort()
        description = pkg_dict.get('notes', '').encode('unicode-escape')
        # For oa_funder, res_type and sci_discipline only the text before the
        # first '-' is kept (stripped of surrounding whitespace).
        oa_funder = _get_first_elem(pkg_dict, 'oa_funder')
        if oa_funder and '-' in oa_funder:
            oa_funder = oa_funder[0:oa_funder.find('-')].strip()
        oa_funding_program = _get_first_elem(pkg_dict, 'oa_funding_program')
        res_type = _get_first_elem(pkg_dict, 'res_type')
        if res_type and '-' in res_type:
            res_type = res_type[0:res_type.find('-')].strip()
        sci_discipline = _get_first_elem(pkg_dict, 'sci_discipline')
        if sci_discipline and '-' in sci_discipline:
            sci_discipline = sci_discipline[0:sci_discipline.find('-')].strip()
        oa_grant_number = pkg_dict.get('oa_grant_number',
                                       '').encode('unicode-escape')
        rel_citation = pkg_dict.get('rel_citation',
                                    '').encode('unicode-escape')
        version = None
        if 'version' in pkg_dict:
            version = pkg_dict.get('version')
            if version:
                version = version.encode('unicode-escape')
        # The scientific discipline is emitted as an additional subject,
        # appended after the sorted tags.
        if sci_discipline:
            if subject:
                subject.append(sci_discipline)
            else:
                subject = [sci_discipline]
        # Prepare metadata
        metadata = etree.Element('{%s}resource' % (METADATA_NAMESPACE),
                                 nsmap={
                                     None: METADATA_NAMESPACE,
                                     'xsi': XSI_NAMESPACE
                                 })
        metadata.set('{%s}schemaLocation' % (XSI_NAMESPACE),
                     XSI_SCHEMALOCATION)
        e_identifier = etree.Element('identifier', identifierType='DOI')
        e_identifier.text = identifier
        metadata.append(e_identifier)
        e_titles = etree.SubElement(metadata, 'titles')
        etree.SubElement(e_titles, 'title').text = title
        e_creators = etree.SubElement(metadata, 'creators')
        for c in creators:
            e_creators.append(c)
        etree.SubElement(metadata, 'publisher').text = publisher
        etree.SubElement(metadata, 'publicationYear').text = \
                '{}'.format(publication_year)
        e_rights_list = etree.SubElement(metadata, 'rightsList')
        etree.SubElement(e_rights_list, 'rights').text = license_title
        if subject:
            e_subjects = etree.SubElement(metadata, 'subjects')
            for s in _ensure_list(subject):
                etree.SubElement(e_subjects, 'subject').text = s
        if description:
            e_descriptions = etree.SubElement(metadata, 'descriptions')
            e_description = etree.Element('description',
                                          descriptionType='Abstract')
            e_description.text = description
            e_descriptions.append(e_description)
        # relatedIdentifiers: the citation URL (if any) first, then the
        # elements returned by _get_resource_dois.
        if rel_citation or len(resource_identifiers) > 0:
            e_rel_identifiers = etree.SubElement(metadata,
                                                 'relatedIdentifiers')
            if rel_citation:
                e_rel_identifier = etree.Element('relatedIdentifier',
                                                 relatedIdentifierType='URL',
                                                 relationType='IsReferencedBy')
                e_rel_identifier.text = rel_citation
                e_rel_identifiers.append(e_rel_identifier)
            for e_related_identifier in resource_identifiers:
                e_rel_identifiers.append(e_related_identifier)
        # A single Funder contributor; the nameIdentifier is added only when
        # both the funding program and the grant number are known.
        if oa_funder:
            e_contributors = etree.SubElement(metadata, 'contributors')
            if oa_funding_program and oa_grant_number:
                oa_funding_id = oa_funding_program.replace('-', '/')
                project_info = 'info:eu-repo/grantAgreement/{0}/{1}///'
                project_info = project_info.format(oa_funding_id,
                                                   oa_grant_number)
                e_contributor = etree.Element('contributor',
                                              contributorType='Funder')
                etree.SubElement(e_contributor,
                                 'contributorName').text = oa_funder
                e_name_identifier = etree.Element('nameIdentifier',
                                                  nameIdentifierScheme='info')
                e_name_identifier.text = project_info
                e_contributor.append(e_name_identifier)
            else:
                e_contributor = etree.Element('contributor',
                                              contributorType='Funder')
                etree.SubElement(e_contributor,
                                 'contributorName').text = oa_funder
            e_contributors.append(e_contributor)
        if res_type:
            e_resource_type = etree.Element('resourceType',
                                            resourceTypeGeneral=res_type)
            metadata.append(e_resource_type)
        if version:
            etree.SubElement(metadata, 'version').text = version
        return etree.tostring(metadata, pretty_print=True)
Beispiel #29
0
def test_access_via_dict():
    """``as_dict()`` of ``cc-by`` reports both conformance values."""
    cc_by_dict = LicenseRegister()["cc-by"].as_dict()
    od_value = cc_by_dict["od_conformance"]
    assert od_value == "approved"
    osd_value = cc_by_dict["osd_conformance"]
    assert osd_value == "not reviewed"
Beispiel #30
0
def test_import_v2_style_register():
    """Check url, openness and title of the ``CC-BY-4.0`` register entry."""
    cc_by_4 = LicenseRegister()["CC-BY-4.0"]
    expected_url = "https://creativecommons.org/licenses/by/4.0/"
    assert cc_by_4.url == expected_url
    assert cc_by_4.isopen()
    expected_title = "Creative Commons Attribution 4.0"
    assert cc_by_4.title == expected_title
Beispiel #31
0
 def test_access_via_dict(self):
     """``as_dict()`` of ``cc-by`` reports both conformance values."""
     cc_by_dict = LicenseRegister()['cc-by'].as_dict()
     od_value = cc_by_dict['od_conformance']
     assert_equal(od_value, 'approved')
     osd_value = cc_by_dict['osd_conformance']
     assert_equal(osd_value, 'not reviewed')
Beispiel #32
0
def test_access_via_key():
    """``od_conformance`` of ``cc-by`` is readable by item lookup."""
    cc_by = LicenseRegister()["cc-by"]
    od_value = cc_by["od_conformance"]
    assert od_value == "approved"
Beispiel #33
0
 def test_access_via_dict(self):
     """``as_dict()`` of ``cc-by`` reports the two compliance flags."""
     cc_by_dict = LicenseRegister()['cc-by'].as_dict()
     okd_flag = cc_by_dict['is_okd_compliant']
     assert_equal(okd_flag, True)
     osi_flag = cc_by_dict['is_osi_compliant']
     assert_equal(osi_flag, False)
Beispiel #34
0
def test_access_via_attribute():
    """Compliance flags of ``cc-by`` are readable as attributes."""
    cc_by = LicenseRegister()["cc-by"]
    okd_flag = cc_by.is_okd_compliant
    assert okd_flag
    osi_flag = cc_by.is_osi_compliant
    assert not osi_flag
Beispiel #35
0
 def test_access_via_key(self):
     """Compliance flags of ``cc-by`` are readable by item lookup."""
     cc_by = LicenseRegister()['cc-by']
     okd_flag = cc_by['is_okd_compliant']
     assert_equal(okd_flag, True)
     osi_flag = cc_by['is_osi_compliant']
     assert_equal(osi_flag, False)
    def graph_from_dataset(self, dataset_dict, dataset_ref):
        """
        Add triples describing ``dataset_dict`` to the graph ``self.g``.

        ``dataset_ref`` is typed as ``DCAT.Dataset`` and used as the subject
        of the dataset-level triples. Contact point, publisher, temporal
        extent, spatial coverage and every entry of
        ``dataset_dict['resources']`` get their own nodes linked from it.

        Note that a resource dict without a ``license`` key is modified in
        place: when the dataset's ``license_id`` is found in the license
        register, the register entry's URL is stored under ``license``.

        :param dataset_dict: dict with the dataset fields read below
        :param dataset_ref: graph node used as the dataset subject
        """

        g = self.g

        # Register every known prefix on the graph.
        for prefix, namespace in namespaces.iteritems():
            g.bind(prefix, namespace)

        # -- start
        g.add((dataset_ref, RDF.type, DCAT.Dataset))

        # Basic fields
        # Each tuple is (dataset key, predicate, list of alternative keys or
        # None, RDF term class) - presumably the alternative keys act as
        # fallbacks; confirm in _add_triples_from_dict.
        items = [
            ('title', DCT.title, None, Literal),
            ('notes', DCT.description, None, Literal),
            ('url', DCAT.landingPage, None, URIRef),
            ('identifier', DCT.identifier, ['guid', 'id'], Literal),
            ('version', OWL.versionInfo, ['dcat_version'], Literal),
            ('version_notes', ADMS.versionNotes, None, Literal),
            ('frequency', DCT.accrualPeriodicity, None, URIRef),
            ('subject', DCT.subject, None,
             URIRef),  # Mentioned in the vocabulary
            ('provenance', DCT.provenance, None, URIRef)
        ]
        self._add_triples_from_dict(dataset_dict, dataset_ref, items)

        # Tags
        for tag in dataset_dict.get('tags', []):
            g.add((dataset_ref, DCAT.keyword, Literal(tag['name'])))

        # Dates
        items = [
            ('issued', DCT.issued, ['metadata_created'], Literal),
            ('modified', DCT.modified, ['metadata_modified'], Literal),
        ]
        self._add_date_triples_from_dict(dataset_dict, dataset_ref, items)

        #  Lists
        items = [('language', DCT.language, None, URIRef),
                 ('theme', DCAT.theme, None, URIRef),
                 ('spatial_uri', DCT.spatial, None, URIRef),
                 ('conforms_to', DCT.conformsTo, None, URIRef),
                 ('alternate_identifier', ADMS.identifier, None, Literal),
                 ('documentation', FOAF.page, None, URIRef),
                 ('access_rights', DCT.accessRights, None, URIRef),
                 ('related_resource', DCT.relation, None, URIRef),
                 ('has_version', DCT.hasVersion, None, Literal),
                 ('is_version_of', DCT.isVersionOf, None, Literal),
                 ('source', DCT.source, None, Literal),
                 ('sample', ADMS.sample, None, Literal)]
        self._add_list_triples_from_dict(dataset_dict, dataset_ref, items)

        # Contact details
        # Only emitted when at least one contact or maintainer field is set.
        if any([
                self._get_dataset_value(dataset_dict, 'contact_uri'),
                self._get_dataset_value(dataset_dict, 'contact_name'),
                self._get_dataset_value(dataset_dict, 'contact_email'),
                self._get_dataset_value(dataset_dict, 'maintainer'),
                self._get_dataset_value(dataset_dict, 'maintainer_email'),
        ]):

            # Use the contact URI as the node when given, else a blank node.
            contact_uri = self._get_dataset_value(dataset_dict, 'contact_uri')
            if contact_uri:
                contact_details = URIRef(contact_uri)
            else:
                contact_details = BNode()

            g.add((contact_details, RDF.type, VCARD.Kind))
            g.add((dataset_ref, DCAT.contactPoint, contact_details))

            items = [
                ('contact_name', VCARD.fn, ['maintainer'], Literal),
                ('contact_email', VCARD.hasEmail, ['maintainer_email'],
                 Literal),
            ]

            self._add_triples_from_dict(dataset_dict, contact_details, items)

        # Publisher
        if any([
                self._get_dataset_value(dataset_dict, 'publisher_uri'),
                self._get_dataset_value(dataset_dict, 'publisher_name'),
                self._get_dataset_value(dataset_dict, 'publisher_identifier'),
                dataset_dict.get('organization'),
        ]):

            publisher_uri = publisher_uri_from_dataset_dict(dataset_dict)
            if publisher_uri:
                publisher_details = URIRef(publisher_uri)
            else:
                # No organization nor publisher_uri
                publisher_details = BNode()

            g.add((publisher_details, RDF.type, FOAF.Agent))
            g.add((dataset_ref, DCT.publisher, publisher_details))

            # Fall back to the organization title when no explicit publisher
            # name is set.
            publisher_name = self._get_dataset_value(dataset_dict,
                                                     'publisher_name')
            if not publisher_name and dataset_dict.get('organization'):
                publisher_name = dataset_dict['organization']['title']

            # NOTE(review): publisher_name can still be empty here (e.g. only
            # publisher_identifier set); the FOAF.name triple is added anyway.
            g.add((publisher_details, FOAF.name, Literal(publisher_name)))
            # TODO: It would make sense to fallback these to organization
            # fields but they are not in the default schema and the
            # `organization` object in the dataset_dict does not include
            # custom fields
            items = [('publisher_email', FOAF.mbox, None, Literal),
                     ('publisher_identifier', DCT.identifier, None, Literal),
                     ('publisher_url', FOAF.homepage, None, URIRef),
                     ('publisher_type', DCT.type, None, Literal)]

            self._add_triples_from_dict(dataset_dict, publisher_details, items)

        # Temporal
        start = self._get_dataset_value(dataset_dict, 'temporal_start')
        end = self._get_dataset_value(dataset_dict, 'temporal_end')
        if start or end:
            temporal_extent = BNode()

            g.add((temporal_extent, RDF.type, DCT.PeriodOfTime))
            if start:
                self._add_date_triple(temporal_extent, SCHEMA.startDate, start)
            if end:
                self._add_date_triple(temporal_extent, SCHEMA.endDate, end)
            g.add((dataset_ref, DCT.temporal, temporal_extent))

        # parts - has part/is part of

        if any([
                self._get_dataset_value(dataset_dict, 'has_part'),
                self._get_dataset_value(dataset_dict, 'is_part_of')
        ]):
            items = [('has_part', DCT.hasPart, None, URIRef),
                     ('is_part_of', DCT.isPartOf, None, URIRef)]

            self._add_list_triples_from_dict(dataset_dict, dataset_ref, items)

        # Spatial
        spatial_uri = self._get_dataset_value(dataset_dict, 'spatial_uri')
        spatial_text = self._get_dataset_value(dataset_dict, 'spatial_text')
        spatial_geom = self._get_dataset_value(dataset_dict, 'spatial')

        if spatial_uri:
            spatial_uri = get_spatial_uri(spatial_uri)  # map from code to URI

        if spatial_uri or spatial_text or spatial_geom:
            if spatial_uri:
                spatial_ref = URIRef(spatial_uri)
            else:
                spatial_ref = BNode()

            g.add((spatial_ref, RDF.type, DCT.Location))
            g.add((dataset_ref, DCT.spatial, spatial_ref))

            if spatial_text:
                g.add((spatial_ref, SKOS.prefLabel, Literal(spatial_text)))

            if spatial_geom:
                # GeoJSON
                g.add((spatial_ref, LOCN.geometry,
                       Literal(spatial_geom, datatype=GEOJSON_IMT)))
                # WKT, because GeoDCAT-AP says so
                # Best effort: a geometry that cannot be converted is skipped
                # and only the GeoJSON literal above is kept.
                try:
                    g.add((spatial_ref, LOCN.geometry,
                           Literal(wkt.dumps(json.loads(spatial_geom),
                                             decimals=4),
                                   datatype=GSP.wktLiteral)))
                except (TypeError, ValueError, InvalidGeoJSONException):
                    pass

        # Resources
        for resource_dict in dataset_dict.get('resources', []):

            distribution = URIRef(resource_uri(resource_dict))

            g.add((dataset_ref, DCAT.distribution, distribution))

            g.add((distribution, RDF.type, DCAT.Distribution))

            # Inherit the dataset license: when the resource has no license of
            # its own, store the register URL for the dataset's license_id on
            # the resource dict (this mutates the caller's dict).
            if 'license' not in resource_dict and 'license_id' in dataset_dict:
                lr = LicenseRegister()
                _license = lr.get(dataset_dict['license_id'])
                if _license:
                    resource_dict['license'] = _license.url

            #  Simple values
            items = [
                ('name', DCT.title, None, Literal),
                ('description', DCT.description, None, Literal),
                ('status', ADMS.status, None, Literal),
                ('rights', DCT.rights, None, Literal),
                ('license', DCT.license, None, URIRef),
            ]

            self._add_triples_from_dict(resource_dict, distribution, items)

            #  Lists
            items = [
                ('documentation', FOAF.page, None, URIRef),
                ('language', DCT.language, None, URIRef),
                ('conforms_to', DCT.conformsTo, None, URIRef),
            ]
            self._add_list_triples_from_dict(resource_dict, distribution,
                                             items)

            # Format
            # A format containing '/' is emitted as the media type; otherwise
            # format and mimetype are emitted separately when present.
            if '/' in resource_dict.get('format', ''):
                g.add((distribution, DCAT.mediaType,
                       Literal(resource_dict['format'])))
            else:
                if resource_dict.get('format'):
                    g.add((distribution, DCT['format'],
                           Literal(resource_dict['format'])))

                if resource_dict.get('mimetype'):
                    g.add((distribution, DCAT.mediaType,
                           Literal(resource_dict['mimetype'])))

            # URL
            # accessURL is skipped only when it would duplicate downloadURL.
            url = resource_dict.get('url')
            download_url = resource_dict.get('download_url')
            if download_url:
                g.add((distribution, DCAT.downloadURL, URIRef(download_url)))
            if (url and not download_url) or (url and url != download_url):
                g.add((distribution, DCAT.accessURL, URIRef(url)))

            # Dates
            items = [
                ('issued', DCT.issued, None, Literal),
                ('modified', DCT.modified, None, Literal),
            ]

            self._add_date_triples_from_dict(resource_dict, distribution,
                                             items)

            # Numbers
            # Sizes that cannot be converted to float are emitted as plain
            # literals instead of xsd:decimal.
            if resource_dict.get('size'):
                try:
                    g.add((distribution, DCAT.byteSize,
                           Literal(float(resource_dict['size']),
                                   datatype=XSD.decimal)))
                except (ValueError, TypeError):
                    g.add((distribution, DCAT.byteSize,
                           Literal(resource_dict['size'])))
            # Checksum
            if resource_dict.get('hash'):
                checksum = BNode()
                g.add((checksum, SPDX.checksumValue,
                       Literal(resource_dict['hash'], datatype=XSD.hexBinary)))

                # An algorithm starting with 'http' is treated as a URI,
                # anything else as a literal.
                if resource_dict.get('hash_algorithm'):
                    if resource_dict['hash_algorithm'].startswith('http'):
                        g.add((checksum, SPDX.algorithm,
                               URIRef(resource_dict['hash_algorithm'])))
                    else:
                        g.add((checksum, SPDX.algorithm,
                               Literal(resource_dict['hash_algorithm'])))
                g.add((distribution, SPDX.checksum, checksum))
Beispiel #37
0
 def test_access_via_dict(self):
     """``as_dict()`` of ``cc-by`` reports the two compliance flags."""
     cc_by_dict = LicenseRegister()['cc-by'].as_dict()
     okd_flag = cc_by_dict['is_okd_compliant']
     assert_equal(okd_flag, True)
     osi_flag = cc_by_dict['is_osi_compliant']
     assert_equal(osi_flag, False)