def test_tokenizer(self):
    """Check that ``License.find_by_token`` resolves several token spellings.

    The license vocabulary is loaded from ``self.g`` first.  Every lookup
    below must return a license object and the second element of the
    returned pair (bound to ``default`` here) must be falsy.
    """
    load_license(self.g)
    Session.flush()
    tokens = License.get_as_tokens()
    self.assertGreater(len(tokens.keys()), 0)

    # short identifier
    from_token, default = License.find_by_token('cc-by-sa')
    self.assertFalse(default)
    self.assertTrue(from_token)
    self.assertIn('ccbysa', from_token.uri.lower())

    # identifier as used in e.g. http://opendefinition.org/licenses/cc-zero/
    from_token, default = License.find_by_token('cc-zero')
    self.assertFalse(default)
    self.assertTrue(from_token)
    self.assertIn('PublicDomain', from_token.license_type)

    # Italian-language license name
    from_token, default = License.find_by_token('Creative Commons Attribuzione')
    self.assertFalse(default)
    self.assertTrue(from_token)
    self.assertIn('Attribution', from_token.license_type)

    # long free-text license statement (note the embedded newline escape)
    odbl = """["Open Data Commons Open Database License / OSM (ODbL/OSM): You are free to copy, distribute, transmit and adapt our data, as long as you credit OpenStreetMap and its contributors\nIf you alter or build upon our data, you may distribute the result only under the same licence. (http://www.openstreetmap.org/copyright)"]"""

    # NOTE(review): the meaning of the second positional argument ('other')
    # is defined by License.find_by_token, which is not visible here.
    from_token, default = License.find_by_token(odbl, 'other')
    self.assertFalse(default)
    self.assertTrue(from_token)
    self.assertIn('odbl', from_token.default_name.lower())
Example #2
0
def map_ckan_license(harvest_object=None, pkg_dict=None):
    """
    Normalize the ``license_type`` of every resource of a dataset.

    Exactly one of the two arguments has to be supplied (and be truthy).
    For each resource of the dataset:
        * a non-empty ``license_type`` is passed to ``License.find_by_token``
          and replaced with the uri of the license that call returns;
        * a missing or empty ``license_type`` is set to the uri of the
          license returned by ``get_license_from_package`` for the dataset.

    :param harvest_object: object whose ``content`` is the JSON-serialized
        dataset
    :param pkg_dict: dictized dataset (modified in place)
    :type harvest_object: HarvestObject model
    :type pkg_dict: dict dictized dataset

    :raises ValueError: if neither or both of the arguments are provided

    :return: This will return dataset's dict with modified licenses
    :rtype: dict with dictized dataset
    """
    from_object = bool(harvest_object)
    from_dict = bool(pkg_dict)
    # both missing or both present is equally wrong
    if from_object == from_dict:
        raise ValueError(
            "You should provide either harvest_object or pkg_dict")

    data = json.loads(harvest_object.content) if from_object else pkg_dict

    # resolved once up front; used for resources without their own license
    package_license = get_license_from_package(data)

    for resource in data.get('resources') or []:
        token = resource.get('license_type')
        if not token:
            resource['license_type'] = package_license.uri
            continue
        matched, _ = License.find_by_token(token)
        resource['license_type'] = matched.uri
    return data
    def test_licenses(self):
        """Check that loading the license graph populates the license tables.

        After ``load_license(self.g)`` there must be at least one ``License``
        and one ``LocalizedLicenseName`` row, and the number of licenses must
        match the number of subjects in ``self.g`` that have ``SKOS.Concept``
        as an object.
        """
        load_license(self.g)
        Session.flush()

        count = License.q().count()
        self.assertGreater(count, 0)
        self.assertEqual(count,
                         len(list(self.g.subjects(None, SKOS.Concept))))

        self.assertGreater(LocalizedLicenseName.q().count(), 0)

        for_select = License.for_select('it')

        # check license type: the first item of every entry must be truthy
        # (presumably the license type - confirm against License.for_select)
        self.assertTrue(all(s[0] for s in for_select))
Example #4
0
    def before_index(self, dataset_dict):
        '''
        Insert `dcat_theme` and related fields into the dict sent to solr.

        Keys written to ``dataset_dict`` (each only when a value is found):

        * ``dcat_theme`` / ``dcat_subtheme`` / ``dcat_subtheme_<lang>`` -
          derived from the ``extras_theme`` value;
        * ``resource_license`` - distinct non-empty ``license_type`` values
          of the resources found in the JSON stored under ``data_dict``
          (always written, possibly as an empty list);
        * ``resource_license_<lang>`` - localized license names;
        * ``organization_region_<lang>`` - localized labels for the owner
          organization's ``region``.

        :param dataset_dict: dict about to be indexed; modified in place
        :return: the same ``dataset_dict``
        '''
        extra_theme = dataset_dict.get("extras_theme", None) or ''
        themes = helpers.dump_dcatapit_subthemes(extra_theme)
        search_terms = [t['theme'] for t in themes]
        if search_terms:
            dataset_dict['dcat_theme'] = search_terms

        search_subthemes = []
        for t in themes:
            search_subthemes.extend(t.get('subthemes') or [])

        if search_subthemes:
            dataset_dict['dcat_subtheme'] = search_subthemes
            localized_subthemes = interfaces.get_localized_subthemes(search_subthemes)
            for lang, subthemes in localized_subthemes.items():
                dataset_dict['dcat_subtheme_{}'.format(lang)] = subthemes

        ddict = json.loads(dataset_dict['data_dict'])
        resources = ddict.get('resources') or []
        # de-duplicated through a set, so the resulting order is unspecified
        _licenses = list(set([r.get('license_type') for r in resources if r.get('license_type')]))

        for license_uri in _licenses:
            lic = License.get(license_uri)
            if lic:
                # NOTE(review): one value per language is stored, so when
                # several licenses have a name in the same language the one
                # processed last wins.
                for loclic in lic.get_names():
                    lname = loclic['name']
                    lang = loclic['lang']
                    if lname:
                        dataset_dict['resource_license_{}'.format(lang)] = lname
            else:
                log.warning('Bad license: license not found: %r ', license_uri)

        dataset_dict['resource_license'] = _licenses

        org_id = dataset_dict['owner_org']
        organization_show = plugins.toolkit.get_action('organization_show')
        if org_id:
            org = organization_show(DEFAULT_ORG_CTX, {'id': org_id})
        else:
            # no owner organization: nothing to look up
            org = {}
        if org.get('region'):

            # multilang!
            region_base = org['region']
            tags = interfaces.get_all_localized_tag_labels(region_base)
            for lang, region in tags.items():
                dataset_dict['organization_region_{}'.format(lang)] = region

        self._update_pkg_rights_holder(dataset_dict, org=org)
        return dataset_dict
Example #5
0
def get_license_from_package(pkg_dict):
    """
    Resolve a license object from the package's ``license_title``.

    The title (or the literal ``'Unknown'`` when it is missing or empty) is
    handed to ``License.find_by_token``.  A warning is logged when that call
    flags its result as a fallback.

    :param pkg_dict: dictized dataset
    :return: the license returned by ``License.find_by_token``
    """
    title = pkg_dict.get('license_title')
    token = title if title else 'Unknown'

    matched, is_fallback = License.find_by_token(token)
    if is_fallback:
        log.warning("Got fallback license for %s", title)
    return matched
    def test_ckan_harvester_license(self):
        """Import a dataset through ``CKANMappingHarvester`` and check licenses.

        The harvested dataset has two resources: one whose ``license_type``
        is the string ``'invalid'`` and one carrying a vocabulary uri.  After
        the import stage the first must hold the uri of the default license,
        the second must keep its original value.
        """
        invalid_res = {
            'id': 'resource/1111',
            'url': 'http://resource/1111',
            'license_type': 'invalid',
        }
        valid_res = {
            'id': 'resource/2222',
            'url': 'http://resource/2222',
            'license_type': 'https://w3id.org/italia/controlled-vocabulary/licences/A311_GFDL13',
        }
        dataset = {
            'title': 'some title',
            'id': 'sometitle',
            'resources': [invalid_res, valid_res],
        }

        payload = json.dumps(dataset)
        harvest_dict = self._create_harvest_obj('http://mock/source/',
                                                name='testpkg')
        harvest_obj = HarvestObject.get(harvest_dict['id'])
        harvest_obj.content = payload
        harvester = CKANMappingHarvester()
        harvester.import_stage(harvest_obj)
        Session.flush()

        pkg_dict = helpers.call_action('package_show',
                                       context={},
                                       name_or_id='sometitle')
        self.assertTrue(len(pkg_dict['resources']) == 2)

        for imported in pkg_dict['resources']:
            if imported['id'] == invalid_res['id']:
                # unrecognized license is expected to map to the default one
                expected = License.get(License.DEFAULT_LICENSE).uri
            else:
                expected = valid_res['license_type']
            self.assertEqual(imported['license_type'], expected)
    def test_license(self):
        """Round-trip resource licenses through RDF serialization and parsing.

        Licenses are loaded from ``examples/licenses.rdf``.  A dataset with
        two resources (one with the ``'invalid'`` license string, one with a
        vocabulary uri) is serialized; the resulting graph and the dataset
        parsed back from the serialized form are checked:

        * the resource with the invalid license ends up with the default
          license;
        * the other resource keeps its license.
        """

        def get_path(fname):
            # example files live three directories above this test module
            return os.path.join(os.path.dirname(__file__),
                                '..', '..', '..', 'examples', fname)

        load_from_graph(path=get_path('licenses.rdf'))
        Session.flush()

        dataset = {
            'title': 'some title',
            'id': 'sometitle',
            'resources': [
                {
                    'id': 'resource/1111',
                    'uri': 'http://resource/1111',
                    'license_type': 'invalid',
                },
                {
                    'id': 'resource/2222',
                    'uri': 'http://resource/2222',
                    'license_type': 'https://w3id.org/italia/controlled-vocabulary/licences/A311_GFDL13',
                },
            ],
        }

        p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])
        s = RDFSerializer()

        # The returned dataset reference is not needed; the graph inspected
        # below is read from the serializer (presumably filled by this call).
        s.graph_from_dataset(dataset)
        g = s.g

        r1 = URIRef(dataset['resources'][0]['uri'])
        r2 = URIRef(dataset['resources'][1]['uri'])

        unknown = License.get(License.DEFAULT_LICENSE)

        license_ref = g.value(r1, DCT.license)
        assert license_ref is not None
        # report the value that is actually being compared
        assert str(license_ref) == unknown.uri, \
            "got license {}, instead of {}".format(license_ref, unknown.uri)

        gfdl = License.get(dataset['resources'][1]['license_type'])
        assert gfdl is not None

        license_ref = g.value(r2, DCT.license)
        # make sure the reference exists before using it as a lookup subject
        assert license_ref is not None
        license_type = g.value(license_ref, DCT.type)

        assert str(license_ref) == gfdl.document_uri
        assert str(license_type) == gfdl.license_type

        serialized = s.serialize_dataset(dataset)

        p.parse(serialized)
        datasets = list(p.datasets())
        assert len(datasets) == 1
        new_dataset = datasets[0]
        resources = new_dataset['resources']

        def _find_res(res_uri):
            # locate a parsed resource by its uri; fail loudly if it is missing
            for res in resources:
                if res_uri == res['uri']:
                    return res
            raise ValueError("No resource for {}".format(res_uri))

        new_res_unknown = _find_res(str(r1))
        new_res_gfdl = _find_res(str(r2))

        assert new_res_unknown['license_type'] == unknown.uri, (new_res_unknown['license_type'], unknown.uri,)
        assert new_res_gfdl['license_type'] == dataset['resources'][1]['license_type']
    def before_index(self, dataset_dict):
        '''
        Insert `dcat_theme` and related fields into the dict sent to solr.

        Adds, when values are available: theme and subtheme fields (from the
        aggregated-themes extra), localized resource license names, and
        localized labels for the region(s) of the owner organization.
        ``resource_license`` is always written.

        :param dataset_dict: dict about to be indexed; modified in place
        :return: the same ``dataset_dict``
        '''

        raw_themes = dataset_dict.get(f'extras_{FIELD_THEMES_AGGREGATE}', None) or ''
        aggr_themes = helpers.dcatapit_string_to_aggregated_themes(raw_themes)

        theme_names = [entry['theme'] for entry in aggr_themes]
        if theme_names:
            dataset_dict['dcat_theme'] = theme_names

        subtheme_names = [
            subtheme
            for entry in aggr_themes
            for subtheme in (entry.get('subthemes') or [])
        ]
        if subtheme_names:
            dataset_dict['dcat_subtheme'] = subtheme_names
            by_lang = interfaces.get_localized_subthemes(subtheme_names)
            for lang, localized in by_lang.items():
                dataset_dict[f'dcat_subtheme_{lang}'] = localized

        package = json.loads(dataset_dict['data_dict'])
        # distinct, non-empty license values of the package's resources
        license_uris = list(set(
            res.get('license_type')
            for res in (package.get('resources') or [])
            if res.get('license_type')
        ))

        for uri in license_uris:
            lic = License.get(uri)
            if not lic:
                log.warning('Bad license: license not found: %r ', uri)
                continue
            for localized_name in lic.get_names():
                name = localized_name['name']
                lang = localized_name['lang']
                if name:
                    dataset_dict[f'resource_license_{lang}'] = name
        dataset_dict['resource_license'] = license_uris

        org_id = dataset_dict['owner_org']
        organization_show = plugins.toolkit.get_action('organization_show')
        org = {}
        if org_id:
            # only the organization's extras are requested on top of the
            # basic data
            show_params = {
                'id': org_id,
                'include_tags': False,
                'include_users': False,
                'include_groups': False,
                'include_extras': True,
                'include_followers': False,
                'include_datasets': False,
            }
            org = organization_show(get_org_context(), show_params)

        if org.get('region'):
            # multilang values
            # note region can be in {val1,val2} notation for multiple values
            region_names = org['region']
            if not isinstance(region_names, (list, tuple)):
                region_names = region_names.strip('{}').split(',')

            labels_by_lang = {}
            for region_name in region_names:
                localized_labels = interfaces.get_all_localized_tag_labels(region_name)
                for tlang, tvalue in localized_labels.items():
                    labels_by_lang.setdefault(tlang, []).append(tvalue)

            for lang, labels in labels_by_lang.items():
                dataset_dict[f'organization_region_{lang}'] = labels

        self._update_pkg_rights_holder(dataset_dict, org=org)
        return dataset_dict