def amend_package(self, package):
        '''Normalize a harvested Berlin package in place.

        Returns False when the dataset must be skipped (non-public sector),
        True otherwise. Defaults the license and sector, normalizes the
        dataset type, translates groups, sets the original portal and
        lower-cases resource formats.
        '''
        extras = package['extras']

        # 'license_id' may be absent entirely, not just empty; the old
        # direct subscript raised KeyError in that case
        if package.get('license_id', '') == '':
            package['license_id'] = 'notspecified'

        # if sector is not set, set it to 'oeffentlich' (default)
        if not extras.get('sector'):
            extras['sector'] = 'oeffentlich'

        # only public-sector datasets are imported
        if extras['sector'] != 'oeffentlich':
            return False

        # coerce unknown or missing types to the default 'datensatz'
        valid_types = ['datensatz', 'dokument', 'app']
        if package.get('type') not in valid_types:
            package['type'] = 'datensatz'

        package['groups'] = translate_groups(package['groups'], 'berlin')

        # mark the source portal when the harvester did not provide one
        default_portal = 'http://datenregister.berlin.de'
        if not extras.get('metadata_original_portal'):
            extras['metadata_original_portal'] = default_portal

        # lower-case formats so downstream filtering is case-insensitive
        for resource in package['resources']:
            resource['format'] = resource['format'].lower()
        return True
    def amend_package(self, package):
        '''
        Amends the package data
        '''
        GovDataHarvester.amend_package(self, package)

        # default the license when it is absent or blank
        if package.get('license_id', '') == '':
            package['license_id'] = 'notspecified'

        extras = Extras(package['extras'])

        # if sector is not set, set it to 'oeffentlich' (default)
        if not extras.key('sector', disallow_empty=True):
            extras.update('sector', 'oeffentlich', True)

        # skip everything that is not public-sector data
        if extras.value('sector') != 'oeffentlich':
            return False

        # avoid ValidationError when extra dict
        # key 'type' is also used by the internal CKAN validation,
        # see GOVDATA-651
        if extras.key('type'):
            extras.remove('type')

        package['extras'] = extras.get()

        # coerce missing/unknown dataset types to the default
        if package.get('type') not in ('datensatz', 'dokument', 'app'):
            package['type'] = 'datensatz'

        package['groups'] = translate_groups(package['groups'], 'berlin')

        return True
# Example #3
    def amend_package(self, package):
        '''Normalize a harvested Berlin package in place.

        Returns False when the dataset must be skipped (non-public sector),
        True otherwise.
        '''
        extras = package['extras']

        # 'license_id' may be absent entirely, not just empty; the old
        # direct subscript raised KeyError in that case
        if package.get('license_id', '') == '':
            package['license_id'] = 'notspecified'

        # if sector is not set, set it to 'oeffentlich' (default)
        if not extras.get('sector'):
            extras['sector'] = 'oeffentlich'

        # only public-sector datasets are imported
        if extras['sector'] != 'oeffentlich':
            return False

        # coerce unknown or missing types to the default 'datensatz'
        valid_types = ['datensatz', 'dokument', 'app']
        if package.get('type') not in valid_types:
            package['type'] = 'datensatz'

        package['groups'] = translate_groups(package['groups'], 'berlin')

        # mark the source portal when the harvester did not provide one
        default_portal = 'http://datenregister.berlin.de'
        if not extras.get('metadata_original_portal'):
            extras['metadata_original_portal'] = default_portal

        # lower-case formats so downstream filtering is case-insensitive
        for resource in package['resources']:
            resource['format'] = resource['format'].lower()
        return True
 def test_unmapable_group_flat_list(self):
     '''Groups with no mapping for the 'hamburg' portal are dropped.'''
     translate_result = translate_groups(
         ['test-unmapable-1', 'test-unmapable-2'],
         'hamburg'
     )
     # assertEquals is a deprecated alias (removed in Python 3.12);
     # use assertEqual instead
     self.assertEqual(
         translate_result,
         []
     )
    def amend_package(self, package):
        '''This function fixes some differences in the datasets
           retrieved from Bremen and our schema such as:
        - fix groups
        - set metadata_original_portal
        - fix terms_of_use
        - copy veroeffentlichende_stelle to maintainer
        - set spatial text
        '''
        package['id'] = self.generate_id_from_name(package['name'])

        GovDataHarvester.amend_package(self, package)

        # set correct groups
        if not package['groups']:
            package['groups'] = []

        groups_before_log_message = 'groups before translate: {groups}'.format(
            groups=json.dumps(package['groups'])
        )
        LOGGER.debug(groups_before_log_message)

        package['groups'] = translate_groups(package['groups'], 'bremen')

        groups_after_log_message = 'groups after translate: {groups}'.format(
            groups=json.dumps(package['groups'])
        )
        LOGGER.debug(groups_after_log_message)

        # copy veroeffentlichende_stelle to maintainer
        extras = Extras(package['extras'])

        if extras.key('contacts'):
            contacts_dict = json.loads(extras.value('contacts'))
            # NOTE: filter() returns a lazy iterator on Python 3, so
            # truth-testing and indexing its result would break there;
            # a list comprehension behaves identically on both versions
            quelle = [
                contact for contact in contacts_dict
                if contact['role'] == 'veroeffentlichende_stelle'
            ]
            if quelle:
                package['maintainer'] = quelle[0]['name']
                package['maintainer_email'] = quelle[0]['email']
            else:
                LOGGER.info('Unable to resolve maintainer details')

        # fix typos in terms of use
        package['license_id'] = u'notspecified'

        if extras.key('terms_of_use'):
            self.fix_terms_of_use(extras)
            terms_of_use_dict = json.loads(extras.value('terms_of_use'))
            package['license_id'] = terms_of_use_dict['license_id']

        if not extras.key('spatial-text'):
            extras.update('spatial-text', 'Bremen 04 0 11 000', True)

        package['extras'] = extras.get()
# Example #6
    def amend_package(self, package):
        '''Fix groups and assign a stable id for a GovApps package.'''
        # normalize a missing/None groups value to an empty list
        # (this guard was duplicated in the original; once is enough)
        if not package['groups']:
            package['groups'] = []
        # translate groups and drop any empty mappings
        package['groups'] = [x for x in translate_groups(package['groups'], 'govapps') if len(x) > 0]

        #generate id based on OID namespace and package name, this makes sure,
        #that packages with the same name get the same id
        package['id'] = str(uuid.uuid5(uuid.NAMESPACE_OID, str(package['name'])))
# Example #7
    def amend_package(self, package):
        '''
        This function fixes some differences in the datasets retrieved from Bremen and our schema such as:
        - fix groups
        - set metadata_original_portal
        - fix terms_of_use
        - copy veroeffentlichende_stelle to maintainer
        - set spatial text
        '''

        #set metadata original portal
        package['extras'][
            'metadata_original_portal'] = 'http://daten.bremen.de/sixcms/detail.php?template=export_daten_json_d'

        # set correct groups
        if not package['groups']:
            package['groups'] = []
        package['groups'] = translate_groups(package['groups'], 'bremen')

        #copy veroeffentlichende_stelle to maintainer
        if 'contacts' in package['extras']:
            # guard against a missing 'veroeffentlichende_stelle' contact;
            # the previous filter(...)[0] raised IndexError in that case
            quellen = [contact for contact in package['extras']['contacts']
                       if contact['role'] == 'veroeffentlichende_stelle']
            if quellen:
                package['maintainer'] = quellen[0]['name']
                package['maintainer_email'] = quellen[0]['email']

        #fix typos in terms of use
        if 'terms_of_use' in package['extras']:
            self.fix_terms_of_use(package['extras']['terms_of_use'])
            #copy license id
            package['license_id'] = package['extras']['terms_of_use'][
                'license_id']
        else:
            package['license_id'] = u'notspecified'

        if "spatial-text" not in package["extras"]:
            package["extras"]["spatial-text"] = 'Bremen 04 0 11 000'

        #generate id based on OID namespace and package name, this makes sure,
        #that packages with the same name get the same id
        package['id'] = str(
            uuid.uuid5(uuid.NAMESPACE_OID, str(package['name'])))

        # lower-case resource formats once (this loop was duplicated before)
        for resource in package['resources']:
            resource['format'] = resource['format'].lower()
# Example #8
    def amend_package(self, package):
        '''Fix groups, assign a stable id and normalize resource formats.'''
        # normalize a missing/None groups value to an empty list
        # (this guard was duplicated in the original; once is enough)
        if not package['groups']:
            package['groups'] = []
        # translate groups and drop any empty mappings
        package['groups'] = [
            x for x in translate_groups(package['groups'], 'govapps')
            if len(x) > 0
        ]

        #generate id based on OID namespace and package name, this makes sure,
        #that packages with the same name get the same id
        package['id'] = str(
            uuid.uuid5(uuid.NAMESPACE_OID, str(package['name'])))

        # lower-case formats so downstream filtering is case-insensitive
        for resource in package['resources']:
            resource['format'] = resource['format'].lower()
    def amend_package(self, package):
        """
        This function fixes some differences in the datasets retrieved from Bremen and our schema such as:
        - fix groups
        - set metadata_original_portal
        - fix terms_of_use
        - copy veroeffentlichende_stelle to maintainer
        - set spatial text
        """

        # set metadata original portal
        package['extras'][
            'metadata_original_portal'] = 'http://daten.bremen.de/sixcms/detail.php?template=export_daten_json_d'

        # set correct groups
        if not package['groups']:
            package['groups'] = []
        package['groups'] = translate_groups(package['groups'], 'bremen')

        # copy veroeffentlichende_stelle to maintainer
        if 'contacts' in package['extras']:
            # guard against a missing 'veroeffentlichende_stelle' entry;
            # the previous filter(...)[0] raised IndexError when no
            # contact matched
            quellen = [contact for contact in package['extras']['contacts']
                       if contact['role'] == 'veroeffentlichende_stelle']
            if quellen:
                package['maintainer'] = quellen[0]['name']
                package['maintainer_email'] = quellen[0]['email']

        # fix typos in terms of use
        if 'terms_of_use' in package['extras']:
            self.fix_terms_of_use(package['extras']['terms_of_use'])
            # copy license id
            package['license_id'] = package['extras']['terms_of_use']['license_id']
        else:
            package['license_id'] = u'notspecified'

        if "spatial-text" not in package["extras"]:
            package["extras"]["spatial-text"] = 'Bremen 04 0 11 000'

        # generate id based on OID namespace and package name, this makes sure,
        # that packages with the same name get the same id
        package['id'] = str(uuid.uuid5(uuid.NAMESPACE_OID, str(package['name'])))

        # lower-case resource formats once (this loop was duplicated before)
        for resource in package['resources']:
            resource['format'] = resource['format'].lower()
    def test_mapable_group_flat_list(self):
        '''Flat group names known to the 'hamburg' mapping translate 1:1.'''
        translate_result = translate_groups(
            ['bevolkerung', 'umwelt-und-klima', 'transport-und-verkehr'],
            'hamburg'
        )

        # assertEquals is a deprecated alias (removed in Python 3.12);
        # use assertEqual, and call len() instead of __len__() directly
        self.assertEqual(
            type(translate_result).__name__,
            'list'
        )
        self.assertEqual(
            len(translate_result),
            3
        )
        self.assertEqual(
            translate_result,
            [{'id': u'bevoelkerung', 'name': u'bevoelkerung'},
             {'id': u'umwelt_klima', 'name': u'umwelt_klima'},
             {'id': u'transport_verkehr', 'name': u'transport_verkehr'}]
        )
    def test_mapable_group_dict_list(self):
        '''Dict-shaped groups translate via their 'name' field.'''
        dict_list = [{
            "vocabulary_id": 1,
            "state": "active",
            "display_name": "offene-daten-k\u00f6ln",
            "id": "07767723-df63-44fa-8bb1-002cf932c2f6",
            "name": "bevolkerung"
        }, {
            "vocabulary_id": 2,
            "state": "active",
            "display_name": "offene-daten-k\u00f6ln",
            "id": "07767723-df63-44fa-8bb1-002cf932c2f6",
            "name": "umwelt-und-klima"
        }, {
            "vocabulary_id": 3,
            "state": "active",
            "display_name": "offene-daten-k\u00f6ln",
            "id": "07767723-df63-44fa-8bb1-002cf932c2f6",
            "name": "transport-und-verkehr"
        }]

        translate_result = translate_groups(
            dict_list,
            'hamburg'
        )

        # assertEquals is a deprecated alias (removed in Python 3.12);
        # use assertEqual, and call len() instead of __len__() directly
        self.assertEqual(
            type(translate_result).__name__,
            'list'
        )
        self.assertEqual(
            len(translate_result),
            3
        )
        self.assertEqual(
            translate_result,
            [{'id': u'bevoelkerung', 'name': u'bevoelkerung'},
             {'id': u'umwelt_klima', 'name': u'umwelt_klima'},
             {'id': u'transport_verkehr', 'name': u'transport_verkehr'}]
        )
    def test_unmapable_group_dict_list(self):
        '''Dict-shaped groups with unknown names yield an empty result.'''
        dict_list = [{
            "vocabulary_id": 1,
            "state": "active",
            "display_name": "offene-daten-k\u00f6ln",
            "id": "07767723-df63-44fa-8bb1-002cf932c2f6",
            "name": "Group 1"
        }, {
            "vocabulary_id": 2,
            "state": "active",
            "display_name": "offene-daten-k\u00f6ln",
            "id": "07767723-df63-44fa-8bb1-002cf932c2f6",
            "name": "Group 2"
        }]

        translate_result = translate_groups(
            dict_list,
            'hamburg'
        )
        # assertEquals is a deprecated alias (removed in Python 3.12)
        self.assertEqual(
            translate_result,
            []
        )
    def amend_package(self, package):
        '''Amend a harvested Hamburg package in place.

        Returns False when the dataset must be skipped (stale version or a
        document without the 'govdata' tag), True otherwise.
        '''
        # check if latestVersion of package
        extras = package['extras']

        is_latest_version = extras.get('latestVersion', None)

        if is_latest_version == "true":
            log.debug('received latestVersion == true. Continue with this dataset')
            # get metadata_original_id
            # TODO subject to change in the future
            remote_metadata_original_id = extras.get('metadata_original_id', None)
            registry = ckanapi.RemoteCKAN('http://localhost:80/ckan')
            local_search_result = registry.action.package_search(q='metadata_original_id:"' + remote_metadata_original_id + '"')
            if local_search_result['count'] == 0:
                log.debug('Did not find this metadata original id. Import accepted.')
            elif local_search_result['count'] == 1:
                log.debug('Found local dataset for particular metadata_original_id')
                local_dataset_from_action_api = local_search_result['results'][0]

                # copy name and id from local dataset to remote dataset
                log.debug('Copy id and name to remote dataset')
                log.debug(package['id'])
                log.debug(package['name'])
                package['id'] = local_dataset_from_action_api['id']
                package['name'] = local_dataset_from_action_api['name']
                log.debug(package['id'])
                log.debug(package['name'])
            else:
                log.debug('Found more than one local dataset for particular metadata_original_id. Offending metadata_original_id is:')
                log.debug(remote_metadata_original_id)
        elif is_latest_version == 'false':
            # do not import or update this particular remote dataset
            log.debug('received latestVersion == false. Skip this dataset')
            return False

        # check if import is desired: documents must carry the 'govdata' tag
        # (the 'document' and 'dokument' branches were exact duplicates of
        # the same tag check, so they are merged here)
        if package['type'] in ('document', 'dokument'):
            if not [tag for tag in package['tags'] if tag.lower() == 'govdata']:
                log.debug('Found invalid package')
                return False
            package['type'] = 'dokument'
        elif package['type'] == 'dataset':
            package['type'] = 'datensatz'

        # fix groups
        log.debug("Before: ")
        log.debug(package['groups'])
        package['groups'] = translate_groups(package['groups'], 'hamburg')
        log.debug("After: ")
        log.debug(package['groups'])
        # set original portal
        if not extras.get('metadata_original_portal'):
            extras['metadata_original_portal'] = self.PORTAL

        assert_author_fields(package, package.get('maintainer'),
                             package.get('maintainer_email'))

        return True
    def amend_package(self, package):
        '''
        Amends the package data.

        Returns False when the dataset must be skipped (not the latest
        version, or a document without the 'govdata' tag), True otherwise.
        '''
        GovDataHarvester.amend_package(self, package)

        context = self.build_context()

        extras = Extras(package['extras'])

        is_latest_version = None
        if extras.key('latestVersion'):
            is_latest_version = extras.value('latestVersion')

        if is_latest_version == 'true':
            LOGGER.debug(
                'received latestVersion == true. Continue with this dataset')

            remote_metadata_original_id = extras.value(
                'metadata_original_id'
            )

            # compare harvested OGD-Dataset with local DCAT-AP.de-Dataset
            data_dict = {"q": 'identifier:"' + remote_metadata_original_id + '"'}
            local_search_result = get_action("package_search")(context, data_dict)

            if local_search_result['count'] == 0:
                LOGGER.debug(
                    'Did not find this metadata original id. Import accepted.')
            elif local_search_result['count'] == 1:
                LOGGER.debug(
                    'Found local dataset for particular metadata_original_id')
                local_dataset_from_action_api = local_search_result[
                    'results'][0]

                # copy name and id from local dataset to remote dataset
                LOGGER.debug('Copy id and name to remote dataset')
                LOGGER.debug(package['id'])
                LOGGER.debug(package['name'])
                package['id'] = local_dataset_from_action_api['id']
                package['name'] = local_dataset_from_action_api['name']
                LOGGER.debug(package['id'])
                LOGGER.debug(package['name'])
            else:
                # single implicitly-concatenated literal instead of the old
                # four-step string concatenation
                LOGGER.debug(
                    'Found more than one local dataset for particular '
                    'metadata_original_id. Offending metadata_original_id is:')
                LOGGER.debug(remote_metadata_original_id)
        elif is_latest_version == 'false':
            # do not import or update this particular remote dataset
            LOGGER.debug('received latestVersion == false. Skip this dataset')
            return False

        # check if import is desired
        if package['type'] == 'document' or package['type'] == 'dokument':
            if not self.has_tag(package['tags'], 'govdata'):
                # the package is rejected because the tag is MISSING; the
                # old message claimed the opposite ("with 'govdata' tag")
                LOGGER.debug("Found invalid package without 'govdata' tag")
                return False
            package['type'] = 'dokument'
        elif package['type'] == 'dataset':
            package['type'] = 'datensatz'

        # fix groups
        LOGGER.debug('Before: ')
        LOGGER.debug(package['groups'])
        package['groups'] = translate_groups(package['groups'], 'hamburg')
        LOGGER.debug('After: ')
        LOGGER.debug(package['groups'])

        self.assert_author_fields(
            package,
            package.get('maintainer'),
            package.get('maintainer_email')
        )

        return True