Ejemplo n.º 1
0
    def contacts_role_ansprechpartner(self, dataset):
        '''contacts.role.ansprechpartner -> extras.maintainer

        Copies name and email of the 'ansprechpartner' contact to the
        dataset's maintainer fields and its URL to the 'maintainer_url'
        extra, but only when both name and email are set. The address of
        the contact is handed to util.move_extras_contacts_address for the
        'maintainer' role in either case.
        '''
        role = 'ansprechpartner'
        contact = util.get_extras_contacts_data(dataset, role)
        marker = ds_utils.get_extras_field(dataset,
                                           u'maintainer_contacttype')

        # 'maintainer_contacttype' is added below, so finding it is taken as
        # the sign that this dataset has been migrated before. Without
        # contact data there is nothing to do either.
        if marker is not None or contact is None:
            return

        if contact.get('name') and contact.get('email'):
            dataset['maintainer'] = contact.pop('name', '')
            dataset['maintainer_email'] = contact.pop('email', '')

            homepage = contact.pop('url', '')
            ds_utils.insert_new_extras_field(dataset, u'maintainer_url',
                                             homepage, False)

            # Write back the contact without the parts moved above
            util.update_extras_contacts_data(dataset, role, contact)

            # Additional field
            ds_utils.insert_new_extras_field(dataset,
                                             u'maintainer_contacttype',
                                             u'Organization', False)

        util.move_extras_contacts_address(dataset, role, 'maintainer',
                                          contact)
Ejemplo n.º 2
0
def move_extras_contacts_address(dataset, role, new_role, contact_data=None):
    '''Moves the address of a contact into separate <new_role>_* extras.

    dataset: the dataset dict to modify
    role: name of the contact role whose address is moved
    new_role: prefix of the extras fields receiving the address parts
    contact_data: optional preloaded contact dict for the role; it is
        loaded with get_extras_contacts_data if omitted

    The address is split with addr_parse. Nothing is changed if one of the
    target extras is already present. Parts returned under the key
    'unknown' stay in the contact's address and a warning is logged;
    otherwise the address is removed from the contact. The contact is
    written back with update_extras_contacts_data afterwards.
    '''
    # load the data if no preloaded dict is available
    if contact_data is None:
        contact_data = get_extras_contacts_data(dataset, role)

    if contact_data is None or 'address' not in contact_data:
        return

    address_parts = addr_parse(contact_data['address'])
    part_names = ('addressee', 'details', 'street', 'zip', 'city', 'country')
    target_keys = [new_role + '_' + part for part in part_names]

    # A target field that already exists means moving the address again
    # could corrupt the dataset, so leave everything untouched.
    if any(ds_utils.get_extras_field(dataset, key) for key in target_keys):
        return

    for part, key in zip(part_names, target_keys):
        if part in address_parts:
            ds_utils.insert_new_extras_field(dataset, key,
                                             address_parts[part], False)

    leftover = address_parts.get('unknown')
    if not leftover:
        del contact_data['address']
    else:
        # Keep what could not be assigned to a field in the old place
        contact_data['address'] = leftover
        log_warn(dataset, u'The following address parts of role ' +
                 role + u' were not recognized: "' + leftover
                 + u'"')

    update_extras_contacts_data(dataset, role, contact_data)
Ejemplo n.º 3
0
def migrate_dates_field(dataset, from_field, to_field):
    '''extras.dates.<<from_field>> -> extras.<<to_field>>

    Does nothing if the extras field to_field is already present or if
    get_extras_dates_data yields no data for from_field.
    '''
    dates_entry = get_extras_dates_data(dataset, from_field)
    existing = ds_utils.get_extras_field(dataset, to_field)

    if existing is not None or not dates_entry:
        return

    # Take the date out of the old entry; an empty string is stored if the
    # entry has no 'date' key.
    moved_date = dates_entry.pop('date', '')
    ds_utils.insert_new_extras_field(dataset, to_field, moved_date, False)
    update_extras_dates_data(dataset, from_field, dates_entry)
Ejemplo n.º 4
0
    def contacts_role_veroeffentlichende_stelle(self, dataset):
        '''contacts.role.veroeffentlichende_stelle -> extras.publisher

        Moves name, email and URL of the 'veroeffentlichende_stelle' contact
        into the publisher_* extras (empty strings for missing parts), adds
        the 'publisher_contacttype' extra and hands the address of the
        contact to util.move_extras_contacts_address for the 'publisher'
        role.
        '''
        role = 'veroeffentlichende_stelle'
        contact = util.get_extras_contacts_data(dataset, role)
        marker = ds_utils.get_extras_field(dataset,
                                           u'publisher_contacttype')

        # 'publisher_contacttype' is added below, so finding it is taken as
        # the sign that this dataset has been migrated before. Without
        # contact data there is nothing to do either.
        if marker is not None or contact is None:
            return

        for part in ('name', 'email', 'url'):
            ds_utils.insert_new_extras_field(dataset, u'publisher_' + part,
                                             contact.pop(part, ''), False)

        # Write back the contact without the parts moved above
        util.update_extras_contacts_data(dataset, role, contact)

        # Additional field
        ds_utils.insert_new_extras_field(dataset, u'publisher_contacttype',
                                         u'Organization', False)
        util.move_extras_contacts_address(dataset, role, 'publisher',
                                          contact)
Ejemplo n.º 5
0
    def metadata_original_portal(self, dataset):
        '''metadata_original_portal -> contributorID

        Additionally stores the original value under
        EXTRA_KEY_HARVESTED_PORTAL if that extra was not present before the
        rename.
        '''
        source_key = u'metadata_original_portal'
        portal_extra = ds_utils.get_extras_field(dataset, source_key)
        # Looked up before the rename, so the check below refers to the
        # state of the dataset as it was passed in.
        harvested_extra = ds_utils.get_extras_field(
            dataset, EXTRA_KEY_HARVESTED_PORTAL)

        if not portal_extra:
            return

        util.rename_extras_field_migration(dataset, source_key,
                                           u'contributorID', True, False)
        if harvested_extra is None:
            ds_utils.insert_new_extras_field(dataset,
                                             EXTRA_KEY_HARVESTED_PORTAL,
                                             portal_extra['value'], False)
Ejemplo n.º 6
0
    def spatial_reference_text(self, dataset):
        '''spatial_reference.text -> extras.geocodingText

        The value of the 'spatial_reference' extra is read as JSON. If it
        has a 'text' entry, that entry is stored as the 'geocodingText'
        extra and removed from the JSON, which is written back with sorted
        keys.
        '''
        reference_extra = ds_utils.get_extras_field(dataset,
                                                    'spatial_reference')
        if reference_extra is None:
            return

        raw_json = reference_extra['value']
        if raw_json is None:
            return

        # Convert string representation of dictionary to actual dictionary
        parsed = json.loads(raw_json, encoding='utf-8')
        geocoding_text = parsed.get('text')
        if geocoding_text is None:
            return

        ds_utils.insert_new_extras_field(dataset, u'geocodingText',
                                         geocoding_text, True)

        # Drop the moved entry and store the remaining data again
        parsed.pop('text', None)
        remaining_json = json.dumps(parsed, sort_keys=True)
        reference_extra['value'] = unicode(remaining_json)
Ejemplo n.º 7
0
    def parse_dataset(self, dataset_dict, dataset_ref):
        """
        Transfers the DCAT-AP.de data found for dataset_ref into the CKAN
        dictionary dataset_dict and returns that dictionary.

        Handled are single-valued and list-valued extras, the contact
        roles, the maintainer details taken from dcat:contactPoint, the
        groups derived from dcat:theme and additional fields of the
        distributions.
        """

        # Simple additional fields
        single_valued = (
            ('qualityProcessURI', DCATDE.qualityProcessURI),
            ('metadata_original_html', DCAT.landingPage),
            ('politicalGeocodingLevelURI', DCATDE.politicalGeocodingLevelURI),
        )
        for extras_key, rdf_predicate in single_valued:
            single_value = self._object_value(dataset_ref, rdf_predicate)
            if not single_value:
                continue
            ds_utils.insert_new_extras_field(dataset_dict, extras_key,
                                             single_value)

        # List fields, stored as JSON encoded lists
        list_valued = (
            ('contributorID', DCATDE.contributorID),
            ('politicalGeocodingURI', DCATDE.politicalGeocodingURI),
            ('legalbasisText', DCATDE.legalbasisText),
            ('geocodingText', DCATDE.geocodingText),
        )
        for extras_key, rdf_predicate in list_valued:
            value_list = self._object_value_list(dataset_ref, rdf_predicate)
            if not value_list:
                continue
            ds_utils.insert_new_extras_field(dataset_dict, extras_key,
                                             json.dumps(value_list))

        # Contact roles: (predicate, field prefix, flag for _parse_contact)
        contact_roles = (
            (DCATDE.originator, 'originator', True),
            (DCATDE.maintainer, 'maintainer', False),
            (DCT.contributor, 'contributor', True),
            (DCT.creator, 'author', False),
        )
        for rdf_predicate, prefix, flag in contact_roles:
            self._parse_contact(dataset_dict, dataset_ref, rdf_predicate,
                                prefix, flag)

        # dcat:contactPoint
        # TODO: dcat-ap adds the values to extras.contact_... . Maybe better than maintainer?
        # NOTE(review): whether self._object can return None here and how the
        # helpers below treat that is not visible from this method - confirm.
        contact = self._object(dataset_ref, DCAT.contactPoint)
        self._add_maintainer_field(dataset_dict, contact, 'url', VCARD.hasURL)

        telephone = self._object_value(contact, VCARD.hasTelephone)
        if telephone:
            ds_utils.insert(dataset_dict, 'maintainer_tel',
                            self._without_tel(telephone), True)

        address_parts = (
            ('street', VCARD.hasStreetAddress),
            ('city', VCARD.hasLocality),
            ('zip', VCARD.hasPostalCode),
            ('country', VCARD.hasCountryName),
        )
        for field_name, rdf_predicate in address_parts:
            self._add_maintainer_field(dataset_dict, contact, field_name,
                                       rdf_predicate)

        # Groups: extend the groups already present by the DCAT themes
        theme_groups = self._get_dataset_value(dataset_dict, 'groups') or []

        for theme in self.g.objects(dataset_ref, DCAT.theme):
            theme_uri = unicode(theme)
            if not theme_uri.startswith(dcat_theme_prefix):
                continue

            group_id = theme_uri.replace(dcat_theme_prefix, '').lower()
            theme_groups.append({'id': group_id, 'name': group_id})

        dataset_dict['groups'] = theme_groups

        # Add additional distribution fields
        resource_extras = (
            ('licenseAttributionByText', DCATDE.licenseAttributionByText),
            ('plannedAvailability', DCATDE.plannedAvailability),
        )
        for distribution in self.g.objects(dataset_ref, DCAT.distribution):
            for resource_dict in dataset_dict.get('resources', []):
                # Match distribution in graph and distribution in ckan-dict
                if unicode(distribution) != resource_uri(resource_dict):
                    continue

                for extras_key, rdf_predicate in resource_extras:
                    extra_value = self._object_value(distribution,
                                                     rdf_predicate)
                    if extra_value:
                        ds_utils.insert_resource_extra(resource_dict,
                                                       extras_key,
                                                       extra_value)

        return dataset_dict