def contacts_role_ansprechpartner(self, dataset):
    '''contacts.role.ansprechpartner -> extras.maintainer

    Reads the "ansprechpartner" contact entry of ``dataset`` and moves its
    name, e-mail, URL and address over to the maintainer fields.  Nothing is
    changed when the extras field ``maintainer_contacttype`` already exists
    (it is used as the "already migrated" marker) or when no contact entry
    is found.  The dataset is modified in place; nothing is returned.
    '''
    role = 'ansprechpartner'
    contact = util.get_extras_contacts_data(dataset, role)
    migration_marker = ds_utils.get_extras_field(dataset, u'maintainer_contacttype')

    # Skip datasets that were migrated before or that have no such contact.
    if migration_marker is not None or contact is None:
        return

    # Name and e-mail are only taken over as a pair; if either one is
    # missing or empty, both stay in the contact entry.
    if contact.get('name') and contact.get('email'):
        dataset['maintainer'] = contact.pop('name', '')
        dataset['maintainer_email'] = contact.pop('email', '')

    maintainer_url = contact.pop('url', '')
    ds_utils.insert_new_extras_field(dataset, u'maintainer_url', maintainer_url, False)

    # Write back the contact entry without the values popped above.
    util.update_extras_contacts_data(dataset, role, contact)

    # Additional field; doubles as the marker checked at the top.
    ds_utils.insert_new_extras_field(
        dataset, u'maintainer_contacttype', u'Organization', False)

    util.move_extras_contacts_address(dataset, role, 'maintainer', contact)
def move_extras_contacts_address(dataset, role, new_role, contact_data=None):
    '''Move the address of contact ``role`` into ``<new_role>_*`` extras.

    ``contact_data`` may be passed in as an already loaded dict for ``role``;
    when it is None the data is loaded from ``dataset``.  The address is
    parsed with ``addr_parse`` and each recognized part is inserted as the
    extras field ``<new_role>_<part>``.  If the parse result has a non-empty
    'unknown' entry, that text is kept as the contact's address and a
    warning is logged; otherwise the address is removed from the contact.
    The dataset is modified in place; nothing is returned.
    '''
    # load the data if no preloaded dict is available
    if contact_data is None:
        contact_data = get_extras_contacts_data(dataset, role)

    if contact_data is None or 'address' not in contact_data:
        return

    address_parts = addr_parse(contact_data['address'])
    part_names = ['addressee', 'details', 'street', 'zip', 'city', 'country']

    # first, check if any of the new fields is present. If yes, skip
    # the movement to avoid corrupt datasets
    if any(ds_utils.get_extras_field(dataset, new_role + '_' + part)
           for part in part_names):
        return

    for part in part_names:
        if part in address_parts:
            ds_utils.insert_new_extras_field(
                dataset, new_role + '_' + part, address_parts[part], False)

    unrecognized = address_parts.get('unknown')
    if unrecognized:
        # Keep what could not be recognized in the old place and report it.
        contact_data['address'] = unrecognized
        message = (u'The following address parts of role ' + role +
                   u' were not recognized: "' + unrecognized + u'"')
        log_warn(dataset, message)
    else:
        del contact_data['address']

    update_extras_contacts_data(dataset, role, contact_data)
def migrate_dates_field(dataset, from_field, to_field):
    '''extras.dates.<<from_field>> -> extras.<<to_field>>

    Takes the 'date' value out of the dates entry ``from_field`` and inserts
    it as the extras field ``to_field``.  Nothing happens when ``to_field``
    already exists or when the dates entry is missing or empty.  The dataset
    is modified in place; nothing is returned.
    '''
    dates_entry = get_extras_dates_data(dataset, from_field)
    existing_target = ds_utils.get_extras_field(dataset, to_field)

    if existing_target is not None or not dates_entry:
        return

    date_value = dates_entry.pop('date', '')
    ds_utils.insert_new_extras_field(dataset, to_field, date_value, False)

    # Write back the dates entry without the value popped above.
    update_extras_dates_data(dataset, from_field, dates_entry)
def contacts_role_veroeffentlichende_stelle(self, dataset):
    '''contacts.role.veroeffentlichende_stelle -> extras.publisher

    Reads the "veroeffentlichende_stelle" contact entry of ``dataset`` and
    moves its name, e-mail, URL and address over to ``publisher_*`` extras
    fields.  Nothing is changed when the extras field
    ``publisher_contacttype`` already exists (it is used as the "already
    migrated" marker) or when no contact entry is found.  The dataset is
    modified in place; nothing is returned.
    '''
    role = 'veroeffentlichende_stelle'
    contact = util.get_extras_contacts_data(dataset, role)
    migration_marker = ds_utils.get_extras_field(dataset, u'publisher_contacttype')

    # Skip datasets that were migrated before or that have no such contact.
    if migration_marker is not None or contact is None:
        return

    # (extras field to create, key to take out of the contact entry)
    moved_values = (
        (u'publisher_name', 'name'),
        (u'publisher_email', 'email'),
        (u'publisher_url', 'url'),
    )
    for extras_key, contact_key in moved_values:
        ds_utils.insert_new_extras_field(
            dataset, extras_key, contact.pop(contact_key, ''), False)

    # Write back the contact entry without the values popped above.
    util.update_extras_contacts_data(dataset, role, contact)

    # Additional field; doubles as the marker checked at the top.
    ds_utils.insert_new_extras_field(
        dataset, u'publisher_contacttype', u'Organization', False)

    util.move_extras_contacts_address(dataset, role, 'publisher', contact)
def metadata_original_portal(self, dataset):
    '''metadata_original_portal -> contributorID

    When the dataset has a non-empty ``metadata_original_portal`` extras
    field, it is renamed to ``contributorID`` via
    ``util.rename_extras_field_migration``.  If the field named by
    ``EXTRA_KEY_HARVESTED_PORTAL`` was absent beforehand, it is created from
    the source field's value.  The dataset is modified in place; nothing is
    returned.
    '''
    source_field = ds_utils.get_extras_field(dataset, u'metadata_original_portal')
    harvested_field = ds_utils.get_extras_field(dataset, EXTRA_KEY_HARVESTED_PORTAL)

    if not source_field:
        return

    util.rename_extras_field_migration(
        dataset, u'metadata_original_portal', u'contributorID', True, False)

    if harvested_field is None:
        # NOTE(review): the value is read only after the rename call, so it
        # reflects whatever that helper did to the field -- keep this order.
        ds_utils.insert_new_extras_field(
            dataset, EXTRA_KEY_HARVESTED_PORTAL, source_field['value'], False)
def spatial_reference_text(self, dataset):
    '''spatial_reference.text -> extras.geocodingText

    The ``spatial_reference`` extras field holds a JSON-encoded dictionary.
    If that dictionary has a 'text' entry, the entry is inserted as the
    extras field ``geocodingText`` and removed from the dictionary, which is
    then serialized back (keys sorted) into ``spatial_reference``.  Nothing
    is changed when the field, its value or the 'text' entry is missing.
    The dataset is modified in place; nothing is returned.
    '''
    reference_field = ds_utils.get_extras_field(dataset, 'spatial_reference')
    raw_value = reference_field['value'] if reference_field is not None else None
    if raw_value is None:
        return

    # Convert string representation of dictionary to actual dictionary
    reference = json.loads(raw_value, encoding='utf-8')
    geocoding_text = reference.get('text')
    if geocoding_text is None:
        return

    ds_utils.insert_new_extras_field(dataset, u'geocodingText', geocoding_text, True)
    reference.pop('text', None)
    serialized = json.dumps(reference, sort_keys=True)
    reference_field['value'] = unicode(serialized)
def parse_dataset(self, dataset_dict, dataset_ref):
    """Transform DCAT-AP.de data into the CKAN dataset dictionary.

    Reads values for ``dataset_ref`` through the helper methods of this
    object and from the graph ``self.g``, and stores them in
    ``dataset_dict``: single-valued and list-valued extras fields, contact
    data, the ``contactPoint`` details as maintainer fields, groups derived
    from ``dcat:theme`` and extra fields on matching resources.

    Returns the same ``dataset_dict`` object, modified in place.
    """
    # Simple additional fields
    single_value_fields = (
        ('qualityProcessURI', DCATDE.qualityProcessURI),
        ('metadata_original_html', DCAT.landingPage),
        ('politicalGeocodingLevelURI', DCATDE.politicalGeocodingLevelURI),
    )
    for extras_key, predicate in single_value_fields:
        single_value = self._object_value(dataset_ref, predicate)
        if single_value:
            ds_utils.insert_new_extras_field(dataset_dict, extras_key, single_value)

    # List fields, stored as a JSON-encoded list
    list_value_fields = (
        ('contributorID', DCATDE.contributorID),
        ('politicalGeocodingURI', DCATDE.politicalGeocodingURI),
        ('legalbasisText', DCATDE.legalbasisText),
        ('geocodingText', DCATDE.geocodingText),
    )
    for extras_key, predicate in list_value_fields:
        value_list = self._object_value_list(dataset_ref, predicate)
        if value_list:
            ds_utils.insert_new_extras_field(
                dataset_dict, extras_key, json.dumps(value_list))

    # Contacts: (predicate, target name, flag handed on to _parse_contact)
    contact_roles = (
        (DCATDE.originator, 'originator', True),
        (DCATDE.maintainer, 'maintainer', False),
        (DCT.contributor, 'contributor', True),
        (DCT.creator, 'author', False),
    )
    for predicate, target_name, contact_flag in contact_roles:
        self._parse_contact(
            dataset_dict, dataset_ref, predicate, target_name, contact_flag)

    # dcat:contactPoint
    # TODO: dcat-ap adds the values to extras.contact_... . Maybe better than maintainer?
    contact = self._object(dataset_ref, DCAT.contactPoint)
    self._add_maintainer_field(dataset_dict, contact, 'url', VCARD.hasURL)

    telephone = self._object_value(contact, VCARD.hasTelephone)
    if telephone:
        ds_utils.insert(
            dataset_dict, 'maintainer_tel', self._without_tel(telephone), True)

    maintainer_address_fields = (
        ('street', VCARD.hasStreetAddress),
        ('city', VCARD.hasLocality),
        ('zip', VCARD.hasPostalCode),
        ('country', VCARD.hasCountryName),
    )
    for field_name, predicate in maintainer_address_fields:
        self._add_maintainer_field(dataset_dict, contact, field_name, predicate)

    # Groups: keep what is already there and append one group per theme
    # URI that starts with the known theme prefix.
    groups = self._get_dataset_value(dataset_dict, 'groups') or []
    for theme in self.g.objects(dataset_ref, DCAT.theme):
        theme_uri = unicode(theme)
        if not theme_uri.startswith(dcat_theme_prefix):
            continue
        group_name = theme_uri.replace(dcat_theme_prefix, '').lower()
        groups.append({'id': group_name, 'name': group_name})
    dataset_dict['groups'] = groups

    # Add additional distribution fields
    distribution_fields = (
        ('licenseAttributionByText', DCATDE.licenseAttributionByText),
        ('plannedAvailability', DCATDE.plannedAvailability),
    )
    for distribution in self.g.objects(dataset_ref, DCAT.distribution):
        for resource_dict in dataset_dict.get('resources', []):
            # Match distribution in graph and distribution in ckan-dict
            if unicode(distribution) != resource_uri(resource_dict):
                continue
            for resource_key, predicate in distribution_fields:
                resource_value = self._object_value(distribution, predicate)
                if resource_value:
                    ds_utils.insert_resource_extra(
                        resource_dict, resource_key, resource_value)

    return dataset_dict