def populate_resource_license(package_dict):
    """Set ``license_type`` on every resource of ``package_dict``.

    The license is resolved from the package's ``license_id`` and from the
    ``license_url``/``licence_url``, ``license``/``licence`` and
    ``access_constraints`` extras. When none of those carries a value the
    default license (``License.DEFAULT_LICENSE``) is used; otherwise the
    lookup is delegated to ``License.find_by_token``.

    :param package_dict: package dict; ``extras`` is expected to be a list
        of ``{'key': ..., 'value': ...}`` dicts and ``resources`` a list of
        resource dicts. Both may be missing or ``None``.
    :returns: the same ``package_dict`` object, modified in place.
    """
    license_id = package_dict.get('license_id')
    license_url = None
    license_name = None
    access_constraints = None

    # When a key occurs more than once the last occurrence wins.
    for extra in package_dict.get('extras') or []:
        key = extra['key']
        if key in ('license_url', 'licence_url'):
            license_url = extra['value']
        elif key in ('license', 'licence'):
            license_name = extra['value']
        elif key == 'access_constraints':
            access_constraints = extra['value']

    if access_constraints or license_id or license_name or license_url:
        # find_by_token returns a pair; only the license object is needed.
        license_obj, _is_default = License.find_by_token(
            access_constraints, license_name, license_id, license_url)
    else:
        license_obj = License.get(License.DEFAULT_LICENSE)

    # 'resources' is guarded like 'extras' so that a package without
    # resources is returned untouched instead of raising KeyError.
    for res in package_dict.get('resources') or []:
        res['license_type'] = license_obj.uri

    return package_dict
def get_package_dict(self, iso_values, harvest_object):
    """Extend the parent harvester's package dict with DCAT-AP_IT extras.

    The package dict built by the parent class is enriched, in order, with
    the extras ``identifier``, ``theme``, ``publisher_*``, ``modified``,
    ``frequency``, ``holder_*``, ``issued``, ``geographical_name`` (only
    when a value is available), ``language``, ``temporal_start`` /
    ``temporal_end`` (only when present), ``conforms_to`` and
    ``creator_*``. Finally every resource gets a ``license_type``.

    Settings are read from the ``dcatapit_config`` entry of
    ``self.source_config``; values missing there fall back to
    ``self._dcatapit_config``.

    :param iso_values: mapping of values parsed from the ISO record.
    :param harvest_object: harvest object, passed through to the parent.
    :returns: the package dict returned by the parent, modified in place.
    """
    package_dict = super(DCATAPITCSWHarvester, self).get_package_dict(
        iso_values, harvest_object)

    mapping_frequencies_to_mdr_vocabulary = self.source_config.get(
        'mapping_frequencies_to_mdr_vocabulary',
        utils._mapping_frequencies_to_mdr_vocabulary)
    mapping_languages_to_mdr_vocabulary = self.source_config.get(
        'mapping_languages_to_mdr_vocabulary',
        utils._mapping_languages_to_mdr_vocabulary)

    defaults = self._dcatapit_config
    dcatapit_config = self.source_config.get('dcatapit_config', defaults)
    controlled_vocabularies = dcatapit_config.get(
        'controlled_vocabularies', defaults.get('controlled_vocabularies'))
    agents = dcatapit_config.get('agents', defaults.get('agents'))

    def add_extra(key, value):
        # Append one key/value pair to the package extras.
        package_dict['extras'].append({'key': key, 'value': value})

    def as_multivalue(values, fallback):
        # Several values -> de-duplicated '{a,b}' string, one value -> the
        # value itself, nothing -> the configured fallback.
        if values and len(values) > 1:
            return '{' + ','.join(str(v) for v in set(values)) + '}'
        return values[0] if values else fallback

    def responsible_party(role):
        # Resolve (name, code) of the cited responsible party for `role`.
        return utils.get_responsible_party(
            iso_values["cited-responsible-party"],
            agents.get(role, defaults.get('agents').get(role)))

    # ------------------------------#
    #    MANDATORY FOR DCAT-AP_IT   #
    # ------------------------------#

    # -- identifier -- #
    identifier = iso_values["guid"]
    add_extra('identifier', identifier)

    # The part of the identifier before the first ':' is used as agent code
    # whenever a responsible party does not provide one.
    default_agent_code = identifier.split(':')[0] if ':' in identifier else None

    # -- theme -- #
    keywords = iso_values["keywords"]
    dataset_themes = []
    if keywords:
        default_vocab_id = defaults.get('controlled_vocabularies').get(
            'dcatapit_skos_theme_id')
        dataset_themes = utils.get_controlled_vocabulary_values(
            'eu_themes',
            controlled_vocabularies.get('dcatapit_skos_theme_id', default_vocab_id),
            keywords)

    dataset_themes = as_multivalue(
        dataset_themes,
        dcatapit_config.get('dataset_themes', defaults.get('dataset_themes')))

    log.info("Medatata harvested dataset themes: %r", dataset_themes)
    add_extra('theme', dataset_themes)

    # -- publisher -- #
    agent_name, agent_code = responsible_party('publisher')
    add_extra('publisher_name', agent_name)
    add_extra('publisher_identifier', agent_code or default_agent_code)

    # -- modified -- #
    add_extra('modified', iso_values["date-updated"] or iso_values["date-released"])

    # -- frequency -- #
    add_extra('frequency', mapping_frequencies_to_mdr_vocabulary.get(
        iso_values["frequency-of-update"],
        dcatapit_config.get('frequency', defaults.get('frequency'))))

    # -- rights_holder -- #
    agent_name, agent_code = responsible_party('owner')
    add_extra('holder_name', agent_name)
    add_extra('holder_identifier', agent_code or default_agent_code)

    # -----------------------------------------------#
    #    OTHER FIELDS NOT MANDATORY FOR DCAT_AP-IT   #
    # -----------------------------------------------#

    # -- alternate_identifier nothing to do -- #

    # -- issued -- #
    add_extra('issued', iso_values["date-released"])

    # -- geographical_name -- #
    dataset_places = []
    if keywords:
        default_vocabs = defaults.get('controlled_vocabularies')
        # Prefer the default *places* vocabulary id; the theme id is kept
        # only as a last resort for defaults that define no places id.
        default_vocab_id = (default_vocabs.get('dcatapit_skos_places_id')
                            or default_vocabs.get('dcatapit_skos_theme_id'))
        dataset_places = utils.get_controlled_vocabulary_values(
            'places',
            controlled_vocabularies.get('dcatapit_skos_places_id', default_vocab_id),
            keywords)

    dataset_places = as_multivalue(
        dataset_places,
        dcatapit_config.get('dataset_places', defaults.get('dataset_places')))

    if dataset_places:
        log.info("Medatata harvested dataset places: %r", dataset_places)
        add_extra('geographical_name', dataset_places)

    # -- geographical_geonames_url nothing to do -- #

    # -- language -- #
    default_languages = dcatapit_config.get(
        'dataset_languages', defaults.get('dataset_languages'))
    dataset_languages = iso_values["dataset-language"]
    if dataset_languages:
        # Keep only the languages that have a mapping.
        languages = []
        for iso_language in dataset_languages:
            mapped = mapping_languages_to_mdr_vocabulary.get(iso_language)
            if mapped:
                languages.append(mapped)

        if len(languages) > 1:
            language = '{' + ','.join(str(code) for code in languages) + '}'
        else:
            language = languages[0] if languages else default_languages

        log.info("Medatata harvested dataset languages: %r", language)
    else:
        # The singular 'dataset_language' key is still honoured; when it is
        # absent, use the same fallback as the branch above instead of None.
        language = dcatapit_config.get('dataset_language') or default_languages

    add_extra('language', language)

    # -- temporal_coverage -- #
    for iso_key, extra_key in (('temporal-extent-begin', 'temporal_start'),
                               ('temporal-extent-end', 'temporal_end')):
        extent = iso_values[iso_key]
        if extent:
            add_extra(extra_key, extent[0])

    # -- conforms_to -- #
    add_extra('conforms_to', iso_values["conformity-specification-title"])

    # -- creator -- #
    agent_name, agent_code = responsible_party('author')
    add_extra('creator_name', agent_name)
    add_extra('creator_identifier', agent_code or default_agent_code)

    # -- license handling -- #
    license_id = package_dict.get('license_id')
    license_url = None
    license_name = None
    access_constraints = None
    for extra in package_dict['extras']:
        if extra['key'] == 'license_url':
            license_url = extra['value']
        elif extra['key'] == 'license':
            license_name = extra['value']
        elif extra['key'] == 'access_constraints':
            access_constraints = extra['value']

    if access_constraints or license_id or license_name or license_url:
        # find_by_token returns a pair; only the license object is needed.
        license_obj, _is_default = License.find_by_token(
            access_constraints, license_name, license_id, license_url)
    else:
        license_obj = License.get(License.DEFAULT_LICENSE)

    # A package without resources simply gets no license_type assigned.
    for res in package_dict.get('resources') or []:
        res['license_type'] = license_obj.uri

    # End of processing, return the modified package
    return package_dict