def package_update_rest_minimal(context, data_dict):
    """Update a package and store its ``full_text_search`` extra separately.

    When ``data_dict['extras']`` carries a ``full_text_search`` entry, that
    entry is stripped from the extras before the package is updated, and the
    text is then persisted through ``fulltext_dict_save``.

    :param context: action context forwarded to the update functions,
        ``fulltext_dict_save`` and ``check_logged_in``.
    :param data_dict: package data; may contain an ``extras`` mapping.
    :returns: if ``check_logged_in(context)`` is true, the updated package
        dict (with the stored full text added to its extras when one
        exists); otherwise the package dict after it has been passed through
        ``_del_extra_field_from_list`` and ``_del_main_field_from_dict``.
    """
    setup()

    if 'extras' not in data_dict:
        package = update.package_update_rest(context, data_dict)
    elif 'full_text_search' in data_dict['extras']:
        fulltext = data_dict['extras']['full_text_search']
        # The full text is stored on its own, so drop it from the extras
        # before handing the dict to the regular update.
        data_dict = _del_extra_field_from_list(data_dict, 'full_text_search')
        package = update.package_update_rest(context, data_dict)

        # Look up an already stored full text row for this package, if any.
        old_fulltext = None
        if 'id' in package:
            old_fulltext = (
                Session.query(PackageFulltext)
                .filter(PackageFulltext.package_id == package['id'])
                .first()
            )
        fulltext_dict_save(fulltext, old_fulltext, package, context)
    else:
        # NOTE(review): this branch calls ``package_update`` while the other
        # two call ``package_update_rest`` -- confirm this is intentional.
        package = update.package_update(context, data_dict)

    if check_logged_in(context):
        stored_fulltext = _get_fulltext(package['id'])
        if stored_fulltext:
            package['extras']['full_text_search'] = stored_fulltext.text
        return package

    minimal_package = _del_extra_field_from_list(package)
    minimal_package = _del_main_field_from_dict(minimal_package)
    return minimal_package
def package_update_rest_minimal(context, data_dict):
    """Update a package and store its ``full_text_search`` extra separately.

    When ``data_dict['extras']`` carries a ``full_text_search`` entry, that
    entry is stripped from the extras before the package is updated, and the
    text is then persisted through ``fulltext_dict_save``.

    :param context: action context forwarded to the update functions,
        ``fulltext_dict_save`` and ``check_logged_in``.
    :param data_dict: package data; may contain an ``extras`` mapping.
    :returns: if ``check_logged_in(context)`` is true, the updated package
        dict (with the stored full text added to its extras when one
        exists); otherwise the package dict after it has been passed through
        ``_del_extra_field_from_list`` and ``_del_main_field_from_dict``.
    """
    setup()

    if 'extras' not in data_dict:
        package = update.package_update_rest(context, data_dict)
    elif 'full_text_search' in data_dict['extras']:
        fulltext = data_dict['extras']['full_text_search']
        # The full text is stored on its own, so drop it from the extras
        # before handing the dict to the regular update.
        data_dict = _del_extra_field_from_list(data_dict, 'full_text_search')
        package = update.package_update_rest(context, data_dict)

        # Look up an already stored full text row for this package, if any.
        old_fulltext = None
        if 'id' in package:
            old_fulltext = (
                Session.query(PackageFulltext)
                .filter(PackageFulltext.package_id == package['id'])
                .first()
            )
        fulltext_dict_save(fulltext, old_fulltext, package, context)
    else:
        # NOTE(review): this branch calls ``package_update`` while the other
        # two call ``package_update_rest`` -- confirm this is intentional.
        package = update.package_update(context, data_dict)

    if check_logged_in(context):
        stored_fulltext = _get_fulltext(package['id'])
        if stored_fulltext:
            package['extras']['full_text_search'] = stored_fulltext.text
        return package

    minimal_package = _del_extra_field_from_list(package)
    minimal_package = _del_main_field_from_dict(minimal_package)
    return minimal_package
def import_stage(self, harvest_object):
    """Import a harvested package only if it is open, then add harvest extras.

    The package dict is decoded from ``harvest_object.content``.  Nothing is
    imported when its ``isopen`` flag is false or absent, or when its
    ``license_id`` is listed in ``EXCLUDE_OPEN_LICENSES``.  Otherwise the
    parent class performs the import and, if a package id was assigned, the
    package is updated with extras describing the harvest source.

    :param harvest_object: object exposing ``content`` (JSON string),
        ``package_id`` and ``job.source.url``.
    :returns: ``None`` in every case.
    """
    package_dict = json.loads(harvest_object.content)

    # do not import packages that are not defined as open; a package that
    # carries no ``isopen`` flag at all is treated as not open instead of
    # raising KeyError
    if not package_dict.get('isopen'):
        return
    if package_dict.get('license_id') in EXCLUDE_OPEN_LICENSES:
        return

    super(OpenCKANHarvester, self).import_stage(harvest_object)

    if not harvest_object.package_id:
        return

    # Add some extras to the newly created package
    source_url = harvest_object.job.source.url
    new_extras = {
        'harvest_catalogue_name': self.config.get('harvest_catalogue_name', ''),
        'harvest_catalogue_url': source_url,
        'harvest_dataset_url': (
            source_url.strip('/') + '/package/' + harvest_object.package_id
        ),
    }
    context = {
        'model': model,
        'session': Session,
        'user': u'harvest',
        'id': harvest_object.package_id,
    }
    data_dict = {'extras': new_extras}
    package_update_rest(data_dict, context)
def import_stage(self, harvest_object):
    """Run the parent import, then attach harvest-source extras to the package.

    After the parent ``import_stage`` has run, and only if the harvest object
    has a ``package_id``, the package is updated through
    ``package_update_rest`` with extras taken from ``self.config`` and from
    the URL of the harvest source.

    :param harvest_object: object exposing ``package_id`` and
        ``job.source.url``.
    :returns: ``None``.
    """
    super(PDEUCKANHarvester, self).import_stage(harvest_object)

    if not harvest_object.package_id:
        return

    # Add some extras to the newly created package
    new_extras = {}
    new_extras['eu_country'] = self.config.get('eu_country', '')
    new_extras['harvest_catalogue_name'] = self.config.get(
        'harvest_catalogue_name', '')
    new_extras['harvest_catalogue_url'] = harvest_object.job.source.url
    new_extras['harvest_dataset_url'] = '/package/'.join([
        harvest_object.job.source.url.strip('/'),
        harvest_object.package_id,
    ])

    # Optional NUTS settings are copied only when configured and non-empty.
    for nuts_key in ('eu_nuts1', 'eu_nuts2', 'eu_nuts3'):
        nuts_value = self.config.get(nuts_key, '')
        if nuts_value:
            new_extras[nuts_key] = nuts_value

    update_context = dict(
        model=model,
        session=Session,
        user=u'harvest',
        id=harvest_object.package_id,
    )
    package_update_rest({'extras': new_extras}, update_context)
def import_stage(self, harvest_object):
    """Import a harvested package only if it is open, then add harvest extras.

    The package dict is decoded from ``harvest_object.content``.  Nothing is
    imported when its ``isopen`` flag is false or absent, or when its
    ``license_id`` is listed in ``EXCLUDE_OPEN_LICENSES``.  Otherwise the
    parent class performs the import and, if a package id was assigned, the
    package is updated with extras describing the harvest source.

    :param harvest_object: object exposing ``content`` (JSON string),
        ``package_id`` and ``job.source.url``.
    :returns: ``None`` in every case.
    """
    package_dict = json.loads(harvest_object.content)

    # do not import packages that are not defined as open; a package that
    # carries no ``isopen`` flag at all is treated as not open instead of
    # raising KeyError
    if not package_dict.get("isopen"):
        return
    if package_dict.get("license_id") in EXCLUDE_OPEN_LICENSES:
        return

    super(OpenCKANHarvester, self).import_stage(harvest_object)

    if not harvest_object.package_id:
        return

    # Add some extras to the newly created package
    source_url = harvest_object.job.source.url
    new_extras = {
        "harvest_catalogue_name": self.config.get("harvest_catalogue_name", ""),
        "harvest_catalogue_url": source_url,
        "harvest_dataset_url": (
            source_url.strip("/") + "/package/" + harvest_object.package_id
        ),
    }
    context = {
        "model": model,
        "session": Session,
        "user": u"harvest",
        "id": harvest_object.package_id,
    }
    data_dict = {"extras": new_extras}
    package_update_rest(data_dict, context)
def _create_or_update_package(self, package_dict, harvest_object):
    '''
    Creates a new package or updates an existing one according to the
    package dictionary provided. The package dictionary should look like
    the REST API response for a package:

    http://ckan.net/api/rest/package/statistics-catalunya

    Note that the package_dict must contain an id, which will be used to
    check if the package needs to be created or updated (use the remote
    dataset id).

    If the remote server provides the modification date of the remote
    package, add it to package_dict['metadata_modified'].

    :param package_dict: package data in REST API format; must have 'id'.
    :param harvest_object: harvest object whose ``package_id`` is set (and
        saved) when a package is created or updated; its ``guid`` is used
        in log and error messages.
    :returns: True when the package was created, updated or skipped as
        unmodified; None (implicitly) when a ValidationError was raised,
        in which case the error is logged and stored on the harvest object.
    '''
    try:
        # Change the default schema: use custom validators for 'id'.
        schema = default_package_schema()
        schema['id'] = [ignore_missing, unicode]

        context = {
            'model': model,
            'session': Session,
            'user': u'harvest',
            'api_version': '2',
            'schema': schema,
            # Used by package_show to check whether the package exists.
            'id': package_dict['id'],
        }

        try:
            existing_package_dict = package_show(context)
        except NotFound:
            # Package needs to be created
            del context['id']
            log.info('Package with GUID %s does not exist, let\'s create it',
                     harvest_object.guid)
            new_package = package_create_rest(package_dict, context)
            harvest_object.package_id = new_package['id']
            harvest_object.save()
        else:
            # Without a remote modification date the package is always
            # updated; otherwise only when the remote copy is newer.
            needs_update = (
                'metadata_modified' not in package_dict
                or package_dict['metadata_modified'] >
                existing_package_dict['metadata_modified']
            )
            if needs_update:
                log.info('Package with GUID %s exists and needs to be updated',
                         harvest_object.guid)
                updated_package = package_update_rest(package_dict, context)
                harvest_object.package_id = updated_package['id']
                harvest_object.save()
            else:
                log.info('Package with GUID %s not updated, skipping...',
                         harvest_object.guid)

        return True

    except ValidationError as e:
        log.exception(e)
        self._save_object_error(
            'Invalid package with GUID %s: %r' % (harvest_object.guid,
                                                  e.error_dict),
            harvest_object, 'Import')
def import_stage(self, harvest_object):
    """Run the parent import, then add harvest extras and a category.

    After the parent ``import_stage`` has run, and only if the harvest object
    has a ``package_id``, the package is updated through
    ``package_update_rest`` with extras taken from ``self.config`` and from
    the harvest source URL.  If the original package lists groups, the name
    of the first group is fetched from the remote REST API (names are kept
    in ``self._groups_cache``) and stored in the ``categories`` extra.

    :param harvest_object: object exposing ``content`` (JSON string),
        ``package_id``, ``source.url`` and ``job.source.url``.
    :returns: ``None``.
    """
    super(BerlinCKANHarvester, self).import_stage(harvest_object)

    if not harvest_object.package_id:
        return

    original_package = json.loads(harvest_object.content)

    # Add some extras to the newly created package
    new_extras = {
        'eu_country': self.config.get('eu_country', ''),
        'harvest_catalogue_name': self.config.get('harvest_catalogue_name', ''),
        'harvest_catalogue_url': harvest_object.job.source.url,
        'harvest_dataset_url': (
            harvest_object.job.source.url.strip('/')
            + '/package/' + harvest_object.package_id
        ),
    }

    for extra in ('eu_nuts1', 'eu_nuts2', 'eu_nuts3'):
        if self.config.get(extra, ''):
            new_extras[extra] = self.config[extra]

    groups = original_package.get('groups', [])
    if groups:
        group_id = groups[0]
        if group_id not in self._groups_cache:
            log.debug('Requesting group details: %s' % group_id)
            url = harvest_object.source.url.rstrip('/')
            url = url + self._get_rest_api_offset() + '/group/' + group_id

            # Get contents
            try:
                content = self._get_content(url)
                group = json.loads(content)
                self._groups_cache[group_id] = group['name']
            except Exception as e:
                # Best effort: record the failure and carry on without a
                # category rather than aborting the whole import.
                self._save_object_error(
                    'Unable to get content for group: %s: %r' % (url, e),
                    harvest_object)

        # The lookup above may have failed, leaving the cache without this
        # group; reading it unconditionally would raise KeyError.
        if group_id in self._groups_cache:
            new_extras['categories'] = self._groups_cache[group_id]

    context = {
        'model': model,
        'session': Session,
        'user': u'harvest',
        'id': harvest_object.package_id,
    }
    data_dict = {'extras': new_extras}
    package_update_rest(data_dict, context)