def harvest_source_show(context,data_dict):
    '''
    Returns the metadata of a harvest source

    Access is checked with the ``harvest_source_show`` auth function and
    the source is then looked up through ``HarvestSource.get``. The value
    that lookup returns is stored in ``context['source']`` (also when
    nothing was found), and ``context['include_status']`` is set to
    ``True`` unless the caller already supplied that key.

    :param id: the id or name of the harvest source
    :type id: string
    :param attr: optional value forwarded to ``HarvestSource.get`` as
        ``attr``

    :returns: harvest source metadata, as built by
        ``harvest_source_dictize``
    :rtype: dictionary

    :raises NotFound: if the lookup returns nothing
    '''
    check_access('harvest_source_show', context, data_dict)

    source_ref = data_dict.get('id')
    lookup_attr = data_dict.get('attr')
    harvest_source = HarvestSource.get(source_ref, attr=lookup_attr)

    # Stored before the existence check, so the key is set even on failure.
    context['source'] = harvest_source
    if not harvest_source:
        raise NotFound

    context.setdefault('include_status', True)
    return harvest_source_dictize(harvest_source, context)
def harvest_source_create(context,data_dict):
    '''
    Creates a new harvest source

    ``data_dict`` is validated against ``context['schema']`` when one is
    supplied, otherwise against ``default_harvest_source_schema()``.

    :param url: URL of the source; surrounding whitespace is stripped
        before it is stored
    :param type: stored as the ``type`` of the source
    :param active: optional
    :param title: optional
    :param description: optional
    :param user_id: optional
    :param publisher_id: optional
    :param config: optional

    :returns: the new source, as built by ``harvest_source_dictize``

    :raises ValidationError: if validation reports errors (the session is
        rolled back first)
    '''
    log.info('Creating harvest source: %r', data_dict)
    check_access('harvest_source_create', context, data_dict)

    session = context['session']
    schema = context.get('schema') or default_harvest_source_schema()

    data, errors = validate(data_dict, schema)
    if errors:
        session.rollback()
        log.warning('Harvest source does not validate: %r', errors)
        raise ValidationError(errors, _error_summary(errors))

    source = HarvestSource()
    source.url = data['url'].strip()
    source.type = data['type']

    # Optional fields are copied only when validation produced a real value.
    optional_fields = ['active', 'title', 'description', 'user_id',
                       'publisher_id', 'config']
    for field in optional_fields:
        if field in data and data[field] is not None:
            setattr(source, field, data[field])

    # An 'active' key supplied by the caller is applied even when its
    # validated value is None, which the loop above would have skipped.
    if 'active' in data_dict:
        source.active = data['active']

    source.save()
    log.info('Harvest source created: %s', source.id)

    return harvest_source_dictize(source, context)
def harvest_source_for_a_dataset(context, data_dict):
    """
    For a given dataset, return the harvest source that created or last
    updated it, otherwise NotFound.

    TODO: Deprecated, harvest source id is added as an extra to each dataset
    automatically

    Harvest sources are joined to their harvest objects, filtered by
    ``package_id`` and ordered by ``HarvestObject.gathered`` descending; the
    first row of that query is used.

    :param id: the dataset id, read from ``data_dict``

    :returns: the harvest source, as built by ``harvest_source_dictize``

    :raises NotFound: if the query returns no harvest source
    """
    session = context["session"]
    dataset_id = data_dict.get("id")

    # Descending order on the gathered timestamp puts the newest match first.
    newest_source = (
        session.query(HarvestSource)
        .join(HarvestObject)
        .filter_by(package_id=dataset_id)
        .order_by(HarvestObject.gathered.desc())
        .first()
    )
    if not newest_source:
        raise NotFound

    return harvest_source_dictize(newest_source, context)
def harvest_source_create(context, data_dict):
    '''
    Creates a new harvest source

    The input is validated with ``validate`` against ``context['schema']``
    or, when that is missing or empty, ``default_harvest_source_schema()``.

    :param url: URL of the source; stored with surrounding whitespace
        removed
    :param type: stored as the ``type`` of the source
    :param active: optional
    :param title: optional
    :param description: optional
    :param user_id: optional
    :param publisher_id: optional
    :param config: optional

    :returns: the new source, as built by ``harvest_source_dictize``

    :raises ValidationError: if validation reports errors (the session is
        rolled back first)
    '''
    log.info('Creating harvest source: %r', data_dict)
    check_access('harvest_source_create', context, data_dict)

    session = context['session']
    schema = context.get('schema') or default_harvest_source_schema()

    data, errors = validate(data_dict, schema)
    if errors:
        session.rollback()
        log.warning('Harvest source does not validate: %r', errors)
        raise ValidationError(errors, _error_summary(errors))

    source = HarvestSource()
    source.url = data['url'].strip()
    source.type = data['type']

    # Only optional fields that validated to a non-None value are copied.
    optional_fields = [
        'active', 'title', 'description', 'user_id', 'publisher_id', 'config'
    ]
    for field in optional_fields:
        if field in data and data[field] is not None:
            setattr(source, field, data[field])

    # Applied whenever the caller sent 'active', even if the validated
    # value is None and was therefore skipped by the loop above.
    if 'active' in data_dict:
        source.active = data['active']

    source.save()
    log.info('Harvest source created: %s', source.id)

    return harvest_source_dictize(source, context)
def harvest_source_list(context, data_dict):
    '''
    Return the harvest sources selected by ``_get_sources_for_user``.

    Access is checked with the ``harvest_source_list`` auth function.
    ``context['detailed']`` is set to ``False`` before the sources are
    passed to ``harvest_source_dictize``.

    :returns: one ``harvest_source_dictize`` result per source
    :rtype: list
    '''
    check_access('harvest_source_list', context, data_dict)

    user_sources = _get_sources_for_user(context, data_dict)

    context.update({'detailed': False})

    dictized = []
    for harvest_source in user_sources:
        dictized.append(harvest_source_dictize(harvest_source, context))
    return dictized
def harvest_source_show(context, data_dict):
    '''
    Return a single harvest source, as built by ``harvest_source_dictize``.

    Access is checked with the ``harvest_source_show`` auth function
    before the source is looked up.

    :param id: identifier of the harvest source, passed to
        ``HarvestSource.get``
    :param attr: optional value forwarded to ``HarvestSource.get`` as
        ``attr``

    :returns: the result of ``harvest_source_dictize`` for the source

    :raises NotFound: if the lookup returns nothing
    '''
    check_access('harvest_source_show', context, data_dict)

    harvest_source = HarvestSource.get(
        data_dict.get('id'),
        attr=data_dict.get('attr'),
    )
    if harvest_source:
        return harvest_source_dictize(harvest_source, context)

    raise NotFound
def harvest_source_update(context, data_dict):
    '''
    Updates an existing harvest source

    ``data_dict`` is validated against ``context['schema']`` when one is
    supplied, otherwise against ``default_harvest_source_schema()``. The
    fields ``url``, ``title``, ``type``, ``description``, ``user_id`` and
    ``publisher_id`` are copied when they validated to a non-None value;
    ``active`` and ``config`` are applied whenever the caller sent the key.

    If the source is not active after saving, its jobs with status
    ``New`` are marked ``Aborted``.

    :param id: identifier of the harvest source, passed to
        ``HarvestSource.get``

    :returns: the updated source, as built by ``harvest_source_dictize``

    :raises NotFound: if no source matches ``id``
    :raises ValidationError: if validation reports errors (the session is
        rolled back first)
    '''
    check_access('harvest_source_update', context, data_dict)

    model = context['model']
    session = context['session']

    source_id = data_dict.get('id')
    schema = context.get('schema') or default_harvest_source_schema()

    log.info('Harvest source %s update: %r', source_id, data_dict)
    source = HarvestSource.get(source_id)
    if not source:
        log.error('Harvest source %s does not exist', source_id)
        raise NotFound('Harvest source %s does not exist' % source_id)

    data, errors = validate(data_dict, schema)
    if errors:
        session.rollback()
        raise ValidationError(errors, _error_summary(errors))

    fields = ['url', 'title', 'type', 'description', 'user_id',
              'publisher_id']
    for field in fields:
        if field in data and data[field] is not None:
            if field == 'url':
                data[field] = data[field].strip()
            setattr(source, field, data[field])

    if 'active' in data_dict:
        source.active = data['active']
    if 'config' in data_dict:
        source.config = data['config']

    source.save()

    # Abort any pending jobs
    if not source.active:
        jobs = HarvestJob.filter(source=source, status=u'New')
        log.info(
            'Harvest source %s not active, so aborting %i outstanding jobs',
            source_id, jobs.count())
        # Iterating an empty result is a no-op, so no emptiness guard is
        # needed around the loop.
        for job in jobs:
            job.status = u'Aborted'
            job.save()

    # Ensure sqlalchemy writes to the db immediately, since the gather/fetch
    # runs in a different process and needs the latest source info. Not sure if
    # this works, but try it.
    model.repo.commit_and_remove()

    return harvest_source_dictize(source, context)
def harvest_source_show(context,data_dict):
    '''
    Look up one harvest source and return it via ``harvest_source_dictize``.

    Access is checked with ``p.toolkit.check_access`` for the
    ``harvest_source_show`` auth function before the lookup.

    :param id: identifier of the harvest source, passed to
        ``HarvestSource.get``
    :param attr: optional value forwarded to ``HarvestSource.get`` as
        ``attr``

    :returns: the result of ``harvest_source_dictize`` for the source

    :raises NotFound: if the lookup returns nothing
    '''
    p.toolkit.check_access('harvest_source_show', context, data_dict)

    source_ref = data_dict.get('id')
    lookup_attr = data_dict.get('attr')

    found = HarvestSource.get(source_ref, attr=lookup_attr)
    if found:
        return harvest_source_dictize(found, context)

    raise NotFound
def harvest_source_list(context, data_dict):
    '''
    TODO: Use package search

    Return the harvest sources selected by ``_get_sources_for_user``.

    Access is checked with the ``harvest_source_list`` auth function.

    :param return_last_job_status: optional, defaults to ``False``;
        converted with ``p.toolkit.asbool`` and passed to
        ``harvest_source_dictize`` as its third argument

    :returns: one ``harvest_source_dictize`` result per source
    :rtype: list
    '''
    check_access('harvest_source_list', context, data_dict)

    user_sources = _get_sources_for_user(context, data_dict)

    raw_flag = data_dict.get('return_last_job_status', False)
    with_last_job_status = p.toolkit.asbool(raw_flag)

    dictized = []
    for harvest_source in user_sources:
        dictized.append(
            harvest_source_dictize(harvest_source, context,
                                   with_last_job_status))
    return dictized
def harvest_source_list(context, data_dict):
    '''
    Return the harvest sources selected by ``_get_sources_for_user``.

    Access is checked with ``p.toolkit.check_access`` for the
    ``harvest_source_list`` auth function. ``context['detailed']`` is set
    to ``False`` before the sources are passed to
    ``harvest_source_dictize``.

    :returns: one ``harvest_source_dictize`` result per source
    :rtype: list
    '''
    p.toolkit.check_access('harvest_source_list', context, data_dict)

    sources = _get_sources_for_user(context, data_dict)

    context['detailed'] = False
    return [harvest_source_dictize(source, context) for source in sources]
def harvest_source_list(context, data_dict):
    '''
    Return the harvest sources selected by ``_get_sources_for_user``.

    Access is checked with the ``harvest_source_list`` auth function.
    ``context['detailed']`` is set to ``False`` before the sources are
    passed to ``harvest_source_dictize``.

    :returns: one ``harvest_source_dictize`` result per source
    :rtype: list
    '''
    check_access('harvest_source_list', context, data_dict)

    sources = _get_sources_for_user(context, data_dict)

    context['detailed'] = False
    return [harvest_source_dictize(source, context) for source in sources]
def harvest_source_update(context,data_dict):
    '''
    Updates an existing harvest source

    The input is validated with ``validate`` against ``context['schema']``
    or, when that is missing or empty, ``default_harvest_source_schema()``.
    ``url`` (whitespace-stripped), ``title``, ``type``, ``description``,
    ``user_id`` and ``publisher_id`` are copied when they validated to a
    non-None value; ``active`` and ``config`` are applied whenever the
    caller sent the key.

    If the source is not active after saving, its jobs with status
    ``New`` are marked ``Aborted``.

    :param id: identifier of the harvest source, passed to
        ``HarvestSource.get``

    :returns: the updated source, as built by ``harvest_source_dictize``

    :raises NotFound: if no source matches ``id``
    :raises ValidationError: if validation reports errors (the session is
        rolled back first)
    '''
    check_access('harvest_source_update', context, data_dict)

    model = context['model']
    session = context['session']

    source_id = data_dict.get('id')
    schema = context.get('schema') or default_harvest_source_schema()

    log.info('Harvest source %s update: %r', source_id, data_dict)
    source = HarvestSource.get(source_id)
    if not source:
        log.error('Harvest source %s does not exist', source_id)
        raise NotFound('Harvest source %s does not exist' % source_id)

    data, errors = validate(data_dict, schema)
    if errors:
        session.rollback()
        raise ValidationError(errors, _error_summary(errors))

    fields = ['url', 'title', 'type', 'description', 'user_id',
              'publisher_id']
    for field in fields:
        if field in data and data[field] is not None:
            if field == 'url':
                data[field] = data[field].strip()
            setattr(source, field, data[field])

    if 'active' in data_dict:
        source.active = data['active']
    if 'config' in data_dict:
        source.config = data['config']

    source.save()

    # Abort any pending jobs
    if not source.active:
        jobs = HarvestJob.filter(source=source, status=u'New')
        log.info('Harvest source %s not active, so aborting %i outstanding jobs',
                 source_id, jobs.count())
        # An empty result simply yields no iterations, so the loop needs
        # no separate emptiness guard.
        for job in jobs:
            job.status = u'Aborted'
            job.save()

    # Ensure sqlalchemy writes to the db immediately, since the gather/fetch
    # runs in a different process and needs the latest source info. Not sure if
    # this works, but try it.
    model.repo.commit_and_remove()

    return harvest_source_dictize(source, context)
def harvest_source_list(context, data_dict):
    '''
    TODO: Use package search

    Return the harvest sources selected by ``_get_sources_for_user``.

    Access is checked with the ``harvest_source_list`` auth function.

    :returns: one ``harvest_source_dictize`` result per source
    :rtype: list
    '''
    check_access('harvest_source_list', context, data_dict)

    sources = _get_sources_for_user(context, data_dict)

    return [harvest_source_dictize(source, context) for source in sources]
def harvest_source_list(context, data_dict):
    '''
    TODO: Use package search

    Return every harvest source that ``_get_sources_for_user`` yields for
    this ``context`` and ``data_dict``.

    Access is checked with the ``harvest_source_list`` auth function.

    :returns: one ``harvest_source_dictize`` result per source
    :rtype: list
    '''
    check_access('harvest_source_list', context, data_dict)

    sources = _get_sources_for_user(context, data_dict)

    return [harvest_source_dictize(source, context) for source in sources]
def harvest_source_list(context, data_dict):
    '''
    TODO: Use package search

    Return the harvest sources selected by ``_get_sources_for_user``.

    The number of sources is governed by the
    ``ckan.harvest.harvest_source_limit`` config option (default 100),
    which is passed to ``_get_sources_for_user`` as ``limit``.

    :param organization_id: optional; forwarded to
        ``_get_sources_for_user``
    :param return_last_job_status: optional, defaults to ``False``;
        converted with ``p.toolkit.asbool`` and passed to
        ``harvest_source_dictize`` as its third argument

    :returns: one ``harvest_source_dictize`` result per source
    :rtype: list
    '''
    # NOTE(review): no check_access call is made in this function -
    # confirm authorization is enforced by _get_sources_for_user or callers.
    org_id = data_dict.get('organization_id')
    source_limit = config.get('ckan.harvest.harvest_source_limit', 100)

    user_sources = _get_sources_for_user(
        context,
        data_dict,
        organization_id=org_id,
        limit=source_limit,
    )

    raw_flag = data_dict.get('return_last_job_status', False)
    with_last_job_status = p.toolkit.asbool(raw_flag)

    dictized = []
    for harvest_source in user_sources:
        dictized.append(
            harvest_source_dictize(harvest_source, context,
                                   with_last_job_status))
    return dictized
def harvest_source_list(context, data_dict):
    """
    TODO: Use package search

    Return the harvest sources selected by ``_get_sources_for_user``.

    Access is checked with the ``harvest_source_list`` auth function.
    ``context["detailed"]`` is set to ``False`` before the sources are
    passed to ``harvest_source_dictize``.

    :returns: one ``harvest_source_dictize`` result per source
    :rtype: list
    """
    check_access("harvest_source_list", context, data_dict)

    sources = _get_sources_for_user(context, data_dict)

    context["detailed"] = False
    return [harvest_source_dictize(source, context) for source in sources]
def harvest_source_update(context,data_dict):
    '''
    Updates an existing harvest source

    ``data_dict`` is validated against ``context['schema']`` when one is
    supplied, otherwise against ``default_harvest_source_schema()``. The
    fields ``url``, ``title``, ``type``, ``description``, ``user_id`` and
    ``publisher_id`` are copied when they validated to a non-None value;
    ``active`` and ``config`` are applied whenever the caller sent the key.

    If the source is not active after saving, its jobs with status
    ``New`` are marked ``Aborted``.

    :param id: identifier of the harvest source, passed to
        ``HarvestSource.get``
    :param url: optional; surrounding whitespace is stripped before it is
        stored

    :returns: the updated source, as built by ``harvest_source_dictize``

    :raises NotFound: if no source matches ``id``
    :raises ValidationError: if validation reports errors (the session is
        rolled back first)
    '''
    check_access('harvest_source_update', context, data_dict)

    session = context['session']

    source_id = data_dict.get('id')
    schema = context.get('schema') or default_harvest_source_schema()

    log.info('Harvest source %s update: %r', source_id, data_dict)
    source = HarvestSource.get(source_id)
    if not source:
        log.error('Harvest source %s does not exist', source_id)
        raise NotFound('Harvest source %s does not exist' % source_id)

    data, errors = validate(data_dict, schema)
    if errors:
        session.rollback()
        raise ValidationError(errors, _error_summary(errors))

    fields = ['url', 'title', 'type', 'description', 'user_id',
              'publisher_id']
    for field in fields:
        if field in data and data[field] is not None:
            value = data[field]
            if field == 'url':
                # Stray whitespace around the URL is never meaningful, so
                # it is dropped before the value is stored.
                value = value.strip()
            setattr(source, field, value)

    if 'active' in data_dict:
        source.active = data['active']
    if 'config' in data_dict:
        source.config = data['config']

    source.save()

    # Abort any pending jobs
    if not source.active:
        jobs = HarvestJob.filter(source=source, status=u'New')
        log.info('Harvest source %s not active, so aborting %i outstanding jobs',
                 source_id, jobs.count())
        # Iterating an empty result is a no-op, so no emptiness guard is
        # needed around the loop.
        for job in jobs:
            job.status = u'Aborted'
            job.save()

    return harvest_source_dictize(source, context)
def harvest_source_for_a_dataset(context, data_dict):
    '''For a given dataset, return the harvest source that
    created or last updated it, otherwise NotFound.

    Harvest sources are joined to their harvest objects, filtered by
    ``package_id`` and ordered by ``HarvestObject.gathered`` descending; the
    first row of that query is used.

    :param id: the dataset id, read from ``data_dict``

    :returns: the harvest source, as built by ``harvest_source_dictize``

    :raises NotFound: if the query returns no harvest source
    '''
    session = context['session']
    dataset_id = data_dict.get('id')

    query = (
        session.query(HarvestSource)
        .join(HarvestObject)
        .filter_by(package_id=dataset_id)
        .order_by(HarvestObject.gathered.desc())
    )
    source = query.first()  # newest
    if not source:
        raise NotFound

    return harvest_source_dictize(source, context)
def harvest_source_for_a_dataset(context, data_dict):
    '''For a given dataset, return the harvest source that
    created or last updated it, otherwise NotFound.

    The lookup queries ``HarvestSource`` joined to ``HarvestObject``,
    filters by ``package_id`` and takes the first row when ordered by
    ``HarvestObject.gathered`` descending.

    :param id: the dataset id, read from ``data_dict``

    :returns: the harvest source, as built by ``harvest_source_dictize``

    :raises NotFound: if the query returns no harvest source
    '''
    session = context['session']
    dataset_id = data_dict.get('id')

    # Descending order on the gathered timestamp puts the newest match first.
    newest_source = (
        session.query(HarvestSource)
        .join(HarvestObject)
        .filter_by(package_id=dataset_id)
        .order_by(HarvestObject.gathered.desc())
        .first()
    )
    if not newest_source:
        raise NotFound

    return harvest_source_dictize(newest_source, context)
def harvest_source_for_a_dataset(context, data_dict):
    '''For a given dataset, return the harvest source that
    created or last updated it, otherwise NotFound.

    Harvest sources are joined to their harvest objects, filtered by
    ``package_id`` and ordered by ``HarvestObject.gathered`` descending; the
    first row of that query is used.

    ``context['include_status']`` is set to ``False`` unless the caller
    supplied a truthy value for it.

    :param id: the dataset id, read from ``data_dict``

    :returns: the harvest source, as built by ``harvest_source_dictize``

    :raises NotFound: if the query returns no harvest source
    '''
    session = context['session']
    dataset_id = data_dict.get('id')

    query = (
        session.query(HarvestSource)
        .join(HarvestObject)
        .filter_by(package_id=dataset_id)
        .order_by(HarvestObject.gathered.desc())
    )
    source = query.first()  # newest
    if not source:
        raise NotFound

    if not context.get('include_status'):
        # By default we don't want to know the harvest
        # source status - this is an expensive call.
        context['include_status'] = False

    return harvest_source_dictize(source, context)