def register_harvesting_job(self, source_id, user_ref):
    """Create a new harvesting job for a harvest source and report it.

    ``source_id`` may be either a source URL or a source identifier:

    * If it starts with ``http://``, ``https://`` or ``file://`` it is
      treated as a URL. The first existing ``HarvestSource`` with that URL
      is used; if there is none, a source is created through
      ``self.create_harvest_source`` with an empty ``publisher_ref``.
    * Otherwise it is passed unchanged to ``HarvestSource.get``.

    The job is created with status ``u"New"`` and saved. The new job is then
    printed via ``self.print_harvesting_job`` and the list of all jobs in
    that status is handed to ``self.print_there_are``.

    :param source_id: URL or identifier of the harvest source (a string).
    :param user_ref: stored on the job, and on the source if one is created.
    :returns: ``None``; the outcome is reported on standard output.
    """
    from ckan.model import HarvestSource
    from ckan.model import HarvestingJob

    new_status = u"New"
    # "https://" is listed explicitly: a pattern that only knows "http://"
    # and "file://" would send secure URLs down the identifier branch.
    url_prefixes = ("http://", "https://", "file://")

    if source_id.startswith(url_prefixes):
        source_url = unicode(source_id)
        matching_sources = HarvestSource.filter(url=source_url).all()
        if matching_sources:
            source = matching_sources[0]
        else:
            source = self.create_harvest_source(
                url=source_url,
                user_ref=user_ref,
                publisher_ref=u"",
            )
    else:
        # NOTE(review): the behaviour of HarvestSource.get for an unknown
        # identifier is not visible here - confirm it cannot return None.
        source = HarvestSource.get(source_id)

    job = HarvestingJob(source=source, user_ref=user_ref, status=new_status)
    job.save()

    # A parenthesised single argument prints exactly like the print statement.
    print("Created new harvesting job:")
    self.print_harvesting_job(job)

    jobs = HarvestingJob.filter(status=new_status).all()
    self.print_there_are("harvesting job", jobs, condition=new_status)
def run_harvester(self, *args, **kwds):
    """Run every harvesting job whose status is ``u"New"``.

    Any positional or keyword arguments are accepted but not used.

    A header line is printed saying how many jobs will run (or that there
    are none), followed by an empty line. Each job is then announced with
    its position, the total and its id, printed via
    ``self.print_harvesting_job``, and harvested through a
    ``HarvestingJobController``.

    :returns: ``None``.
    """
    from pylons.i18n.translation import _get_translator
    import pylons

    # Push a translator for the configured language before harvesting.
    # NOTE(review): presumably required because no web request has set one
    # up in this context - confirm.
    language = pylons.config.get("lang")
    pylons.translator._push_object(_get_translator(language))

    from ckan.model import HarvestingJob
    from ckan.controllers.harvesting import HarvestingJobController

    pending_jobs = HarvestingJob.filter(status=u"New").all()
    total = len(pending_jobs)

    if not total:
        print("There are no new harvesting jobs.")
    else:
        print("Running %s harvesting jobs..." % total)
    print("")

    for index, job in enumerate(pending_jobs):
        # Positions are reported 1-based.
        print("Running job %s/%s: %s" % (index + 1, total, job.id))
        self.print_harvesting_job(job)
        HarvestingJobController(job).harvest_documents()
def create_harvesting_job(self, **kwds):
    """Build a ``HarvestingJob`` from the keyword arguments, save and return it.

    :param kwds: passed unchanged to the ``HarvestingJob`` constructor.
    :returns: the saved ``HarvestingJob`` instance.
    """
    from ckan.model import HarvestingJob

    new_job = HarvestingJob(**kwds)
    new_job.save()
    return new_job
def get_harvesting_jobs(self, **kwds):
    """Return all harvesting jobs matching the given filter criteria.

    :param kwds: passed unchanged to ``HarvestingJob.filter``.
    :returns: whatever ``.all()`` yields on the filtered result.
    """
    from ckan.model import HarvestingJob

    matching = HarvestingJob.filter(**kwds)
    return matching.all()