def _check_for_existing_jobs(context, source_id):
    '''
    Tell whether a harvest source already has unfinished jobs.

    ``harvest_job_list`` is queried once for jobs with status 'New' and
    once for jobs with status 'Running', both restricted to ``source_id``.

    :param context: passed through unchanged to ``harvest_job_list``
    :param source_id: id of the harvest source to look up
    :returns: True if at least one 'New' or 'Running' job was found
    :rtype: boolean
    '''
    new_jobs, running_jobs = [
        harvest_job_list(context, {'source_id': source_id, 'status': state})
        for state in (u'New', u'Running')
    ]
    return len(new_jobs + running_jobs) > 0

def _create_harmonisation_job(source_id):
    '''
    Queue a harmonisation job for a harvest source whose harvest job has
    finished its fetch stage.

    The ``jobs`` collection of ``client.odm`` is searched for a document
    whose ``title`` equals the harvest source title. When one is found a
    new "pending" entry is saved to the ``harmonise_jobs`` collection;
    otherwise nothing is written.

    NOTE(review): ``client`` is a module-level name not visible here;
    presumably a pymongo client -- confirm.

    :param source_id: id of the harvest source the finished job belongs to
    '''
    harvest_source_info = HarvestSource.get(source_id)
    title = harvest_source_info.title

    db = client.odm
    document = db.jobs.find_one({"title": title})
    if document is None:
        # No catalogue job registered under this title: nothing to harmonise
        return

    harmonisation_job = {
        'id': document['id'],
        'cat_url': document['cat_url'],
        # Documents that were never harmonised have no 'harmonised' key
        'harmonised': document.get('harmonised', "not yet"),
        'status': "pending",
        'dates': "dates_selected",
        'countries': "countries_selected",
        'catalogue_selection': title,
        'languages': "languages_selected",
        'resources': "resources_selected",
        'licenses': "licenses_selected",
        'categories': "categories_selected",
        'save': "go-harmonisation-complete",
    }
    # create harmonise job to db
    db.harmonise_jobs.save(harmonisation_job)


def _mark_job_finished(context, session, job_id):
    '''
    Flag a harvest job as 'Finished' and reindex its harvest source.

    The finish time is copied from the most recently imported harvest
    object of the job, if there is one. Saving the job and reindexing the
    source are best effort: failures are logged and do not abort the run.

    :param context: passed to the ``harvest_source_reindex`` action
    :param session: SQLAlchemy session used to query harvest objects
    :param job_id: id of the harvest job to flag
    '''
    job_obj = HarvestJob.get(job_id)
    job_obj.status = u'Finished'

    # The comparison has to be ``!= None`` so that SQLAlchemy renders
    # ``IS NOT NULL``. ``is not None`` is evaluated by Python itself to a
    # constant True and would not filter out unfinished objects.
    last_object = session.query(HarvestObject) \
        .filter(HarvestObject.harvest_job_id == job_id) \
        .filter(HarvestObject.import_finished != None) \
        .order_by(HarvestObject.import_finished.desc()) \
        .first()  # noqa: E711

    try:
        if last_object:
            job_obj.finished = last_object.import_finished
        job_obj.save()
    except Exception:
        log.exception('Could not save finished harvest job %s', job_id)

    # Reindex the harvest source dataset so it has the latest status
    try:
        get_action('harvest_source_reindex')(
            context, {'id': job_obj.source.id})
    except Exception:
        log.exception(
            'Could not reindex the harvest source of job %s', job_id)


def harvest_jobs_run(context, data_dict):
    '''
    Flag finished harvest jobs and send pending ones to the gather queue.

    Steps, in order:

    1. When no ``source_id`` is given, ``_make_scheduled_jobs`` is called.
    2. Every 'Running' job whose gather stage is finished and that has no
       harvest object left in a state other than 'COMPLETE' or 'ERROR'
       gets a harmonisation job queued and is flagged as 'Finished'.
    3. ``resubmit_jobs`` is called.
    4. Every 'New' job belonging to an active source is set to 'Running'
       and sent to the gather queue.

    Side effect: ``context['return_objects']`` and ``context['detailed']``
    are set to False.

    :param context: must contain a ``session`` key
    :param data_dict: may contain ``source_id`` to restrict the run to the
        jobs of a single harvest source
    :returns: the job dicts that were sent to the gather queue
    :rtype: list of dictionaries
    :raises Exception: if there are no jobs with status 'New'
    '''
    log.info('Harvest job run: %r', data_dict)
    check_access('harvest_jobs_run', context, data_dict)

    session = context['session']

    source_id = data_dict.get('source_id', None)

    if not source_id:
        _make_scheduled_jobs(context, data_dict)

    context['return_objects'] = False

    # Flag finished jobs as such
    running_jobs = harvest_job_list(
        context, {'source_id': source_id, 'status': u'Running'})
    for job in running_jobs:
        if not job['gather_finished']:
            continue

        # Harvest objects of this job that are neither imported nor failed.
        # Only the count is needed, so the query is left unordered.
        unfinished = session.query(HarvestObject.id) \
            .filter(HarvestObject.harvest_job_id == job['id']) \
            .filter(and_((HarvestObject.state != u'COMPLETE'),
                         (HarvestObject.state != u'ERROR')))
        if unfinished.count() != 0:
            continue

        # harmonisation job create for harvest jobs finished fetch stage
        _create_harmonisation_job(job['source_id'])
        _mark_job_finished(context, session, job['id'])

    # resubmit old redis tasks
    resubmit_jobs()

    # Check if there are pending harvest jobs
    jobs = harvest_job_list(
        context, {'source_id': source_id, 'status': u'New'})
    if not jobs:
        log.info('No new harvest jobs.')
        raise Exception('There are no new harvesting jobs')

    # Send each job to the gather queue
    publisher = get_gather_publisher()
    sent_jobs = []
    try:
        for job in jobs:
            context['detailed'] = False
            source = harvest_source_show(context, {'id': job['source_id']})
            if source['active']:
                job_obj = HarvestJob.get(job['id'])
                job_obj.status = job['status'] = u'Running'
                job_obj.save()
                publisher.send({'harvest_job_id': job['id']})
                log.info('Sent job %s to the gather queue', job['id'])
                sent_jobs.append(job)
    finally:
        # Release the queue connection even if sending a job fails
        publisher.close()

    return sent_jobs