def _check_for_existing_jobs(context, source_id):
    '''
    Given a source id, checks if there are jobs for this source
    with status 'New' or 'Running'

    rtype: boolean
    '''
    data_dict = {
        'source_id': source_id,
        'status': u'New'
    }
    exist_new = harvest_job_list(context, data_dict)
    data_dict = {
        'source_id': source_id,
        'status': u'Running'
    }
    exist_running = harvest_job_list(context, data_dict)
    exist = len(exist_new + exist_running) > 0

    return exist
def harvest_jobs_run(context, data_dict):
    log.info('Harvest job run: %r', data_dict)
    check_access('harvest_jobs_run', context, data_dict)

    session = context['session']

    source_id = data_dict.get('source_id', None)

    if not source_id:
        _make_scheduled_jobs(context, data_dict)

    context['return_objects'] = False

    # Flag finished jobs as such
    jobs = harvest_job_list(
        context, {
            'source_id': source_id, 'status': u'Running'})
    # jobs[0]['gather_finished']=True
    # print(jobs)
    if len(jobs):
        for job in jobs:
            if job['gather_finished']:

                objects = session.query(HarvestObject.id) \
                    .filter(HarvestObject.harvest_job_id == job['id']) \
                    .filter(and_((HarvestObject.state != u'COMPLETE'),
                                 (HarvestObject.state != u'ERROR'))) \
                    .order_by(HarvestObject.import_finished.desc())

                if objects.count() == 0:

                    # harmonisation job create for harvest jobs finished fetch
                    # stage
                    job_source_id = job['source_id']
                    # get harvest info
                    harvest_source_info = HarvestSource.get(job['source_id'])
                    cat_url = harvest_source_info.url
                    title = harvest_source_info.title
                    db = client.odm
                    collection = db.jobs
                    document = collection.find_one({"title": title})
                    # if job exists then create harmonisation job
                    if document is not None:
                        harmonisation_job = {}
                        harmonisation_job['id'] = document['id']
                        harmonisation_job['cat_url'] = document['cat_url']
                        try:
                            harmonised = document['harmonised']
                        except KeyError:
                            harmonised = "not yet"
                        harmonisation_job['harmonised'] = harmonised
                        harmonisation_job['status'] = "pending"
                        harmonisation_job['dates'] = "dates_selected"
                        harmonisation_job['countries'] = "countries_selected"
                        harmonisation_job['catalogue_selection'] = title
                        harmonisation_job['languages'] = "languages_selected"
                        harmonisation_job['resources'] = "resources_selected"
                        harmonisation_job['licenses'] = "licenses_selected"
                        harmonisation_job['categories'] = "categories_selected"
                        harmonisation_job['save'] = "go-harmonisation-complete"

                # create harmonise job to db
                        collection_harmonise_jobs = db.harmonise_jobs
                        collection_harmonise_jobs.save(harmonisation_job)
                        job_obj = HarvestJob.get(job['id'])
                        job_obj.status = u'Finished'

                    last_object = session.query(HarvestObject) \
                        .filter(HarvestObject.harvest_job_id == job['id']) \
                        .filter(HarvestObject.import_finished is not None) \
                        .order_by(HarvestObject.import_finished.desc()) \
                        .first()
                    if last_object:
                        try:
                            job_obj.finished = last_object.import_finished
                        except:
                            pass
                    try:
                        job_obj.save()
                    except:
                        pass
                    # Reindex the harvest source dataset so it has the latest
                    # status

                    try:
                        get_action('harvest_source_reindex')(
                            context, {'id': job_obj.source.id})
                    except:
                        pass

    # resubmit old redis tasks
    resubmit_jobs()

    # Check if there are pending harvest jobs
    jobs = harvest_job_list(
        context, {
            'source_id': source_id, 'status': u'New'})
    if len(jobs) == 0:
        log.info('No new harvest jobs.')
        raise Exception('There are no new harvesting jobs')

    # Send each job to the gather queue
    publisher = get_gather_publisher()
    sent_jobs = []
    for job in jobs:
        context['detailed'] = False
        source = harvest_source_show(context, {'id': job['source_id']})
        if source['active']:
            job_obj = HarvestJob.get(job['id'])
            job_obj.status = job['status'] = u'Running'
            job_obj.save()
            publisher.send({'harvest_job_id': job['id']})
            log.info('Sent job %s to the gather queue' % job['id'])
            sent_jobs.append(job)

    publisher.close()
    return sent_jobs