Exemple #1
0
def job_publish(request, org_id, record_group_id, job_id):
    """
    Start publishing a Job and send the user back to the Job's Record Group.

    Args:
        request: Django request. POST may carry 'publish_set_id',
            'existing_publish_set_id' and a 'published_subsets' list.
        org_id: Organization id from the URL (not used; the redirect is
            built from the Job itself).
        record_group_id: Record Group id from the URL (not used, as above).
        job_id: id of the Job to publish.

    Returns:
        Redirect to the 'record_group' view for the Job's Record Group.
    """
    # local import: the values below are user supplied and end up in raw HTML
    from html import escape

    LOGGER.debug(request.POST)

    # capture entered publish set id
    publish_set_id = request.POST.get('publish_set_id', None)

    # a pre-existing publish set id, when submitted, takes precedence
    if request.POST.get('existing_publish_set_id', None) is not None:
        publish_set_id = request.POST.get('existing_publish_set_id')

    # published subsets this Job should be included in
    published_subsets = request.POST.getlist('published_subsets', [])

    # get CombineJob
    cjob = CombineJob.get_combine_job(job_id)

    # init publish
    cjob.publish_bg_task(
        publish_set_id=publish_set_id,
        in_published_subsets=published_subsets)

    # set gms; job name and publish set id are escaped because the message
    # is assembled as an HTML fragment
    gmc = GlobalMessageClient(request.session)
    gmc.add_gm({
        'html': '<p><strong>Publishing Job:</strong><br>%s<br><br><strong>Publish Set ID:</strong><br>%s</p><p><a href="%s"><button type="button" class="btn btn-outline-primary btn-sm">View Published Records</button></a></p>' % (
            escape(str(cjob.job.name)),
            escape(str(publish_set_id)),
            reverse('published')),
        'class': 'success'
    })

    return redirect('record_group',
                    org_id=cjob.job.record_group.organization.id,
                    record_group_id=cjob.job.record_group.id)
Exemple #2
0
def test_static_harvest_reindex(VO):
    """
    Reindex the static harvest Job with an extra literal field (foo:bar) and
    check that all 250 records picked it up.

    VO is the shared fixture object holding `static_harvest_cjob`.
    """
    # reload the job so we work against its current state
    current_job_id = VO.static_harvest_cjob.job.id
    VO.static_harvest_cjob = CombineJob.get_combine_job(current_job_id)

    # field mapper configuration; the only notable setting is add_literals foo:bar
    fm_config_json = '{"concat_values_on_all_fields": false, "capture_attribute_values": [], "remove_ns_prefix": true, "skip_attribute_ns_declarations": true, "remove_copied_key": true, "node_delim": "_", "copy_to": {}, "copy_value_to_regex": {}, "copy_to_regex": {}, "split_values_on_all_fields": false, "add_literals": {"foo":"bar"}, "exclude_attributes": [], "ns_prefix_delim": "|", "self_describing": false, "split_values_on_fields": {}, "include_attributes": [], "include_sibling_id": false, "multivalue_delim": "|", "skip_repeating_values": true, "repeating_element_suffix_count": false, "exclude_elements": [], "concat_values_on_fields": {}, "remove_copied_value": false, "error_on_delims_collision": false, "include_all_attributes": false, "skip_root": false}'

    # kick off the reindex
    reindex_task = VO.static_harvest_cjob.reindex_bg_task(
        fm_config_json=fm_config_json)

    # poll once per second, up to 480 times, until the task reaches a final state
    final_states = ['SUCCESS', 'FAILURE']
    for elapsed in range(0, 480):
        time.sleep(1)
        LOGGER.debug('polling for reindexing %s seconds...' % elapsed)

        # refresh task state before inspecting it
        reindex_task.update()
        if reindex_task.celery_status in final_states:
            break

    # every one of the 250 records should now carry the 'foo' field
    foo_analysis = VO.static_harvest_cjob.field_analysis('foo')
    assert foo_analysis['metrics']['doc_instances'] == 250
Exemple #3
0
def clone_jobs(request):
    """
    Queue a background task that clones the Jobs listed in POST 'job_ids[]'.

    The 'downstream_clone_toggle' and 'rerun_on_clone' POST values are passed
    along in the task parameters. Returns a JsonResponse because the view is
    called via Ajax, which then reloads the page.
    """
    LOGGER.debug('cloning jobs')

    def _toggle(raw_value):
        # 'true' / 'false' strings become booleans, anything else is kept as sent
        if raw_value == 'true':
            return True
        if raw_value == 'false':
            return False
        return raw_value

    requested_ids = request.POST.getlist('job_ids[]')
    downstream_toggle = _toggle(request.POST.get('downstream_clone_toggle', False))
    rerun_on_clone = _toggle(request.POST.get('rerun_on_clone', False))

    # de-duplicate the selected Jobs, then order them by id
    selected_jobs = {CombineJob.get_combine_job(job_id).job for job_id in requested_ids}
    jobs_in_order = sorted(selected_jobs, key=lambda job: job.id)

    # record the Combine background task
    task_params = {
        'ordered_job_clone_set': [job.id for job in jobs_in_order],
        'downstream_toggle': downstream_toggle,
        'rerun_on_clone': rerun_on_clone
    }
    combine_task = CombineBackgroundTask(
        name="Clone Jobs",
        task_type='clone_jobs',
        task_params_json=json.dumps(task_params)
    )
    combine_task.save()

    # hand off to celery and remember the celery task id
    celery_result = tasks.clone_jobs.delay(combine_task.id)
    LOGGER.debug('firing bg task: %s', celery_result)
    combine_task.celery_task_id = celery_result.task_id
    combine_task.save()

    # tell the user what was queued
    job_names_html = '<br>'.join([str(job.name) for job in jobs_in_order])
    message = {
        'html': '<strong>Cloning Job(s):</strong><br>%s<br><br>Including downstream? <strong>%s</strong><br><br>Refresh this page to update status of Jobs cloning. <button class="btn-sm btn-outline-primary" onclick="location.reload();">Refresh</button>' % (
            job_names_html, downstream_toggle),
        'class': 'success'
    }
    GlobalMessageClient(request.session).add_gm(message)

    return JsonResponse({'results': True})
Exemple #4
0
def job_indexing_failures(request, org_id, record_group_id, job_id):
    """Render the indexing failures page for the Job identified by job_id."""
    context = {
        'cjob': CombineJob.get_combine_job(job_id),
        'breadcrumbs': breadcrumb_parser(request)
    }
    return render(request, 'core/job_indexing_failures.html', context)
Exemple #5
0
def stop_jobs(request):
    """
    Stop the Jobs listed in POST 'job_ids[]'.

    When 'downstream_stop_toggle' is on, the result of each Job's
    get_downstream_jobs() is stopped instead of the Job alone. Returns a
    JsonResponse once every selected Job has had stop_job() called.
    """
    LOGGER.debug('stopping jobs')

    job_ids = request.POST.getlist('job_ids[]')
    LOGGER.debug(job_ids)

    # 'true' / 'false' strings become booleans, any other value is kept as sent
    raw_toggle = request.POST.get('downstream_stop_toggle', False)
    include_downstream = {'true': True, 'false': False}.get(raw_toggle, raw_toggle)

    # collect the distinct Jobs to stop
    jobs_to_stop = set()
    for job_id in job_ids:
        cjob = CombineJob.get_combine_job(job_id)
        if not include_downstream:
            jobs_to_stop.add(cjob.job)
            continue
        jobs_to_stop.update(cjob.job.get_downstream_jobs())

    # stop them in ascending id order
    jobs_in_order = sorted(jobs_to_stop, key=lambda job: job.id)
    for job in jobs_in_order:
        LOGGER.debug('stopping Job: %s', job)
        job.stop_job()

    # tell the user which Jobs were stopped
    stopped_names_html = '<br>'.join([job.name for job in jobs_in_order])
    GlobalMessageClient(request.session).add_gm({
        'html': '<p><strong>Stopped Job(s):</strong><br>%s</p>' % (stopped_names_html),
        'class': 'danger'
    })

    return JsonResponse({'results': True})
Exemple #6
0
def job_validation_scenario_failures(request, org_id, record_group_id, job_id, job_validation_id):
    """Render the failures page for one JobValidation of the given Job."""
    cjob = CombineJob.get_combine_job(job_id)
    validation_pk = int(job_validation_id)
    context = {
        'cjob': cjob,
        'jv': JobValidation.objects.get(pk=validation_pk),
        'breadcrumbs': breadcrumb_parser(request)
    }
    return render(request, 'core/job_validation_scenario_failures.html', context)
Exemple #7
0
def rerun_jobs(request):
    """
    Rerun the Jobs listed in POST 'job_ids[]', optionally together with their
    downstream and/or upstream Jobs.

    Returns a JsonResponse; the view is requested via Ajax, which reloads the page.
    """
    LOGGER.debug('re-running jobs')

    selected_ids = request.POST.getlist('job_ids[]')

    # lineage toggles
    include_downstream = bool_for_string(
        request.POST.get('downstream_rerun_toggle', False))
    include_upstream = bool_for_string(
        request.POST.get('upstream_rerun_toggle', False))

    # gather distinct Jobs: requested lineage first, then the Job itself (always)
    jobs_to_rerun = set()
    for job_id in selected_ids:
        cjob = CombineJob.get_combine_job(job_id)

        if include_downstream:
            downstream_jobs = cjob.job.get_downstream_jobs(include_self=False)
            jobs_to_rerun.update(downstream_jobs)

        if include_upstream:
            upstream_jobs = cjob.job.get_upstream_jobs(include_self=False)
            jobs_to_rerun.update(upstream_jobs)

        jobs_to_rerun.add(cjob.job)

    # rerun in ascending id order
    jobs_in_order = sorted(jobs_to_rerun, key=lambda job: job.id)
    tasks.rerun_jobs(jobs_in_order)

    # tell the user what is being rerun
    rerun_names_html = '<br>'.join([str(job.name) for job in jobs_in_order])
    GlobalMessageClient(request.session).add_gm({
        'html': '<strong>Preparing to Rerun Job(s):</strong><br>%s<br><br>Refresh this page to update status of Jobs rerunning. <button class="btn-sm btn-outline-primary" onclick="location.reload();">Refresh</button>' % rerun_names_html,
        'class': 'success'
    })

    return JsonResponse({'results': True})
Exemple #8
0
def job_errors(request, org_id, record_group_id, job_id):
    """Render the errors page for the Job identified by job_id."""
    LOGGER.debug('retrieving errors for job id: %s', job_id)

    cjob = CombineJob.get_combine_job(job_id)
    context = {
        'cjob': cjob,
        'job_errors': cjob.get_job_errors(),
        'breadcrumbs': breadcrumb_parser(request)
    }
    return render(request, 'core/job_errors.html', context)
Exemple #9
0
def move_jobs(request):
    """
    Move the Jobs listed in POST 'job_ids[]' to the Record Group given by
    POST 'record_group_id'.

    When 'downstream_move_toggle' is on, the result of each Job's
    get_downstream_jobs() is moved instead of the Job alone.

    Returns:
        JsonResponse({'results': True}) once all selected Jobs are saved.

    Raises:
        IndexError: if 'record_group_id' is missing from the POST data.
    """
    LOGGER.debug('moving jobs')

    job_ids = request.POST.getlist('job_ids[]')
    record_group_id = request.POST.getlist('record_group_id')[0]

    # get downstream toggle
    downstream_toggle = request.POST.get('downstream_move_toggle', False)
    if downstream_toggle == 'true':
        downstream_toggle = True
    elif downstream_toggle == 'false':
        downstream_toggle = False

    # set of jobs to move
    job_move_set = set()

    # loop through job_ids
    for job_id in job_ids:

        # get CombineJob
        cjob = CombineJob.get_combine_job(job_id)

        if downstream_toggle:
            # add move lineage for this job to set
            job_move_set.update(cjob.job.get_downstream_jobs())
        else:
            # just the job itself
            job_move_set.add(cjob.job)

    # sort and run
    ordered_job_move_set = sorted(job_move_set, key=lambda j: j.id)

    if ordered_job_move_set:
        # the destination is the same for every Job, so look it up once;
        # skipped entirely when nothing was selected, as before
        new_record_group = RecordGroup.objects.get(pk=record_group_id)

        for job in ordered_job_move_set:
            LOGGER.debug('moving Job: %s', job)

            job.record_group = new_record_group
            job.save()

            LOGGER.debug('Job %s has been moved', job)

    # return, as requested via Ajax
    return JsonResponse({'results': True})
def bg_task(request, task_id):
    """
    Render the detail page for a single Combine background task.

    If the task parameters mention a 'job_id', the matching CombineJob is
    passed to the template as well; otherwise 'cjob' is None.
    """
    combine_task = CombineBackgroundTask.objects.get(pk=int(task_id))
    LOGGER.debug('retrieving task: %s', combine_task)

    # attach the related job only when the task refers to one
    cjob = None
    if 'job_id' in combine_task.task_params:
        cjob = CombineJob.get_combine_job(combine_task.task_params['job_id'])

    context = {
        'ct': combine_task,
        'cjob': cjob,
        'breadcrumbs': breadcrumb_parser(request)
    }
    return render(request, 'core/bg_task.html', context)
Exemple #11
0
def job_update_name(request, org_id, record_group_id, job_id):
    """
    Rename a Job from POST 'job_name' and send the user back where they came from.

    An empty 'job_name' is stored as None. Requests that are not POST change
    nothing.

    Returns:
        Redirect to the referring page after a POST when the Referer header
        is present; otherwise a redirect to the Job's 'job_details' page, so
        the view always produces a response.
    """
    job_details_kwargs = {
        'org_id': org_id,
        'record_group_id': record_group_id,
        'job_id': job_id
    }

    # nothing to update unless the form was posted
    if request.method != 'POST':
        return redirect('job_details', **job_details_kwargs)

    # get CombineJob
    cjob = CombineJob.get_combine_job(job_id)

    # get job name, treating an empty submission as "no name"
    job_name = request.POST.get('job_name')
    if job_name == '':
        job_name = None

    # update job name
    cjob.job.name = job_name
    cjob.job.save()

    # redirect back to the referring page; the header is optional, so fall
    # back to the Job's own page when it is absent
    referer = request.META.get('HTTP_REFERER')
    if referer:
        return redirect(referer)
    return redirect('job_details', **job_details_kwargs)
Exemple #12
0
def job_unpublish(request, org_id, record_group_id, job_id):
    """
    Start unpublishing a Job and send the user back to the Job's Record Group.
    """
    cjob = CombineJob.get_combine_job(job_id)

    # start the unpublish
    cjob.unpublish_bg_task()

    # tell the user, with a link to the published records
    message_html = '<p><strong>Unpublishing Job:</strong><br>%s</p><p><a href="%s"><button type="button" class="btn btn-outline-primary btn-sm">View Published Records</button></a></p>' % (
        cjob.job.name, reverse('published'))
    GlobalMessageClient(request.session).add_gm({
        'html': message_html,
        'class': 'success'
    })

    record_group = cjob.job.record_group
    return redirect('record_group',
                    org_id=record_group.organization.id,
                    record_group_id=record_group.id)
Exemple #13
0
def job_parameters(request, org_id, record_group_id, job_id):
    """
    Read or replace a Job's parameters.

    GET returns the Job's job_details_dict as JSON. POST stores the raw
    'job_details_json' value (when supplied) as the Job's job_details and
    returns a confirmation message. Other methods return nothing.
    """
    cjob = CombineJob.get_combine_job(job_id)
    method = request.method

    # read
    if method == 'GET':
        return JsonResponse(cjob.job.job_details_dict)

    # anything other than POST is not handled
    if method != 'POST':
        return None

    # write, only when a payload was actually sent
    submitted_details = request.POST.get('job_details_json', None)
    if submitted_details is not None:
        cjob.job.job_details = submitted_details
        cjob.job.save()

    return JsonResponse({"msg": "Job Parameters updated!"})
Exemple #14
0
def job_reports_create_validation(request, org_id, record_group_id, job_id):
    """
    Generate job report based on validation results

    GET renders the report form, including the Job's mapped field counts
    (computed and cached in job_details when the Job is finished and no
    cached analysis exists yet).

    POST records a 'validation_report' Combine background task from the
    submitted form values, fires the celery task and redirects to the
    Background Tasks page.
    """

    # retrieve job
    cjob = CombineJob.get_combine_job(int(job_id))

    # if GET, prepare form
    if request.method == 'GET':

        # mapped field analysis, generate if not part of job_details
        job_details = cjob.job.job_details_dict
        if 'mapped_field_analysis' in job_details:
            field_counts = job_details['mapped_field_analysis']
        elif cjob.job.finished:
            field_counts = cjob.count_indexed_fields()
            cjob.job.update_job_details(
                {'mapped_field_analysis': field_counts}, save=True)
        else:
            LOGGER.debug('job not finished, not setting')
            field_counts = {}

        # render page
        return render(request, 'core/job_reports_create_validation.html', {
            'cjob': cjob,
            'field_counts': field_counts,
            'breadcrumbs': breadcrumb_parser(request)
        })

    # if POST, generate report
    if request.method == 'POST':

        # get report name for Combine Task; a missing field is treated like
        # an empty one so the report never ends up named "None"
        report_name = request.POST.get('report_name')
        if not report_name:
            report_name = 'j_%s_validation_report' % cjob.job.id
            combine_task_name = "Validation Report: %s" % cjob.job.name
        else:
            combine_task_name = "Validation Report: %s" % report_name

        # reserved fields that may not be requested as mapped fields
        reserved_fields = ['record_id', 'db_id', 'oid', '_id']

        # handle POST params and save as Combine task params
        task_params = {
            'job_id': cjob.job.id,
            'report_name': report_name,
            'report_format': request.POST.get('report_format'),
            'compression_type': request.POST.get('compression_type'),
            # validation scenario ids are cast to int
            'validation_scenarios': [
                int(vs_id) for vs_id in request.POST.getlist('validation_scenario', [])],
            'mapped_field_include': [
                f for f in request.POST.getlist('mapped_field_include', [])
                if f not in reserved_fields]
        }

        # initiate Combine BG Task
        combine_task = CombineBackgroundTask(
            name=combine_task_name,
            task_type='validation_report',
            task_params_json=json.dumps(task_params)
        )
        combine_task.save()

        # run celery task
        background_task = tasks.create_validation_report.delay(combine_task.id)
        LOGGER.debug('firing bg task: %s', background_task)
        combine_task.celery_task_id = background_task.task_id
        combine_task.save()

        # redirect to Background Tasks
        return redirect('bg_tasks')
Exemple #15
0
def job_update(request, org_id, record_group_id, job_id):
    """
    Update Job in one of several ways:
        - re-map and index ('reindex')
        - run new / different validations ('validations')
        - remove a validation ('remove_validation')
        - run a DPLA Bulk Data comparison ('dbdm')
        - set the publish set id ('publish_set')

    GET renders the update form. POST dispatches on the 'update_type' form
    value, starts the matching background task (or saves the publish set id)
    and redirects to the Job's details page. An unrecognised update type or
    HTTP method also redirects to the Job's details page.
    """

    # retrieve job
    cjob = CombineJob.get_combine_job(int(job_id))

    # if GET, prepare form
    if request.method == 'GET':
        # get validation scenarios
        validation_scenarios = ValidationScenario.objects.all()

        # get field mappers
        field_mappers = FieldMapper.objects.all()
        orig_fm_config_json = cjob.job.get_fm_config_json()

        # get all bulk downloads
        bulk_downloads = DPLABulkDataDownload.objects.all()

        # get update type from GET params
        update_type = request.GET.get('update_type', None)

        # render page
        return render(request, 'core/job_update.html', {
            'cjob': cjob,
            'update_type': update_type,
            'validation_scenarios': validation_scenarios,
            'field_mappers': field_mappers,
            'bulk_downloads': bulk_downloads,
            'xml2kvp_handle': xml2kvp.XML2kvp(),
            'orig_fm_config_json': orig_fm_config_json,
            'breadcrumbs': breadcrumb_parser(request)
        })

    # if POST, submit job
    if request.method == 'POST':

        LOGGER.debug('updating job')
        LOGGER.debug(request.POST)

        def _to_job_details():
            # shared destination of every background-task branch below
            return redirect('job_details',
                            org_id=cjob.job.record_group.organization.id,
                            record_group_id=cjob.job.record_group.id,
                            job_id=cjob.job.id)

        # get update type
        update_type = request.POST.get('update_type', None)
        LOGGER.debug('running job update: %s', update_type)

        # handle re-index
        if update_type == 'reindex':
            # get preferred metadata index mapper
            fm_config_json = request.POST.get('fm_config_json')

            # init re-index
            cjob.reindex_bg_task(fm_config_json=fm_config_json)

            # set gms
            gmc = GlobalMessageClient(request.session)
            gmc.add_gm({
                'html': '<p><strong>Re-Indexing Job:</strong><br>%s</p>'
                        '<p><a href="%s"><button type="button" '
                        'class="btn btn-outline-primary btn-sm">View Background Tasks</button></a></p>' % (
                            cjob.job.name, reverse('bg_tasks')),
                'class': 'success'
            })

            return _to_job_details()

        # handle new validations
        if update_type == 'validations':
            # get requested validation scenarios
            validation_scenarios = request.POST.getlist(
                'validation_scenario', [])

            # get validations
            validations = ValidationScenario.objects.filter(
                id__in=[int(vs_id) for vs_id in validation_scenarios])

            # init bg task
            cjob.new_validations_bg_task([vs.id for vs in validations])

            # set gms
            gmc = GlobalMessageClient(request.session)
            gmc.add_gm({
                'html': '<p><strong>Running New Validations for Job:</strong><br>%s<br>'
                        '<br><strong>Validation Scenarios:</strong><br>%s</p>'
                        '<p><a href="%s"><button type="button" '
                        'class="btn btn-outline-primary btn-sm">View Background Tasks</button></a></p>' % (
                            cjob.job.name, '<br>'.join([vs.name for vs in validations]), reverse('bg_tasks')),
                'class': 'success'
            })

            return _to_job_details()

        # handle validation removal
        if update_type == 'remove_validation':
            # get id of the job validation to remove
            jv_id = request.POST.get('jv_id', False)

            # look the scenario up BEFORE starting the task: the lookup
            # rejects a bad id before anything is queued, and it cannot be
            # affected by whatever the removal task does afterwards
            validation_scenario = JobValidation.objects.get(
                pk=int(jv_id)).validation_scenario

            # initiate Combine BG Task
            cjob.remove_validation_bg_task(jv_id)

            # set gms
            gmc = GlobalMessageClient(request.session)
            gmc.add_gm({
                'html': '<p><strong>Removing Validation for Job:</strong><br>%s<br><br>'
                        '<strong>Validation Scenario:</strong><br>%s</p><p><a href="%s"><button type="button" '
                        'class="btn btn-outline-primary btn-sm">View Background Tasks</button></a></p>' % (
                            cjob.job.name, validation_scenario.name, reverse('bg_tasks')),
                'class': 'success'
            })

            return _to_job_details()

        # handle DPLA Bulk Data comparison
        if update_type == 'dbdm':
            # get id of the bulk data download to compare against
            dbdd_id = request.POST.get('dbdd', False)

            # resolve the download first so a bad id fails before a task is queued
            dbdd = DPLABulkDataDownload.objects.get(pk=int(dbdd_id))

            # initiate Combine BG Task
            cjob.dbdm_bg_task(dbdd_id)

            # set gms
            gmc = GlobalMessageClient(request.session)
            gmc.add_gm({
                'html': '<p><strong>Running DPLA Bulk Data comparison for Job:</strong><br>%s<br><br>'
                        '<strong>Bulk Data S3 key:</strong><br>%s</p><p><a href="%s"><button type="button" '
                        'class="btn btn-outline-primary btn-sm">View Background Tasks</button></a></p>' % (
                            cjob.job.name, dbdd.s3_key, reverse('bg_tasks')),
                'class': 'success'
            })

            return _to_job_details()

        # handle publish set id change
        if update_type == 'publish_set':
            update_body = request.POST
            if update_body.get('publish_set_id', None):
                cjob.job.publish_set_id = update_body['publish_set_id']
            # an existing publish set id, when chosen, wins over a typed one
            if update_body.get('existing_publish_set_id', None):
                cjob.job.publish_set_id = update_body['existing_publish_set_id']
            redirect_anchor = update_body.get('redirect_anchor', '')
            cjob.job.save()
            return redirect(reverse('job_details', args=[org_id, record_group_id, job_id]) + redirect_anchor)

    # unknown update type or unsupported method: nothing to do, but a view
    # must still return a response
    return redirect('job_details',
                    org_id=org_id,
                    record_group_id=record_group_id,
                    job_id=job_id)
Exemple #16
0
def delete_jobs(request):
    """
    Mark the Jobs listed in POST 'job_ids[]' as deleting and queue a
    background task to delete each one.

    When 'downstream_delete_toggle' is on, the result of each Job's
    get_downstream_jobs() is deleted instead of the Job alone. Returns a
    JsonResponse once every deletion has been queued.
    """
    LOGGER.debug('deleting jobs')

    job_ids = request.POST.getlist('job_ids[]')
    LOGGER.debug(job_ids)

    # 'true' / 'false' strings become booleans, any other value is kept as sent
    raw_toggle = request.POST.get('downstream_delete_toggle', False)
    include_downstream = {'true': True, 'false': False}.get(raw_toggle, raw_toggle)

    # collect the distinct Jobs to delete
    jobs_to_delete = set()
    for job_id in job_ids:
        cjob = CombineJob.get_combine_job(job_id)
        if not include_downstream:
            jobs_to_delete.add(cjob.job)
            continue
        jobs_to_delete.update(cjob.job.get_downstream_jobs())

    # work through them in ascending id order
    jobs_in_order = sorted(jobs_to_delete, key=lambda job: job.id)

    for job in jobs_in_order:
        LOGGER.debug('deleting Job: %s', job)

        # flag the Job so the front-end shows it as being deleted
        job.name = "%s (DELETING)" % job.name
        job.deleted = True
        job.status = 'deleting'
        job.save()

        # record the Combine background task
        task_params = {
            'model': 'Job',
            'job_id': job.id
        }
        combine_task = CombineBackgroundTask(
            name='Delete Job: #%s' % job.name,
            task_type='delete_model_instance',
            task_params_json=json.dumps(task_params)
        )
        combine_task.save()

        # hand off to celery and remember the celery task id
        celery_result = tasks.delete_model_instance.delay('Job', job.id)
        LOGGER.debug('firing bg task: %s', celery_result)
        combine_task.celery_task_id = celery_result.task_id
        combine_task.save()

    # tell the user which Jobs are being removed
    deleted_names_html = '<br>'.join([job.name for job in jobs_in_order])
    GlobalMessageClient(request.session).add_gm({
        'html': '<p><strong>Deleting Job(s):</strong><br>%s</p><p>Refresh this page to update status of removing Jobs. <button class="btn-sm btn-outline-primary" onclick="location.reload();">Refresh</button></p>' % (
            deleted_names_html),
        'class': 'danger'
    })

    return JsonResponse({'results': True})
Exemple #17
0
def export_mapped_fields(request, export_source=None, job_id=None, subset=None):
    """
    Queue a background export of mapped fields.

    export_source selects the target: 'job' exports the Job given by job_id
    and redirects to its details page; 'published' exports published records
    (optionally limited to subset) and redirects to the published page. Any
    other value does nothing.
    """
    post = request.POST

    # export options shared by both sources
    export_type = post.get('mapped_fields_export_type')

    # any non-empty kibana_style value counts as True
    kibana_style = post.get('kibana_style', False)
    if kibana_style:
        kibana_style = True

    archive_type = post.get('archive_type')
    mapped_field_include = post.getlist('mapped_field_include', False)

    shared_params = {
        'mapped_fields_export_type': export_type,
        'kibana_style': kibana_style,
        'archive_type': archive_type,
        'mapped_field_include': mapped_field_include
    }

    # shared tail of the user-facing message
    message_template = (
        '<p><strong>Exporting Mapped Fields for %s</p><p><a href="%s"><button type="button" '
        'class="btn btn-outline-primary btn-sm">View Background Tasks</button></a></p>')

    # single Job
    if export_source == 'job':
        LOGGER.debug('exporting mapped fields from Job')

        cjob = CombineJob.get_combine_job(int(job_id))

        # record the Combine background task
        task_params = {'job_id': cjob.job.id}
        task_params.update(shared_params)
        combine_task = CombineBackgroundTask(
            name='Export Mapped Fields for Job: %s' % cjob.job.name,
            task_type='export_mapped_fields',
            task_params_json=json.dumps(task_params))
        combine_task.save()

        # apply export output configuration
        combine_task = _handle_export_output(request, export_source, combine_task)

        # hand off to celery and remember the celery task id
        celery_result = tasks.export_mapped_fields.delay(combine_task.id)
        LOGGER.debug('firing bg task: %s', celery_result)
        combine_task.celery_task_id = celery_result.task_id
        combine_task.save()

        # tell the user
        gmc = GlobalMessageClient(request.session)
        target = "Job:</strong><br>%s" % cjob.job.name
        gmc.add_gm({
            'html': message_template % (target, reverse('bg_tasks')),
            'class': 'success'
        })

        return redirect('job_details',
                        org_id=cjob.job.record_group.organization.id,
                        record_group_id=cjob.job.record_group.id,
                        job_id=cjob.job.id)

    # published records
    if export_source == 'published':
        LOGGER.debug('exporting mapped fields from published records')

        # record the Combine background task
        task_params = {'published': True, 'subset': subset}
        task_params.update(shared_params)
        combine_task = CombineBackgroundTask(
            name='Export Mapped Fields for Published Records',
            task_type='export_mapped_fields',
            task_params_json=json.dumps(task_params))
        combine_task.save()

        # apply export output configuration
        combine_task = _handle_export_output(request, export_source, combine_task)

        # hand off to celery and remember the celery task id
        celery_result = tasks.export_mapped_fields.delay(combine_task.id)
        LOGGER.debug('firing bg task: %s', celery_result)
        combine_task.celery_task_id = celery_result.task_id
        combine_task.save()

        # tell the user
        gmc = GlobalMessageClient(request.session)
        target = ":</strong><br>Published Records"
        gmc.add_gm({
            'html': message_template % (target, reverse('bg_tasks')),
            'class': 'success'
        })

        return redirect('published')
Exemple #18
0
def export_tabular_data(request, export_source=None, job_id=None, subset=None):
    """
    Queue a background export of tabular data.

    export_source selects the target: 'job' exports the Job given by job_id
    and redirects to its details page; 'published' exports published records
    (optionally limited to subset) and redirects to the published page. Any
    other value does nothing.
    """
    post = request.POST

    # records per file, defaulting to 500 when absent or blank
    records_per_file = post.get('records_per_file', False)
    if records_per_file in ('', False):
        records_per_file = 500

    # remaining export options
    export_type = post.get('tabular_data_export_type')
    archive_type = post.get('archive_type')
    fm_export_config_json = post.get('fm_export_config_json')

    def _shared_params():
        # built on demand so the int() cast only happens for a handled source
        return {
            'records_per_file': int(records_per_file),
            'tabular_data_export_type': export_type,
            'archive_type': archive_type,
            'fm_export_config_json': fm_export_config_json
        }

    # shared tail of the user-facing message
    message_template = (
        '<p><strong>Exporting Tabular Data for %s</p><p><a href="%s"><button type="button" '
        'class="btn btn-outline-primary btn-sm">View Background Tasks</button></a></p>')

    # single Job
    if export_source == 'job':
        LOGGER.debug('exporting tabular data from Job')

        cjob = CombineJob.get_combine_job(int(job_id))

        # record the Combine background task
        task_name = 'Export Tabular Data for Job: %s' % cjob.job.name
        task_params = {'job_id': cjob.job.id}
        task_params.update(_shared_params())
        combine_task = CombineBackgroundTask(
            name=task_name,
            task_type='export_tabular_data',
            task_params_json=json.dumps(task_params))
        combine_task.save()

        # apply export output configuration
        combine_task = _handle_export_output(request, export_source, combine_task)

        # hand off to celery and remember the celery task id
        celery_result = tasks.export_tabular_data.delay(combine_task.id)
        LOGGER.debug('firing bg task: %s', celery_result)
        combine_task.celery_task_id = celery_result.task_id
        combine_task.save()

        # tell the user
        gmc = GlobalMessageClient(request.session)
        target = "Job:</strong><br>%s" % cjob.job.name
        gmc.add_gm({
            'html': message_template % (target, reverse('bg_tasks')),
            'class': 'success'
        })

        return redirect('job_details',
                        org_id=cjob.job.record_group.organization.id,
                        record_group_id=cjob.job.record_group.id,
                        job_id=cjob.job.id)

    # published records
    if export_source == 'published':
        LOGGER.debug('exporting tabular data from published records')

        # instance of Published model
        # TODO: not used
        PublishedRecords()

        # record the Combine background task
        task_params = {'published': True, 'subset': subset}
        task_params.update(_shared_params())
        combine_task = CombineBackgroundTask(
            name='Export Tabular Data for Published Records',
            task_type='export_tabular_data',
            task_params_json=json.dumps(task_params))
        combine_task.save()

        # apply export output configuration
        combine_task = _handle_export_output(request, export_source, combine_task)

        # hand off to celery and remember the celery task id
        celery_result = tasks.export_tabular_data.delay(combine_task.id)
        LOGGER.debug('firing bg task: %s', celery_result)
        combine_task.celery_task_id = celery_result.task_id
        combine_task.save()

        # tell the user
        gmc = GlobalMessageClient(request.session)
        target = ":</strong><br>Published Records"
        gmc.add_gm({
            'html': message_template % (target, reverse('bg_tasks')),
            'class': 'success'
        })

        return redirect('published')
Exemple #19
0
def job_details(request, org_id, record_group_id, job_id):
    """Render the details page for a single Job.

    Looks up the CombineJob for ``job_id``, refreshes its status, and gathers
    record counts, lineage, DPLA bulk data matches, mapped field analysis,
    OAI set frequencies, and published subsets (each enriched with its field
    counts) into the context for ``core/job_details.html``.

    Args:
        request: the incoming HTTP request; the optional ``q`` GET parameter
            is passed through to the template unchanged.
        org_id: organization id from the URL; not read by this view.
        record_group_id: record group id from the URL; not read by this view.
        job_id: id of the Job to display.

    Returns:
        The rendered ``core/job_details.html`` response.
    """
    LOGGER.debug('details for job id: %s', job_id)

    # get CombineJob
    cjob = CombineJob.get_combine_job(job_id)

    # update status
    cjob.job.update_status()

    # detailed record count
    record_count_details = cjob.job.get_detailed_job_record_count()

    # get job lineage
    job_lineage = cjob.job.get_lineage()

    # get dpla_bulk_data_match
    dpla_bulk_data_matches = cjob.job.get_dpla_bulk_data_matches()

    # check if limiting to one, pre-existing record
    get_q = request.GET.get('q', None)

    # job details and job type specific augment
    job_detail = cjob.job.job_details_dict

    # mapped field analysis: reuse the stored copy when present, otherwise
    # generate and persist it, but only once the job has finished
    if 'mapped_field_analysis' in job_detail:
        field_counts = job_detail['mapped_field_analysis']
    elif cjob.job.finished:
        field_counts = cjob.count_indexed_fields()
        cjob.job.update_job_details(
            {'mapped_field_analysis': field_counts}, save=True)
    else:
        LOGGER.debug('job not finished, not setting')
        field_counts = {}

    # NOTE: a per-job-type isinstance chain used to live here, but every
    # branch was a no-op; it was removed as dead code.

    # get published records, primarily for published sets
    pub_records = PublishedRecords()

    # frequency of each oai_set value among this job's records
    oai_sets = Record.objects(job_id=cjob.job.id).item_frequencies(field='oai_set')

    # get published subsets with PublishedRecords static method
    published_subsets = PublishedRecords.get_subsets()

    # enrich each subset with its field counts
    for subset in published_subsets:

        # look for previously stored counts
        counts = mc_handle.combine.misc.find_one(
            {'_id': 'published_field_counts_%s' % subset['name']})

        # if counts not yet calculated, do now
        if counts is None:
            counts = PublishedRecords(
                subset=subset['name']).count_indexed_fields()
        subset['counts'] = counts

    # get field mappers
    field_mappers = FieldMapper.objects.all()

    # return
    return render(request, 'core/job_details.html', {
        'cjob': cjob,
        'record_group': cjob.job.record_group,
        'record_count_details': record_count_details,
        'field_counts': field_counts,
        'field_mappers': field_mappers,
        'xml2kvp_handle': xml2kvp.XML2kvp(),
        'job_lineage_json': json.dumps(job_lineage),
        'dpla_bulk_data_matches': dpla_bulk_data_matches,
        'q': get_q,
        'job_details': job_detail,
        'pr': pub_records,
        'published_subsets': published_subsets,
        'es_index_str': cjob.esi.es_index_str,
        'breadcrumbs': breadcrumb_parser(request),
        'oai_sets': dict(oai_sets)
    })