def run_service(request, service_id):
    """Queue a job to run the given service.

    Args:
        service_id (str): ID of the service to run, used to retrieve the
            service record from the database.
    """
    if request.method != "POST":
        return redirect(manage)
    # Get service information from service id.
    service = services.objects.get(service_id=service_id)
    # Pass in app name from apps.py; create() already saves the new row.
    new_job = add_job(ServicesConfig.name)
    service_jobs.objects.create(job_id=new_job, service_id=service)
    return redirect(job_status)
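# A minimal sketch of the add_job() contract assumed throughout these views
# (the real helper lives in the swamplr_jobs app; this signature is inferred
# from the call sites in this file, not copied from its source):
#
#     def add_job(app_name, job_type_label=None):
#         """Create and save a jobs row tagged with the calling app's name
#         (and optionally a job type label); return the new instance."""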
def add_pathauto_job(request, source_job_id):
    """Queue a pathauto job for the objects created by a source ingest job."""
    new_job = add_job(SwamplrIngestConfig.name, job_type_label="pathauto")
    ingest_job = ingest_jobs.objects.get(job_id=source_job_id)
    pathauto_jobs.objects.create(
        job_id=new_job,
        source_job=ingest_job,
    )
    return redirect(job_status)
def add_delete_job(request, source_job_id):
    """Queue a delete job targeting the objects created by a source ingest job."""
    new_job = add_job(SwamplrIngestConfig.name, job_type_label="delete")
    ingest_job = ingest_jobs.objects.get(job_id=source_job_id)
    delete_jobs.objects.create(
        job_id=new_job,
        source_job=ingest_job,
    )
    return redirect(job_status)
def pre_process():
    """Determine whether any services are scheduled to be run now.

    Also check whether any jobs can be archived, based on the service's
    auto_archive time field.
    """
    # Get the services that are scheduled to run, based both on
    # frequency/last_started and on whether the service already has an
    # active (running) job.
    pending_jobs = services.objects.raw("""
        SELECT DISTINCT s.* FROM swamplr_services_services s
        LEFT JOIN swamplr_services_service_jobs sj ON (s.service_id = sj.service_id_id)
        LEFT JOIN swamplr_jobs_jobs j ON (sj.job_id_id = j.job_id)
        LEFT JOIN swamplr_jobs_status t ON (t.status_id = j.status_id_id AND t.running='y')
        WHERE s.frequency IS NOT NULL
        AND ((NOW() >= DATE_ADD(s.last_started, INTERVAL s.frequency MINUTE)
              AND t.status_id IS NULL)
             OR s.last_started IS NULL)
        AND s.archived != 'y'
    """)

    # Add new jobs for all services found.
    logging.info(
        "swamplr_services: Found {0} services that are scheduled to run.".format(
            len(list(pending_jobs))))
    for job in pending_jobs:
        logging.info(
            "swamplr_services: Creating new job for service_id {0}".format(
                job.service_id))
        # `job` is already a services instance from the raw query above.
        # Pass in app name from apps.py; create() already saves the new row.
        new_job = add_job(ServicesConfig.name)
        service_jobs.objects.create(job_id=new_job, service_id=job)

    # Find completed, successful service jobs whose auto_archive window
    # has elapsed.
    pending_archive = service_jobs.objects.raw("""
        SELECT DISTINCT sj.* FROM swamplr_services_services s
        LEFT JOIN swamplr_services_service_jobs sj ON (s.service_id = sj.service_id_id)
        LEFT JOIN swamplr_jobs_jobs j ON (j.job_id = sj.job_id_id)
        LEFT JOIN swamplr_jobs_status t ON (t.status_id = j.status_id_id AND t.success='y')
        WHERE s.auto_archive IS NOT NULL AND s.auto_archive > 0
        AND j.archived != 'y'
        AND DATE_ADD(j.completed, INTERVAL s.auto_archive MINUTE) <= NOW()
        AND t.success='y'
    """)

    # Archive each job.
    logging.info(
        "swamplr_services: Found {0} service jobs that can be archived.".format(
            len(list(pending_archive))))
    for job in pending_archive:
        logging.info("swamplr_services: Archiving {0}".format(job.job_id_id))
        jobs.objects.filter(job_id=job.job_id_id).update(archived='y')
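# Worked example of the scheduling predicate in pre_process(), for reference
# (the times are illustrative): a service with frequency=60 and
# last_started=10:00 becomes pending at 11:00, but only if no service_jobs
# row for it joins to a status with running='y'; a service that has never
# run (last_started IS NULL) is pending immediately. Archived services are
# always skipped.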
def delete(request, ns):
    """Add delete job to queue."""
    logging.info("Adding delete job for namespace: {0}".format(ns))
    new_job = add_job(SwamplrNamespacesConfig.name)
    ns_operation = namespace_operations.objects.get(operation_name="Delete")
    namespace_jobs.objects.create(
        job_id=new_job,
        namespace=ns,
        operation_id=ns_operation,
    )
    return redirect(job_status)
def reindex(request, ns):
    """Add a reindex job to the queue for one namespace, or for all items."""
    if ns == "all":
        logging.info("Adding reindex job for all items.")
        ns = "[all items]"
    else:
        logging.info("Adding reindex job for namespace: {0}".format(ns))
    new_job = add_job(SwamplrNamespacesConfig.name)
    ns_operation = namespace_operations.objects.get(operation_name="Reindex")
    namespace_jobs.objects.create(
        job_id=new_job,
        namespace=ns,
        operation_id=ns_operation,
    )
    return redirect(job_status)
def add_id_job(ns, id_type):
    """Add an identifier job (DOI, ARK, or pathauto) for a namespace to the queue."""
    logging.info("Adding {0} job for namespace: {1}".format(id_type, ns))
    id_type = id_type.lower()
    # Resolve the operation before creating the job so an unrecognized
    # id_type cannot leave an orphaned jobs row (previously ns_operation
    # would simply be undefined and raise NameError).
    if id_type == "doi":
        ns_operation = namespace_operations.objects.get(operation_name="Mint DOI")
    elif id_type == "ark":
        ns_operation = namespace_operations.objects.get(operation_name="Mint ARK")
    elif id_type == "pathauto":
        ns_operation = namespace_operations.objects.get(operation_name="Pathauto")
    else:
        raise ValueError("Unsupported id_type: {0}".format(id_type))
    new_job = add_job(SwamplrNamespacesConfig.name)
    namespace_jobs.objects.create(
        job_id=new_job,
        namespace=ns,
        operation_id=ns_operation,
    )
    return redirect(job_status)
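# The if/elif dispatch in add_id_job() could equally be table-driven; a
# sketch using only the operation names that appear above:
#
#     OPERATION_NAMES = {
#         "doi": "Mint DOI",
#         "ark": "Mint ARK",
#         "pathauto": "Pathauto",
#     }
#     ns_operation = namespace_operations.objects.get(
#         operation_name=OPERATION_NAMES[id_type])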
def add_derivatives_job(request, item_type):
    """Create a new job from the submitted derivatives form."""
    form = DerivativesForm(request.POST)
    form.set_fields(item_type)
    if form.is_valid():
        clean = form.cleaned_data
        object_derivatives = clean["derive_types"]
        replace = "y" if clean["replace_on_duplicate"] else ""
        subset = int(clean["subset_value"]) if clean["subset_value"] else 0
        new_job = add_job(SwamplrDerivativesConfig.name)
        derivative_job = derivative_jobs.objects.create(
            job_id=new_job,
            source_dir=clean["path_list_selected"],
            replace_on_duplicate=replace,
            subset=subset,
            source_file_extension=item_type,
            brightness=clean["brightness_value"],
            contrast=clean["contrast_value"],
        )
        # Link each requested derivative type to the new job.
        for d in object_derivatives:
            job_derivatives.objects.create(
                derive_id=derivative_job,
                derive_type=d,
                parameters="",  # TODO -- populate
            )
    else:
        message = ["Unable to add job: Missing or invalid form data."]
        return run_derivatives(request, item_type, message=message)
    return redirect(job_status)
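# Sketch of the form contract assumed by add_derivatives_job() (inferred
# from usage here, not from the form's source): DerivativesForm.set_fields(
# item_type) populates the dynamic fields validated above (derive_types,
# path_list_selected, replace_on_duplicate, subset_value, brightness_value,
# contrast_value) for the given source file type before is_valid() runs.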
def add_ingest_job(request, collection_name):
    """Create a new ingest job from the submitted ingest form."""
    collection_data = get_ingest_data(collection_name)
    form = IngestForm(request.POST)
    form.set_fields(collection_name)
    if form.is_valid():
        clean = form.cleaned_data
        object_datastreams = get_all_datastreams(collection_data, clean)
        metadata_datastreams = get_all_metadata(collection_data, clean)
        all_datastreams = object_datastreams + metadata_datastreams
        # If at least one datastream for an object type (otype) was selected,
        # make sure DC is added as a job_datastream for that type.
        seen_types = {}
        for ds, otype, dtype in all_datastreams:
            seen_types.setdefault(otype, {})
            if ds == "DC":
                seen_types[otype]["has_dc"] = True
        for obj_type, obj_data in seen_types.items():
            if "has_dc" not in obj_data:
                all_datastreams.append(("DC", obj_type, "metadata"))
        process_new = "y" if "process_new" in clean["process"] else ""
        process_existing = "y" if "process_existing" in clean["process"] else ""
        replace_on_duplicate = "y" if clean["replace_on_duplicate"] else ""
        subset = int(clean["subset_value"]) if clean["subset_value"] else 0
        new_job = add_job(SwamplrIngestConfig.name, job_type_label="ingest")
        ingest_job = ingest_jobs.objects.create(
            job_id=new_job,
            source_dir=clean["path_list_selected"],
            replace_on_duplicate=replace_on_duplicate,
            collection_name=collection_name,
            namespace=clean["pid_namespace"],
            process_new=process_new,
            process_existing=process_existing,
            subset=subset,
        )
        # Create any datastream rows not yet in the lookup table, then link
        # every selected datastream to this ingest job.
        existing_ds = [d.datastream_label for d in datastreams.objects.all()]
        added = []
        for ds, otype, dtype in all_datastreams:
            if ds not in existing_ds and ds not in added:
                is_object = "y" if dtype == "datastreams" else ""
                datastreams.objects.create(
                    datastream_label=ds,
                    is_object=is_object,
                )
                added.append(ds)
            job_datastreams.objects.create(
                ingest_id=ingest_job,
                object_type=otype,
                datastream_id=datastreams.objects.get(datastream_label=ds),
            )
    else:
        message = ["Unable to add job: Missing or invalid form data."]
        return run_ingest(request, collection_name, message=message)
    return redirect(job_status)
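# Worked example of the DC back-fill in add_ingest_job() (the object-type
# names here are hypothetical): given
#
#     all_datastreams = [("OBJ", "large_image", "datastreams"),
#                        ("MODS", "large_image", "metadata")]
#
# no DC entry exists for "large_image", so the second loop appends
# ("DC", "large_image", "metadata"), ensuring every object type being
# ingested carries a Dublin Core datastream.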