def monitor(self, monitor_reference): # type: (JobExecution) -> bool execution = monitor_reference["execution"] max_retries = 20 # using 'wait_secs' incremental delays, this is ~3min of retry attempts num_retries = 0 run_step = 0 job_id = "<undefined>" while execution.isNotComplete() or run_step == 0: if num_retries >= max_retries: raise Exception( "Could not read status document after {} retries. Giving up." .format(max_retries)) try: execution = check_wps_status(location=execution.statusLocation, sleep_secs=wait_secs(run_step), settings=self.settings) monitor_reference[ "execution"] = execution # update reference for later stages job_id = execution.statusLocation.split("/")[-1].replace( ".xml", "") exec_status = status.map_status(execution.getStatus()) LOGGER.debug( get_log_monitor_msg(job_id, exec_status, execution.percentCompleted, execution.statusMessage, execution.statusLocation)) log_msg = get_job_log_msg(status=exec_status, message=execution.statusMessage, progress=execution.percentCompleted, duration=None) # get if available log_progress = map_progress(execution.percentCompleted, REMOTE_JOB_PROGRESS_MONITOR, REMOTE_JOB_PROGRESS_RESULTS) self.update_status(log_msg, log_progress, status.STATUS_RUNNING) except Exception as exc: num_retries += 1 LOGGER.debug("Exception raised: %r", exc) sleep(1) else: num_retries = 0 run_step += 1 if not execution.isSucceded(): exec_msg = execution.statusMessage or "Job failed." exec_status = status.map_status(execution.getStatus()) LOGGER.debug( get_log_monitor_msg(job_id, exec_status, execution.percentCompleted, exec_msg, execution.statusLocation)) return False return True
def submit_job_handler(payload, # type: JSON settings, # type: SettingsType service_url, # type: str provider_id=None, # type: Optional[str] process_id=None, # type: str is_workflow=False, # type: bool is_local=True, # type: bool visibility=None, # type: Optional[str] language=None, # type: Optional[str] auth=None, # type: Optional[HeaderCookiesType] tags=None, # type: Optional[List[str]] user=None, # type: Optional[int] context=None, # type: Optional[str] ): # type: (...) -> JSON """ Submits the job to the Celery worker with provided parameters. Assumes that parameters have been pre-fetched and validated, except for the input payload. """ try: json_body = sd.Execute().deserialize(payload) except colander.Invalid as ex: raise HTTPBadRequest("Invalid schema: [{}]".format(str(ex))) # TODO: remove when all parameter variations are supported # FIXME: # - support 'sync' and 'Prefer' header variants (https://github.com/crim-ca/weaver/issues/247) # - support 'response: raw' (https://github.com/crim-ca/weaver/issues/376) # - allow omitting 'outputs' (https://github.com/crim-ca/weaver/issues/375) _validate_job_parameters(json_body) is_execute_async = json_body["mode"] != EXECUTE_MODE_SYNC # convert auto to async notification_email = json_body.get("notification_email") encrypted_email = encrypt_email(notification_email, settings) if notification_email else None store = get_db(settings).get_store(StoreJobs) job = store.save_job(task_id=STATUS_ACCEPTED, process=process_id, service=provider_id, inputs=json_body.get("inputs"), is_local=is_local, is_workflow=is_workflow, access=visibility, user_id=user, execute_async=is_execute_async, custom_tags=tags, notification_email=encrypted_email, accept_language=language, context=context) job.save_log(logger=LOGGER, message="Job task submitted for execution.", status=STATUS_ACCEPTED, progress=0) job = store.update_job(job) result = execute_process.delay(job_id=job.id, wps_url=clean_ows_url(service_url), headers=auth) LOGGER.debug("Celery pending task [%s] for job [%s].", result.id, job.id) # local/provider process location location_base = "/providers/{provider_id}".format(provider_id=provider_id) if provider_id else "" location = "{base_url}{location_base}/processes/{process_id}/jobs/{job_id}".format( base_url=get_wps_restapi_base_url(settings), location_base=location_base, process_id=process_id, job_id=job.id) body_data = { "jobID": job.id, "processID": job.process, "providerID": provider_id, # dropped by validator if not applicable "status": map_status(STATUS_ACCEPTED), "location": location } return body_data
def json(self, container=None): # pylint: disable=W0221,arguments-differ # type: (Optional[AnySettingsContainer]) -> JSON """Obtain the JSON data representation for response body. .. note:: Settings are required to update API shortcut URLs to job additional information. Without them, paths will not include the API host, which will not resolve to full URI. """ settings = get_settings(container) if container else {} job_json = { "jobID": self.id, "status": self.status, "message": self.status_message, "duration": self.duration_str, "percentCompleted": self.progress, } job_url = self._job_url(settings) # TODO: use links (https://github.com/crim-ca/weaver/issues/58) if self.status in JOB_STATUS_CATEGORIES[STATUS_CATEGORY_FINISHED]: job_status = map_status(self.status) if job_status == STATUS_SUCCEEDED: resource_type = "result" else: resource_type = "exceptions" job_json[resource_type] = "{job_url}/{res}".format(job_url=job_url, res=resource_type.lower()) job_json["logs"] = "{job_url}/logs".format(job_url=job_url) return sd.JobStatusInfo().deserialize(job_json)
def dismiss_job_task(job, container): # type: (Job, AnySettingsContainer) -> Job """ Cancels any pending or running :mod:`Celery` task and removes completed job artifacts. .. note:: The :term:`Job` object itself is not deleted, only its artifacts. Therefore, its inputs, outputs, logs, exceptions, etc. are still available in the database, but corresponding files that would be exposed by ``weaver.wps_output`` configurations are removed. :param job: Job to cancel or cleanup. :param container: Application settings. :return: Updated and dismissed job. """ raise_job_dismissed(job, container) if job.status in JOB_STATUS_CATEGORIES[StatusCategory.RUNNING]: # signal to stop celery task. Up to it to terminate remote if any. LOGGER.debug("Job [%s] dismiss operation: Canceling task [%s]", job.id, job.task_id) celery_app.control.revoke(job.task_id, terminate=True) wps_out_dir = get_wps_output_dir(container) job_out_dir = os.path.join(wps_out_dir, str(job.id)) job_out_log = os.path.join(wps_out_dir, str(job.id) + ".log") job_out_xml = os.path.join(wps_out_dir, str(job.id) + ".xml") if os.path.isdir(job_out_dir): LOGGER.debug("Job [%s] dismiss operation: Removing output results.", job.id) shutil.rmtree( job_out_dir, onerror=lambda func, path, _exc: LOGGER.warning( "Job [%s] dismiss operation: Failed to delete [%s] due to [%s]", job.id, job_out_dir, _exc)) if os.path.isfile(job_out_log): LOGGER.debug("Job [%s] dismiss operation: Removing output logs.", job.id) try: os.remove(job_out_log) except OSError as exc: LOGGER.warning( "Job [%s] dismiss operation: Failed to delete [%s] due to [%s]", job.id, job_out_log, exc) if os.path.isfile(job_out_xml): LOGGER.debug("Job [%s] dismiss operation: Removing output WPS status.", job.id) try: os.remove(job_out_xml) except OSError as exc: LOGGER.warning( "Job [%s] dismiss operation: Failed to delete [%s] due to [%s]", job.id, job_out_xml, exc) LOGGER.debug("Job [%s] dismiss operation: Updating job status.") store = get_db(container).get_store(StoreJobs) job.status_message = f"Job {Status.DISMISSED}." job.status = map_status(Status.DISMISSED) job = store.update_job(job) return job
def monitor(self, monitor_reference): # type: (str) -> bool job_status_uri = monitor_reference job_status_data = self.get_job_status(job_status_uri) job_status_value = status.map_status(job_status_data["status"]) job_id = job_status_data["jobID"] self.update_status( "Monitoring job on remote ADES : {0}".format(job_status_uri), REMOTE_JOB_PROGRESS_MONITORING, status.STATUS_RUNNING) while job_status_value not in status.JOB_STATUS_CATEGORIES[ status.JOB_STATUS_CATEGORY_FINISHED]: sleep(5) job_status_data = self.get_job_status(job_status_uri) job_status_value = status.map_status(job_status_data["status"]) LOGGER.debug( get_log_monitor_msg(job_id, job_status_value, job_status_data.get("percentCompleted", 0), get_any_message(job_status_data), job_status_data.get("statusLocation"))) self.update_status( get_job_log_msg( status=job_status_value, message=get_any_message(job_status_data), progress=job_status_data.get("percentCompleted", 0), duration=job_status_data.get("duration", None)), # get if available map_progress(job_status_data.get("percentCompleted", 0), REMOTE_JOB_PROGRESS_MONITORING, REMOTE_JOB_PROGRESS_FETCH_OUT), status.STATUS_RUNNING) if job_status_value != status.STATUS_SUCCEEDED: LOGGER.debug( get_log_monitor_msg(job_id, job_status_value, job_status_data.get("percentCompleted", 0), get_any_message(job_status_data), job_status_data.get("statusLocation"))) raise PackageExecutionError(job_status_data) return True
def dismiss_job_task(job, container): # type: (Job, AnySettingsContainer) -> Job """ Cancels any pending or running :mod:`Celery` task and removes completed job artifacts. :param job: job to cancel or cleanup. :param container: :return: """ raise_job_dismissed(job, container) if job.status in status.JOB_STATUS_CATEGORIES[ status.JOB_STATUS_CATEGORY_RUNNING]: # signal to stop celery task. Up to it to terminate remote if any. LOGGER.debug("Job [%s] dismiss operation: Canceling task [%s]", job.id, job.task_id) celery_app.control.revoke(job.task_id, terminate=True) wps_out_dir = get_wps_output_dir(container) job_out_dir = os.path.join(wps_out_dir, str(job.id)) job_out_log = os.path.join(wps_out_dir, str(job.id) + ".log") job_out_xml = os.path.join(wps_out_dir, str(job.id) + ".xml") if os.path.isdir(job_out_dir): LOGGER.debug("Job [%s] dismiss operation: Removing output results.", job.id) shutil.rmtree( job_out_dir, onerror=lambda func, path, _exc: LOGGER.warning( "Job [%s] dismiss operation: Failed to delete [%s] due to [%s]", job.id, job_out_dir, _exc)) if os.path.isfile(job_out_log): LOGGER.debug("Job [%s] dismiss operation: Removing output logs.", job.id) try: os.remove(job_out_log) except OSError as exc: LOGGER.warning( "Job [%s] dismiss operation: Failed to delete [%s] due to [%s]", job.id, job_out_log, exc) if os.path.isfile(job_out_xml): LOGGER.debug("Job [%s] dismiss operation: Removing output WPS status.", job.id) try: os.remove(job_out_xml) except OSError as exc: LOGGER.warning( "Job [%s] dismiss operation: Failed to delete [%s] due to [%s]", job.id, job_out_xml, exc) LOGGER.debug("Job [%s] dismiss operation: Updating job status.") store = get_db(container).get_store(StoreJobs) job.status_message = "Job {}.".format(status.STATUS_DISMISSED) job.status = status.map_status(status.STATUS_DISMISSED) job = store.update_job(job) return job
def save_job(self, task_id, # type: str process, # type: str service=None, # type: Optional[str] inputs=None, # type: Optional[List[Any]] is_workflow=False, # type: bool is_local=False, # type: bool user_id=None, # type: Optional[int] execute_async=True, # type: bool custom_tags=None, # type: Optional[List[str]] access=None, # type: Optional[str] notification_email=None, # type: Optional[str] accept_language=None, # type: Optional[str] ): # type: (...) -> Job """ Stores a job in mongodb. """ try: tags = ["dev"] tags.extend(list(filter(lambda t: bool(t), custom_tags or []))) # remove empty tags if is_workflow: tags.append(PROCESS_WORKFLOW) else: tags.append(PROCESS_APPLICATION) if execute_async: tags.append(EXECUTE_MODE_ASYNC) else: tags.append(EXECUTE_MODE_SYNC) if not access: access = VISIBILITY_PRIVATE new_job = Job({ "task_id": task_id, "user_id": user_id, "service": service, # provider identifier (WPS service) "process": process, # process identifier (WPS request) "inputs": inputs, "status": map_status(STATUS_ACCEPTED), "execute_async": execute_async, "is_workflow": is_workflow, "is_local": is_local, "created": now(), "tags": list(set(tags)), # remove duplicates "access": access, "notification_email": notification_email, "accept_language": accept_language, }) self.collection.insert_one(new_job.params()) job = self.fetch_by_id(job_id=new_job.id) except Exception as ex: raise JobRegistrationError("Error occurred during job registration: [{}]".format(repr(ex))) if job is None: raise JobRegistrationError("Failed to retrieve registered job.") return job
def get_job_status(self, job_status_uri, retry=True): # type: (JobMonitorReference, Union[bool, int]) -> JSON """ Obtains the contents from the :term:`Job` status response. """ response = self.make_request( method="GET", url=job_status_uri, retry=retry) # retry in case not yet ready response.raise_for_status() job_status = response.json() job_id = job_status_uri.split("/")[-1] if "jobID" not in job_status: job_status["jobID"] = job_id # provide if not implemented by ADES job_status["status"] = status.map_status(job_status["status"]) return job_status
def cancel_job(request): """ Dismiss a job. Note: Will only stop tracking this particular process (WPS 1.0 doesn't allow to stop a process) """ job = get_job(request) app.control.revoke(job.task_id, terminate=True) store = get_db(request).get_store(StoreJobs) job.status_message = "Job dismissed." job.status = status.map_status(status.STATUS_DISMISSED) store.update_job(job) return HTTPOk(json={ "jobID": job.id, "status": job.status, "message": job.status_message, "percentCompleted": job.progress, })
def get_job_status(self, job_status_uri, retry=True): response = self.make_request(method="GET", url=job_status_uri, retry=True, status_code_mock=HTTPNotFound.code) # Retry on 404 since job may not be fully ready if retry and response.status_code == HTTPNotFound.code: sleep(5) return self.get_job_status(job_status_uri, retry=False) response.raise_for_status() job_status = response.json() # TODO Remove patch for Geomatys not conforming to the status schema # - jobID is missing # - handled by 'map_status': status are upper cases and succeeded process are indicated as successful job_id = job_status_uri.split("/")[-1] if "jobID" not in job_status: job_status["jobID"] = job_id job_status["status"] = status.map_status(job_status["status"]) return job_status
def get_job_results(request): # type: (Request) -> HTTPException """ Retrieve the results of a job. """ job = get_job(request) raise_job_dismissed(job, request) job_status = status.map_status(job.status) if job_status in status.JOB_STATUS_CATEGORIES[ status.JOB_STATUS_CATEGORY_RUNNING]: raise HTTPNotFound( json={ "code": "ResultsNotReady", "description": "Job status is '{}'. Results are not yet available.".format( job_status) }) results = get_results(job, request, value_key="value", ogc_api=True) # note: cannot add links in this case because variable OutputID keys are directly at the root results = sd.Result().deserialize(results) return HTTPOk(json=results)
def get_job_submission_response(body, headers, error=False): # type: (JSON, AnyHeadersContainer, bool) -> Union[HTTPOk, HTTPCreated] """ Generates the successful response from contents returned by :term:`Job` submission process. If :term:`Job` already finished processing within requested ``Prefer: wait=X`` seconds delay (and if allowed by the :term:`Process` ``jobControlOptions``), return the successful status immediately instead of created status. Otherwise, return the status monitoring location of the created :term:`Job` to be monitored asynchronously. .. seealso:: :func:`weaver.processes.execution.submit_job` :func:`weaver.processes.execution.submit_job_handler` """ # convert headers to pass as list to avoid any duplicate Content-related headers # otherwise auto-added by JSON handling when provided by dict-like structure if hasattr(headers, "items"): headers = list(headers.items()) get_header("Content-Type", headers, pop=True) headers.append(("Content-Type", ContentType.APP_JSON)) status = map_status(body.get("status")) if status in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]: if error: http_class = HTTPBadRequest http_desc = sd.FailedSyncJobResponse.description else: http_class = HTTPOk http_desc = sd.CompletedJobResponse.description body = sd.CompletedJobStatusSchema().deserialize(body) body["description"] = http_desc return http_class(json=body, headerlist=headers) body["description"] = sd.CreatedLaunchJobResponse.description body = sd.CreatedJobStatusSchema().deserialize(body) return HTTPCreated(json=body, headerlist=headers)
def test_map_status_pywps_compliant_as_int_statuses(): for s in range(len(WPS_STATUS)): if STATUS_PYWPS_MAP[s] != Status.UNKNOWN: assert map_status(s, StatusCompliant.PYWPS) in JOB_STATUS_CATEGORIES[StatusCompliant.PYWPS]
def execute(self, workflow_inputs, out_dir, expected_outputs): self.update_status("Preparing execute request for remote WPS1 provider.", REMOTE_JOB_PROGRESS_REQ_PREP, status.STATUS_RUNNING) LOGGER.debug("Execute process WPS request for %s", self.process) try: try: wps = WebProcessingService(url=self.provider, headers=self.cookies, verify=self.verify) raise_on_xml_exception(wps._capabilities) # noqa: W0212 except Exception as ex: raise OWSNoApplicableCode("Failed to retrieve WPS capabilities. Error: [{}].".format(str(ex))) try: process = wps.describeprocess(self.process) except Exception as ex: raise OWSNoApplicableCode("Failed to retrieve WPS process description. Error: [{}].".format(str(ex))) # prepare inputs complex_inputs = [] for process_input in process.dataInputs: if WPS_COMPLEX_DATA in process_input.dataType: complex_inputs.append(process_input.identifier) # remove any 'null' input, should employ the 'default' of the remote WPS process inputs_provided_keys = filter(lambda i: workflow_inputs[i] != "null", workflow_inputs) wps_inputs = [] for input_key in inputs_provided_keys: input_val = workflow_inputs[input_key] # in case of array inputs, must repeat (id,value) # in case of complex input (File), obtain location, otherwise get data value if not isinstance(input_val, list): input_val = [input_val] input_values = [] for val in input_val: if isinstance(val, dict): val = val["location"] # owslib only accepts strings, not numbers directly if isinstance(val, (int, float)): val = str(val) if val.startswith("file://"): # we need to host file starting with file:// scheme val = self.host_file(val) input_values.append(val) # need to use ComplexDataInput structure for complex input # TODO: BoundingBox not supported for input_value in input_values: if input_key in complex_inputs: input_value = ComplexDataInput(input_value) wps_inputs.append((input_key, input_value)) # prepare outputs outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA) for o in process.processOutputs if o.identifier in expected_outputs] self.update_status("Executing job on remote WPS1 provider.", REMOTE_JOB_PROGRESS_EXECUTION, status.STATUS_RUNNING) mode = EXECUTE_MODE_ASYNC execution = wps.execute(self.process, inputs=wps_inputs, output=outputs, mode=mode, lineage=True) if not execution.process and execution.errors: raise execution.errors[0] self.update_status("Monitoring job on remote WPS1 provider : [{0}]".format(self.provider), REMOTE_JOB_PROGRESS_MONITORING, status.STATUS_RUNNING) max_retries = 5 num_retries = 0 run_step = 0 job_id = "<undefined>" while execution.isNotComplete() or run_step == 0: if num_retries >= max_retries: raise Exception("Could not read status document after {} retries. Giving up.".format(max_retries)) try: execution = check_wps_status(location=execution.statusLocation, verify=self.verify, sleep_secs=wait_secs(run_step)) job_id = execution.statusLocation.replace(".xml", "").split("/")[-1] LOGGER.debug(get_log_monitor_msg(job_id, status.map_status(execution.getStatus()), execution.percentCompleted, execution.statusMessage, execution.statusLocation)) self.update_status(get_job_log_msg(status=status.map_status(execution.getStatus()), message=execution.statusMessage, progress=execution.percentCompleted, duration=None), # get if available map_progress(execution.percentCompleted, REMOTE_JOB_PROGRESS_MONITORING, REMOTE_JOB_PROGRESS_FETCH_OUT), status.STATUS_RUNNING) except Exception as exc: num_retries += 1 LOGGER.debug("Exception raised: %r", exc) sleep(1) else: num_retries = 0 run_step += 1 if not execution.isSucceded(): exec_msg = execution.statusMessage or "Job failed." LOGGER.debug(get_log_monitor_msg(job_id, status.map_status(execution.getStatus()), execution.percentCompleted, exec_msg, execution.statusLocation)) raise Exception(execution.statusMessage or "Job failed.") self.update_status("Fetching job outputs from remote WPS1 provider.", REMOTE_JOB_PROGRESS_FETCH_OUT, status.STATUS_RUNNING) results = [ows2json_output(output, process) for output in execution.processOutputs] for result in results: result_id = get_any_id(result) result_val = get_any_value(result) if result_id in expected_outputs: # This is where cwl expect the output file to be written # TODO We will probably need to handle multiple output value... dst_fn = "/".join([out_dir.rstrip("/"), expected_outputs[result_id]]) # TODO Should we handle other type than File reference? resp = request_extra("get", result_val, allow_redirects=True, settings=self.settings) LOGGER.debug("Fetching result output from [%s] to cwl output destination: [%s]", result_val, dst_fn) with open(dst_fn, mode="wb") as dst_fh: dst_fh.write(resp.content) except Exception as exc: exception_class = "{}.{}".format(type(exc).__module__, type(exc).__name__) errors = "{0}: {1!s}".format(exception_class, exc) LOGGER.exception(exc) raise Exception(errors) self.update_status("Execution on remote WPS1 provider completed.", REMOTE_JOB_PROGRESS_COMPLETED, status.STATUS_SUCCEEDED)
def test_map_status_pywps_compliant_as_int_statuses(): for s in range(len(WPS_STATUS)): if status.STATUS_PYWPS_MAP[s] != status.STATUS_UNKNOWN: assert status.map_status(s, status.STATUS_COMPLIANT_PYWPS) in \ status.JOB_STATUS_CATEGORIES[status.STATUS_COMPLIANT_PYWPS]
def test_map_status_owslib_compliant(): for sv in status.JOB_STATUS_VALUES: for s in get_status_variations(sv): assert status.map_status(s, status.STATUS_COMPLIANT_OWSLIB) in \ status.JOB_STATUS_CATEGORIES[status.STATUS_COMPLIANT_OWSLIB]
def test_map_status_back_compatibility_and_special_cases(): for c in [ status.STATUS_COMPLIANT_OGC, status.STATUS_COMPLIANT_PYWPS, status.STATUS_COMPLIANT_OWSLIB ]: assert status.map_status("successful", c) == status.STATUS_SUCCEEDED
def execute_process(task, job_id, wps_url, headers=None): # type: (Task, UUID, str, Optional[HeadersType]) -> StatusType """ Celery task that executes the WPS process job monitoring as status updates (local and remote). """ from weaver.wps.service import get_pywps_service LOGGER.debug("Job execute process called.") task_process = get_celery_process() rss_start = task_process.memory_info().rss registry = get_registry( None) # local thread, whether locally or dispatched celery settings = get_settings(registry) db = get_db( registry, reset_connection=True ) # reset the connection because we are in a forked celery process store = db.get_store(StoreJobs) job = store.fetch_by_id(job_id) job.started = now() job.status = Status.STARTED # will be mapped to 'RUNNING' job.status_message = f"Job {Status.STARTED}." # will preserve detail of STARTED vs RUNNING job.save_log(message=job.status_message) task_logger = get_task_logger(__name__) job.save_log(logger=task_logger, message="Job task setup initiated.") load_pywps_config(settings) job.progress = JobProgress.SETUP job.task_id = task.request.id job.save_log(logger=task_logger, message="Job task setup completed.") job = store.update_job(job) # Flag to keep track if job is running in background (remote-WPS, CWL app, etc.). # If terminate signal is sent to worker task via API dismiss request while still running in background, # the raised exception within the task will switch the job to Status.FAILED, but this will not raise an # exception here. Since the task execution 'succeeds' without raising, it skips directly to the last 'finally'. # Patch it back to Status.DISMISSED in this case. task_terminated = True try: job.progress = JobProgress.DESCRIBE job.save_log(logger=task_logger, message=f"Employed WPS URL: [{wps_url!s}]", level=logging.DEBUG) job.save_log( logger=task_logger, message=f"Execute WPS request for process [{job.process!s}]") wps_process = fetch_wps_process(job, wps_url, headers, settings) # prepare inputs job.progress = JobProgress.GET_INPUTS job.save_log(logger=task_logger, message="Fetching job input definitions.") wps_inputs = parse_wps_inputs(wps_process, job) # prepare outputs job.progress = JobProgress.GET_OUTPUTS job.save_log(logger=task_logger, message="Fetching job output definitions.") wps_outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA) for o in wps_process.processOutputs] # if process refers to a remote WPS provider, pass it down to avoid unnecessary re-fetch request if job.is_local: process = None # already got all the information needed pre-loaded in PyWPS service else: service = Service(name=job.service, url=wps_url) process = Process.from_ows(wps_process, service, settings) job.progress = JobProgress.EXECUTE_REQUEST job.save_log(logger=task_logger, message="Starting job process execution.") job.save_log( logger=task_logger, message= "Following updates could take a while until the Application Package answers..." ) wps_worker = get_pywps_service(environ=settings, is_worker=True) execution = wps_worker.execute_job(job, wps_inputs=wps_inputs, wps_outputs=wps_outputs, remote_process=process, headers=headers) if not execution.process and execution.errors: raise execution.errors[0] # adjust status location wps_status_path = get_wps_local_status_location( execution.statusLocation, settings) job.progress = JobProgress.EXECUTE_STATUS_LOCATION LOGGER.debug("WPS status location that will be queried: [%s]", wps_status_path) if not wps_status_path.startswith("http") and not os.path.isfile( wps_status_path): LOGGER.warning( "WPS status location not resolved to local path: [%s]", wps_status_path) job.save_log( logger=task_logger, level=logging.DEBUG, message=f"Updated job status location: [{wps_status_path}].") job.status = Status.RUNNING job.status_message = execution.statusMessage or f"{job!s} initiation done." job.status_location = wps_status_path job.request = execution.request job.response = execution.response job.progress = JobProgress.EXECUTE_MONITOR_START job.save_log(logger=task_logger, message="Starting monitoring of job execution.") job = store.update_job(job) max_retries = 5 num_retries = 0 run_step = 0 while execution.isNotComplete() or run_step == 0: if num_retries >= max_retries: job.save_log(errors=execution.errors, logger=task_logger) job = store.update_job(job) raise Exception( f"Could not read status document after {max_retries} retries. Giving up." ) try: # NOTE: # Don't actually log anything here until process is completed (success or fail) so that underlying # WPS execution logs can be inserted within the current job log and appear continuously. # Only update internal job fields in case they get referenced elsewhere. progress_min = JobProgress.EXECUTE_MONITOR_LOOP progress_max = JobProgress.EXECUTE_MONITOR_DONE job.progress = progress_min run_delay = wait_secs(run_step) execution = check_wps_status(location=wps_status_path, settings=settings, sleep_secs=run_delay) job_msg = (execution.statusMessage or "").strip() job.response = execution.response job.status = map_status(execution.getStatus()) job_status_msg = job_msg or "n/a" job_percent = execution.percentCompleted job.status_message = f"Job execution monitoring (progress: {job_percent}%, status: {job_status_msg})." if execution.isComplete(): msg_progress = f" (status: {job_msg})" if job_msg else "" if execution.isSucceded(): wps_package.retrieve_package_job_log( execution, job, progress_min, progress_max) job.status = map_status(Status.SUCCEEDED) job.status_message = f"Job succeeded{msg_progress}." job.progress = progress_max job.save_log(logger=task_logger) job_results = [ ows2json_output_data(output, process, settings) for output in execution.processOutputs ] job.results = make_results_relative( job_results, settings) else: task_logger.debug("Job failed.") wps_package.retrieve_package_job_log( execution, job, progress_min, progress_max) job.status_message = f"Job failed{msg_progress}." job.progress = progress_max job.save_log(errors=execution.errors, logger=task_logger) task_logger.debug( "Mapping Job references with generated WPS locations.") map_locations(job, settings) job = store.update_job(job) except Exception as exc: num_retries += 1 task_logger.debug("Exception raised: %s", repr(exc)) job.status_message = f"Could not read status XML document for {job!s}. Trying again..." job.save_log(errors=execution.errors, logger=task_logger) job = store.update_job(job) sleep(1) else: num_retries = 0 run_step += 1 finally: task_terminated = False # reached only if WPS execution completed (worker not terminated beforehand) job = store.update_job(job) except Exception as exc: # if 'execute_job' finishes quickly before even reaching the 'monitoring loop' # consider WPS execution produced an error (therefore Celery worker not terminated) task_terminated = False LOGGER.exception("Failed running [%s]", job) LOGGER.debug("Failed job [%s] raised an exception.", job, exc_info=exc) # note: don't update the progress here to preserve last one that was set job.status = map_status(Status.FAILED) job.status_message = f"Failed to run {job!s}." errors = f"{fully_qualified_name(exc)}: {exc!s}" job.save_log(errors=errors, logger=task_logger) job = store.update_job(job) finally: # if task worker terminated, local 'job' is out of date compared to remote/background runner last update job = store.fetch_by_id(job.id) if task_terminated and map_status(job.status) == Status.FAILED: job.status = Status.DISMISSED task_success = map_status( job.status) not in JOB_STATUS_CATEGORIES[StatusCategory.FAILED] collect_statistics(task_process, settings, job, rss_start) if task_success: job.progress = JobProgress.EXECUTE_MONITOR_END job.status_message = f"Job {job.status}." job.save_log(logger=task_logger) if task_success: job.progress = JobProgress.NOTIFY send_job_complete_notification_email(job, task_logger, settings) if job.status not in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]: job.status = Status.SUCCEEDED job.status_message = f"Job {job.status}." job.mark_finished() if task_success: job.progress = JobProgress.DONE job.save_log(logger=task_logger, message="Job task complete.") job = store.update_job(job) return job.status
def test_map_status_pywps_compliant(): for sv in status.JOB_STATUS_VALUES: for s in get_status_variations(sv): assert status.map_status(s, status.STATUS_COMPLIANT_PYWPS) in \ status.JOB_STATUS_CATEGORIES[status.STATUS_COMPLIANT_PYWPS]
def submit_job_handler( payload, # type: JSON settings, # type: SettingsType service_url, # type: str provider_id=None, # type: Optional[str] process_id=None, # type: str is_workflow=False, # type: bool is_local=True, # type: bool visibility=None, # type: Optional[AnyVisibility] language=None, # type: Optional[str] headers=None, # type: Optional[HeaderCookiesType] tags=None, # type: Optional[List[str]] user=None, # type: Optional[int] context=None, # type: Optional[str] ): # type: (...) -> AnyResponseType """ Submits the job to the Celery worker with provided parameters. Assumes that parameters have been pre-fetched and validated, except for the input payload. """ try: json_body = sd.Execute().deserialize(payload) except colander.Invalid as ex: raise HTTPBadRequest(f"Invalid schema: [{ex!s}]") db = get_db(settings) headers = headers or {} if is_local: proc_store = db.get_store(StoreProcesses) process = proc_store.fetch_by_id(process_id) job_ctl_opts = process.jobControlOptions else: job_ctl_opts = ExecuteControlOption.values() max_wait = as_int(settings.get("weaver.exec_sync_max_wait"), default=20) mode, wait, applied = parse_prefer_header_execute_mode( headers, job_ctl_opts, max_wait) get_header("prefer", headers, pop=True) if not applied: # whatever returned is a default, consider 'mode' in body as alternative is_execute_async = ExecuteMode.get( json_body.get("mode")) != ExecuteMode.SYNC # convert auto to async else: # as per https://datatracker.ietf.org/doc/html/rfc7240#section-2 # Prefer header not resolve as valid still proces is_execute_async = mode != ExecuteMode.SYNC exec_resp = json_body.get("response") notification_email = json_body.get("notification_email") encrypted_email = encrypt_email(notification_email, settings) if notification_email else None store = db.get_store(StoreJobs) # type: StoreJobs job = store.save_job(task_id=Status.ACCEPTED, process=process_id, service=provider_id, inputs=json_body.get("inputs"), outputs=json_body.get("outputs"), is_local=is_local, is_workflow=is_workflow, access=visibility, user_id=user, context=context, execute_async=is_execute_async, execute_response=exec_resp, custom_tags=tags, notification_email=encrypted_email, accept_language=language) job.save_log(logger=LOGGER, message="Job task submitted for execution.", status=Status.ACCEPTED, progress=0) job = store.update_job(job) location_url = job.status_url(settings) resp_headers = {"Location": location_url} resp_headers.update(applied) wps_url = clean_ows_url(service_url) result = execute_process.delay(job_id=job.id, wps_url=wps_url, headers=headers) # type: CeleryResult LOGGER.debug("Celery pending task [%s] for job [%s].", result.id, job.id) if not is_execute_async: LOGGER.debug( "Celery task requested as sync if it completes before (wait=%ss)", wait) try: result.wait(timeout=wait) except CeleryTaskTimeoutError: pass if result.ready(): job = store.fetch_by_id(job.id) # when sync is successful, it must return the results direct instead of status info # see: https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response if job.status == Status.SUCCEEDED: return get_job_results_response(job, settings, headers=resp_headers) # otherwise return the error status body = job.json(container=settings, self_link="status") body["location"] = location_url resp = get_job_submission_response(body, resp_headers, error=True) return resp else: LOGGER.debug( "Celery task requested as sync took too long to complete (wait=%ss). Continue in async.", wait) # sync not respected, therefore must drop it # since both could be provided as alternative preferences, drop only async with limited subset prefer = get_header("Preference-Applied", headers, pop=True) _, _, async_applied = parse_prefer_header_execute_mode( {"Prefer": prefer}, [ExecuteMode.ASYNC]) if async_applied: resp_headers.update(async_applied) LOGGER.debug("Celery task submitted to run async.") body = { "jobID": job.id, "processID": job.process, "providerID": provider_id, # dropped by validator if not applicable "status": map_status(Status.ACCEPTED), "location": location_url } resp = get_job_submission_response(body, resp_headers) return resp
def execute(self, workflow_inputs, out_dir, expected_outputs): # TODO: test visible = self.is_visible() if not visible: # includes private visibility and non-existing cases if visible is None: LOGGER.info( "Process [%s] access is unauthorized on [%s] - deploying as admin.", self.process, self.url) elif visible is False: LOGGER.info( "Process [%s] is not deployed on [%s] - deploying.", self.process, self.url) # TODO: Maybe always redeploy? What about cases of outdated deployed process? try: self.deploy() except Exception as exc: # FIXME: support for Spacebel, avoid conflict error incorrectly handled, remove 500 when fixed pass_http_error(exc, [HTTPConflict, HTTPInternalServerError]) LOGGER.info("Process [%s] enforced to public visibility.", self.process) try: self.set_visibility(visibility=VISIBILITY_PUBLIC) # TODO: support for Spacebel, remove when visibility route properly implemented on ADES except Exception as exc: pass_http_error(exc, HTTPNotFound) self.update_status("Preparing execute request for remote ADES.", REMOTE_JOB_PROGRESS_REQ_PREP, status.STATUS_RUNNING) LOGGER.debug("Execute process WPS request for [%s]", self.process) execute_body_inputs = [] execute_req_id = "id" execute_req_input_val_href = "href" execute_req_input_val_data = "data" for workflow_input_key, workflow_input_value in workflow_inputs.items( ): if isinstance(workflow_input_value, list): for workflow_input_value_item in workflow_input_value: if isinstance( workflow_input_value_item, dict) and "location" in workflow_input_value_item: execute_body_inputs.append({ execute_req_id: workflow_input_key, execute_req_input_val_href: workflow_input_value_item["location"] }) else: execute_body_inputs.append({ execute_req_id: workflow_input_key, execute_req_input_val_data: workflow_input_value_item }) else: if isinstance(workflow_input_value, dict) and "location" in workflow_input_value: execute_body_inputs.append({ execute_req_id: workflow_input_key, execute_req_input_val_href: workflow_input_value["location"] }) else: execute_body_inputs.append({ execute_req_id: workflow_input_key, execute_req_input_val_data: workflow_input_value }) for exec_input in execute_body_inputs: if execute_req_input_val_href in exec_input and isinstance( exec_input[execute_req_input_val_href], str): if exec_input[execute_req_input_val_href].startswith( "{0}://".format(OPENSEARCH_LOCAL_FILE_SCHEME)): exec_input[execute_req_input_val_href] = "file{0}".format( exec_input[execute_req_input_val_href] [len(OPENSEARCH_LOCAL_FILE_SCHEME):]) elif exec_input[execute_req_input_val_href].startswith( "file://"): exec_input[execute_req_input_val_href] = self.host_file( exec_input[execute_req_input_val_href]) LOGGER.debug("Hosting intermediate input [%s] : [%s]", exec_input[execute_req_id], exec_input[execute_req_input_val_href]) execute_body_outputs = [{ execute_req_id: output, "transmissionMode": EXECUTE_TRANSMISSION_MODE_REFERENCE } for output in expected_outputs] self.update_status("Executing job on remote ADES.", REMOTE_JOB_PROGRESS_EXECUTION, status.STATUS_RUNNING) execute_body = dict(mode=EXECUTE_MODE_ASYNC, response=EXECUTE_RESPONSE_DOCUMENT, inputs=execute_body_inputs, outputs=execute_body_outputs) request_url = self.url + process_jobs_uri.format( process_id=self.process) response = self.make_request(method="POST", url=request_url, json=execute_body, retry=True) if response.status_code != 201: raise Exception( "Was expecting a 201 status code from the execute request : {0}" .format(request_url)) job_status_uri = response.headers["Location"] job_status = self.get_job_status(job_status_uri) job_status_value = status.map_status(job_status["status"]) self.update_status( "Monitoring job on remote ADES : {0}".format(job_status_uri), REMOTE_JOB_PROGRESS_MONITORING, status.STATUS_RUNNING) while job_status_value not in status.JOB_STATUS_CATEGORIES[ status.STATUS_CATEGORY_FINISHED]: sleep(5) job_status = self.get_job_status(job_status_uri) job_status_value = status.map_status(job_status["status"]) LOGGER.debug( get_log_monitor_msg(job_status["jobID"], job_status_value, job_status.get("percentCompleted", 0), get_any_message(job_status), job_status.get("statusLocation"))) self.update_status( get_job_log_msg(status=job_status_value, message=get_any_message(job_status), progress=job_status.get("percentCompleted", 0), duration=job_status.get( "duration", None)), # get if available map_progress(job_status.get("percentCompleted", 0), REMOTE_JOB_PROGRESS_MONITORING, REMOTE_JOB_PROGRESS_FETCH_OUT), status.STATUS_RUNNING) if job_status_value != status.STATUS_SUCCEEDED: LOGGER.debug( get_log_monitor_msg(job_status["jobID"], job_status_value, job_status.get("percentCompleted", 0), get_any_message(job_status), job_status.get("statusLocation"))) raise Exception(job_status) self.update_status("Fetching job outputs from remote ADES.", REMOTE_JOB_PROGRESS_FETCH_OUT, status.STATUS_RUNNING) results = self.get_job_results(job_status["jobID"]) for result in results: if get_any_id(result) in expected_outputs: # This is where cwl expect the output file to be written # TODO We will probably need to handle multiple output value... dst_fn = "/".join([ out_dir.rstrip("/"), expected_outputs[get_any_id(result)] ]) # TODO Should we handle other type than File reference? resp = request_extra("get", get_any_value(result), allow_redirects=True, settings=self.settings) LOGGER.debug( "Fetching result output from [%s] to cwl output destination: [%s]", get_any_value(result), dst_fn) with open(dst_fn, mode="wb") as dst_fh: dst_fh.write(resp.content) self.update_status("Execution on remote ADES completed.", REMOTE_JOB_PROGRESS_COMPLETED, status.STATUS_SUCCEEDED)
def get_job_log_msg(status, message, progress=0, duration=None): # type: (str, str, Optional[Number], Optional[str]) -> str return "{d} {p:3d}% {s:10} {m}".format(d=duration or "", p=int(progress or 0), s=map_status(status), m=message)
def test_map_status_owslib_compliant(): known_statuses = set(Status.values()) - {Status.UNKNOWN} for sv in known_statuses: for s in get_status_variations(sv): assert map_status(s, StatusCompliant.OWSLIB) in JOB_STATUS_CATEGORIES[StatusCompliant.OWSLIB]
def test_map_status_back_compatibility_and_special_cases(): for c in StatusCompliant: assert map_status("successful", c) == Status.SUCCEEDED
def monitor(self, monitor_reference): # type: (JobExecution) -> bool execution = monitor_reference["execution"] max_retries = 20 # using 'wait_secs' incremental delays, this is ~3min of retry attempts num_retries = 0 run_step = 0 job_id = "<undefined>" log_progress = Wps1RemoteJobProgress.MONITOR while execution.isNotComplete() or run_step == 0: if num_retries >= max_retries: raise Exception( f"Could not read status document after {max_retries} retries. Giving up." ) try: execution = check_wps_status(location=execution.statusLocation, sleep_secs=wait_secs(run_step), settings=self.settings) monitor_reference[ "execution"] = execution # update reference for later stages job_id = execution.statusLocation.split("/")[-1].replace( ".xml", "") exec_status = map_status(execution.getStatus()) LOGGER.debug( get_log_monitor_msg(job_id, exec_status, execution.percentCompleted, execution.statusMessage, execution.statusLocation)) log_msg = get_job_log_msg(status=exec_status, message=execution.statusMessage, progress=execution.percentCompleted, duration=None) # get if available log_progress = map_progress(execution.percentCompleted, Wps1RemoteJobProgress.MONITOR, Wps1RemoteJobProgress.RESULTS) self.update_status(log_msg, log_progress, Status.RUNNING) except Exception as exc: num_retries += 1 LOGGER.debug("Exception raised: %r", exc) sleep(1) else: num_retries = 0 run_step += 1 if not execution.isSucceded(): exec_msg = execution.statusMessage or "Job failed." exec_status = map_status(execution.getStatus()) exec_status_url = execution.statusLocation LOGGER.debug( get_log_monitor_msg(job_id, exec_status, execution.percentCompleted, exec_msg, exec_status_url)) # provide more details in logs of parent job process about the cause of the failing remote execution xml_err = bytes2str(xml_util.tostring(execution.response)) xml_exc = get_exception_from_xml_status(execution.response) self.update_status( f"Retrieved error status response from WPS remote provider on [{exec_status_url}]:\n{xml_err}\n", log_progress, Status.FAILED, error=xml_exc) return False return True
def execute_process(self, job_id, url, headers=None): from weaver.wps.service import get_pywps_service LOGGER.debug("Job execute process called.") settings = get_settings(app) task_logger = get_task_logger(__name__) load_pywps_config(settings) task_logger.debug("Job task setup.") # reset the connection because we are in a forked celery process db = get_db(app, reset_connection=True) store = db.get_store(StoreJobs) job = store.fetch_by_id(job_id) job.task_id = self.request.id job.progress = JOB_PROGRESS_SETUP job.save_log(logger=task_logger, message="Job task setup completed.") job = store.update_job(job) try: try: job.progress = JOB_PROGRESS_DESCRIBE job.save_log( logger=task_logger, message="Execute WPS request for process [{!s}]".format( job.process)) ssl_verify = get_ssl_verify_option("get", url, settings=settings) wps = WebProcessingService(url=url, headers=get_cookie_headers(headers), verify=ssl_verify) set_wps_language(wps, accept_language=job.accept_language) raise_on_xml_exception(wps._capabilities) # noqa except Exception as ex: raise OWSNoApplicableCode( "Failed to retrieve WPS capabilities. Error: [{}].".format( str(ex))) try: process = wps.describeprocess(job.process) except Exception as ex: raise OWSNoApplicableCode( "Failed to retrieve WPS process description. Error: [{}].". format(str(ex))) # prepare inputs job.progress = JOB_PROGRESS_GET_INPUTS job.save_log(logger=task_logger, message="Fetching job input definitions.") complex_inputs = [] for process_input in process.dataInputs: if WPS_COMPLEX_DATA in process_input.dataType: complex_inputs.append(process_input.identifier) try: wps_inputs = list() for process_input in job.inputs: input_id = get_any_id(process_input) process_value = get_any_value(process_input) # in case of array inputs, must repeat (id,value) input_values = process_value if isinstance( process_value, list) else [process_value] # we need to support file:// scheme but PyWPS doesn't like them so remove the scheme file:// input_values = [ val[7:] if str(val).startswith("file://") else val for val in input_values ] # need to use ComplexDataInput structure for complex input # need to use literal String for anything else than complex # TODO: BoundingBox not supported wps_inputs.extend([ (input_id, ComplexDataInput(input_value) if input_id in complex_inputs else str(input_value)) for input_value in input_values ]) except KeyError: wps_inputs = [] # prepare outputs job.progress = JOB_PROGRESS_GET_OUTPUTS job.save_log(logger=task_logger, message="Fetching job output definitions.") wps_outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA) for o in process.processOutputs] mode = EXECUTE_MODE_ASYNC if job.execute_async else EXECUTE_MODE_SYNC job.progress = JOB_PROGRESS_EXECUTE_REQUEST job.save_log(logger=task_logger, message="Starting job process execution.") job.save_log( logger=task_logger, message= "Following updates could take a while until the Application Package answers..." ) wps_worker = get_pywps_service(environ=settings, is_worker=True) execution = wps_worker.execute_job(job.process, wps_inputs=wps_inputs, wps_outputs=wps_outputs, mode=mode, job_uuid=job.id) if not execution.process and execution.errors: raise execution.errors[0] # adjust status location wps_status_path = get_wps_local_status_location( execution.statusLocation, settings) job.progress = JOB_PROGRESS_EXECUTE_STATUS_LOCATION LOGGER.debug("WPS status location that will be queried: [%s]", wps_status_path) if not wps_status_path.startswith("http") and not os.path.isfile( wps_status_path): LOGGER.warning( "WPS status location not resolved to local path: [%s]", wps_status_path) job.save_log(logger=task_logger, level=logging.DEBUG, message="Updated job status location: [{}].".format( wps_status_path)) job.status = map_status(STATUS_STARTED) job.status_message = execution.statusMessage or "{} initiation done.".format( str(job)) job.status_location = wps_status_path job.request = execution.request job.response = execution.response job.progress = JOB_PROGRESS_EXECUTE_MONITOR_START job.save_log(logger=task_logger, message="Starting monitoring of job execution.") job = store.update_job(job) max_retries = 5 num_retries = 0 run_step = 0 while execution.isNotComplete() or run_step == 0: if num_retries >= max_retries: raise Exception( "Could not read status document after {} retries. Giving up." .format(max_retries)) try: # NOTE: # Don't actually log anything here until process is completed (success or fail) so that underlying # WPS execution logs can be inserted within the current job log and appear continuously. # Only update internal job fields in case they get referenced elsewhere. job.progress = JOB_PROGRESS_EXECUTE_MONITOR_LOOP execution = check_wps_status(location=wps_status_path, settings=settings, sleep_secs=wait_secs(run_step)) job_msg = (execution.statusMessage or "").strip() job.response = execution.response job.status = map_status(execution.getStatus()) job.status_message = "Job execution monitoring (progress: {}%, status: {})."\ .format(execution.percentCompleted, job_msg or "n/a") # job.save_log(logger=task_logger) # job = store.update_job(job) if execution.isComplete(): job.mark_finished() job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END msg_progress = " (status: {})".format( job_msg) if job_msg else "" if execution.isSucceded(): job.status = map_status(STATUS_SUCCEEDED) job.status_message = "Job succeeded{}.".format( msg_progress) wps_package.retrieve_package_job_log(execution, job) job.save_log(logger=task_logger) job_results = [ ows2json_output(output, process, settings) for output in execution.processOutputs ] job.results = make_results_relative( job_results, settings) else: task_logger.debug("Job failed.") job.status_message = "Job failed{}.".format( msg_progress) wps_package.retrieve_package_job_log(execution, job) job.save_log(errors=execution.errors, logger=task_logger) task_logger.debug( "Mapping Job references with generated WPS locations.") map_locations(job, settings) except Exception as exc: num_retries += 1 task_logger.debug("Exception raised: %s", repr(exc)) job.status_message = "Could not read status XML document for {!s}. Trying again...".format( job) job.save_log(errors=execution.errors, logger=task_logger) sleep(1) else: # job.status_message = "Update {}...".format(str(job)) # job.save_log(logger=task_logger) num_retries = 0 run_step += 1 finally: job = store.update_job(job) except Exception as exc: LOGGER.exception("Failed running [%s]", job) job.status = map_status(STATUS_FAILED) job.status_message = "Failed to run {!s}.".format(job) job.progress = JOB_PROGRESS_EXECUTE_MONITOR_ERROR exception_class = "{}.{}".format( type(exc).__module__, type(exc).__name__) errors = "{0}: {1!s}".format(exception_class, exc) job.save_log(errors=errors, logger=task_logger) finally: job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END job.status_message = "Job {}.".format(job.status) job.save_log(logger=task_logger) # Send email if requested if job.notification_email is not None: job.progress = JOB_PROGRESS_NOTIFY try: notify_job_complete(job, job.notification_email, settings) message = "Notification email sent successfully." job.save_log(logger=task_logger, message=message) except Exception as exc: exception_class = "{}.{}".format( type(exc).__module__, type(exc).__name__) exception = "{0}: {1!s}".format(exception_class, exc) message = "Couldn't send notification email ({})".format( exception) job.save_log(errors=message, logger=task_logger, message=message) job.progress = JOB_PROGRESS_DONE job.save_log(logger=task_logger, message="Job task complete.") job = store.update_job(job) return job.status