def stage_results(self, results, expected_outputs, out_dir):
    # type: (JobResults, CWL_ExpectedOutputs, str) -> None
    """
    Retrieves the remote execution :term:`Job` results for staging locally into the specified output directory.

    This operation should be called by the implementing remote :term:`Process` definition after :meth:`execute`.

    .. note::
        The :term:`CWL` runner expects the output file(s) to be written matching the definition in
        ``expected_outputs``, but this definition could be a glob pattern matching multiple files and/or nested
        directories. We cannot rely on specific file names to be mapped, since a glob can match many
        (e.g.: ``"*.txt"``).

    .. seealso::
        Function :func:`weaver.processes.convert.any2cwl_io` defines a generic glob pattern using the output ID
        and the expected file extension based on the Content-Type format. Since the remote :term:`WPS`
        :term:`Process` doesn't necessarily produce file names with the output ID as expected to find them
        (could be anything), staging must patch locations to let the :term:`CWL` runtime resolve the files
        according to glob definitions.

    .. warning::
        Only remote :term:`Provider` implementations (which auto-generate a pseudo :term:`CWL` to map components)
        that produce outputs with inconsistent file names as described above should set attribute
        :attr:`WpsProcessInterface.stage_output_id_nested` accordingly. For a :term:`Process` that directly
        provides an actual :term:`CWL` :term:`Application Package` definition (e.g.: Docker application),
        auto-mapping of glob patterns should be avoided, as it is expected that the :term:`CWL` contains the
        real mapping to be respected for correct execution and retrieval of outputs from the application.
    """
    for result in results:
        res_id = get_any_id(result)
        if res_id not in expected_outputs:
            continue

        # plan ahead when list of multiple output values could be supported
        result_values = get_any_value(result)
        if not isinstance(result_values, list):
            result_values = [result_values]
        if self.stage_output_id_nested:
            cwl_out_dir = "/".join([out_dir.rstrip("/"), res_id])
        else:
            cwl_out_dir = out_dir.rstrip("/")
        os.makedirs(cwl_out_dir, mode=0o700, exist_ok=True)
        for value in result_values:
            src_name = value.split("/")[-1]
            dst_path = "/".join([cwl_out_dir, src_name])

            # performance improvement:
            #   Bypass download if file can be resolved as local resource (already fetched or same server).
            #   Because CWL expects the file to be in specified 'out_dir', make a link for it to be found
            #   even though the file is stored in the full job output location instead (already staged by step).
            map_path = map_wps_output_location(value, self.settings)
            as_link = False
            if map_path:
                LOGGER.info("Detected result [%s] from [%s] as local reference to this instance. "
                            "Skipping fetch and using local copy in output destination: [%s]",
                            res_id, value, dst_path)
                LOGGER.debug("Mapped result [%s] to local reference: [%s]", value, map_path)
                src_path = map_path
                as_link = True
            else:
                LOGGER.info("Fetching result [%s] from [%s] to CWL output destination: [%s]",
                            res_id, value, dst_path)
                src_path = value
            fetch_file(src_path, cwl_out_dir, settings=self.settings, link=as_link)
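# Illustrative sketch only (not part of the class above): assuming 'any2cwl_io' generated a glob such as
# "output/*.tif" for an output with ID "output", and 'stage_output_id_nested' is enabled, a remote result value
# like "https://ades.example.com/jobs/1234/result.tif" would be staged as:
#     <out_dir>/output/result.tif
# so the CWL runner's glob can resolve it regardless of the file name produced by the remote process.
# The glob pattern and URL above are hypothetical examples.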
def collect_statistics(process, settings=None, job=None, rss_start=None):
    # type: (Optional[psutil.Process], Optional[SettingsType], Optional[Job], Optional[int]) -> Optional[Statistics]
    """
    Collect any available execution statistics and store them in the :term:`Job` if provided.
    """
    try:
        mem_used = None
        if job:
            mem_info = list(filter(lambda line: "cwltool" in line and "memory used" in line, job.logs))
            mem_used = None
            if mem_info:
                mem_info = mem_info[0].split(":")[-1].strip()
                mem_used = parse_number_with_unit(mem_info, binary=True)

        stats = {}  # type: JSON
        if mem_used:
            stats["application"] = {
                # see: 'cwltool.job.JobBase.process_monitor', reported memory in logs uses 'rss'
                "usedMemory": apply_number_with_unit(mem_used, binary=True),
                "usedMemoryBytes": mem_used,
            }

        rss = None
        if process:
            proc_info = process.memory_full_info()
            rss = getattr(proc_info, "rss", 0)
            uss = getattr(proc_info, "uss", 0)
            vms = getattr(proc_info, "vms", 0)
            stats["process"] = {
                "rss": apply_number_with_unit(rss, binary=True),
                "rssBytes": rss,
                "uss": apply_number_with_unit(uss, binary=True),
                "ussBytes": uss,
                "vms": apply_number_with_unit(vms, binary=True),
                "vmsBytes": vms,
            }
            fields = [("usedThreads", "num_threads"), ("usedCPU", "cpu_num"), ("usedHandles", "num_handles")]
            for field, method in fields:
                func = getattr(process, method, None)
                stats["process"][field] = func() if func is not None else 0

        if rss_start and rss:
            # diff of RSS between start/end to consider only execution of the job steps
            # this more accurately reports the memory used by the execution itself,
            # omitting the Celery worker's base memory
            rss_diff = rss - rss_start
            stats["process"]["usedMemory"] = apply_number_with_unit(rss_diff, binary=True)
            stats["process"]["usedMemoryBytes"] = rss_diff

        total_size = 0
        if job:
            stats["outputs"] = {}
            for result in job.results:
                res_ref = get_any_value(result, file=True)
                if res_ref and isinstance(res_ref, str):
                    if res_ref.startswith(f"/{job.id}"):  # pseudo-relative reference
                        out_dir = get_wps_output_dir(settings)
                        res_ref = os.path.join(out_dir, res_ref[1:])
                    if os.path.isfile(res_ref):
                        res_stat = os.stat(res_ref)
                        res_id = get_any_id(result)
                        res_size = res_stat.st_size
                        stats["outputs"][res_id] = {
                            "size": apply_number_with_unit(res_size, binary=True),
                            "sizeBytes": res_size,
                        }
                        total_size += res_size
            stats["process"]["totalSize"] = apply_number_with_unit(total_size, binary=True)
            stats["process"]["totalSizeBytes"] = total_size

        if stats and job:
            job.statistics = stats
        return stats or None
    except Exception as exc:  # pragma: no cover
        LOGGER.warning("Ignoring error that occurred during statistics collection [%s]", str(exc), exc_info=exc)
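# Example (assumed) of a cwltool log line that the filter above would match; the exact wording comes from
# 'cwltool.job.JobBase.process_monitor' and may differ between cwltool versions:
#     "[cwltool] [job my-step] Max memory used: 256MiB"
# Splitting on ':' keeps "256MiB", and 'parse_number_with_unit("256MiB", binary=True)' yields 268435456 bytes.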
def parse_wps_inputs(wps_process, job):
    # type: (ProcessOWS, Job) -> List[Tuple[str, OWS_Input_Type]]
    """
    Parses expected WPS process inputs against submitted job input values considering supported process definitions.
    """
    complex_inputs = {}  # type: Dict[str, ComplexInput]
    for process_input in wps_process.dataInputs:
        if WPS_COMPLEX_DATA in process_input.dataType:
            complex_inputs[process_input.identifier] = process_input

    try:
        wps_inputs = []
        # parse both dict and list type inputs
        job_inputs = job.inputs.items() if isinstance(job.inputs, dict) else job.get("inputs", [])
        for job_input in job_inputs:
            if isinstance(job_input, tuple):
                input_id = job_input[0]
                input_val = job_input[1]
                job_input = input_val
            else:
                input_id = get_any_id(job_input)
                input_val = get_any_value(job_input)
            # in case of array inputs, must repeat (id, value)
            if isinstance(input_val, list):
                input_values = input_val
                input_details = input_val  # each value has its own metadata
            else:
                input_values = [input_val]
                input_details = [job_input]  # metadata directly in definition, not nested per array value

            # we need to support file:// scheme but PyWPS doesn't like them so remove the scheme file://
            input_values = [
                # when value is an array of dict that each contain a file reference
                (get_any_value(val)[7:] if str(get_any_value(val)).startswith("file://") else get_any_value(val))
                if isinstance(val, dict) else
                # when value is directly a single dict with file reference
                (val[7:] if str(val).startswith("file://") else val)
                for val in input_values
            ]

            for input_value, input_detail in zip(input_values, input_details):
                # need to use ComplexDataInput structure for complex input
                if input_id in complex_inputs:
                    # if provided, pass down specified data input format to allow validation against supported formats
                    ctype = get_field(input_detail, "type", default=None)
                    encoding = None
                    if not ctype:
                        media_format = get_field(input_detail, "format", default=None)
                        if isinstance(media_format, dict):
                            ctype = get_field(input_detail, "mime_type", search_variations=True, default=None)
                            encoding = get_field(input_detail, "encoding", search_variations=True, default=None)
                    wps_inputs.append((input_id, ComplexDataInput(input_value, mimeType=ctype, encoding=encoding)))
                # need to use literal String for anything else than complex
                # FIXME: pre-validate allowed literal values?
                # TODO: BoundingBox not supported
                else:
                    wps_inputs.append((input_id, str(input_value)))
    except KeyError:
        wps_inputs = []
    return wps_inputs
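# Example of the 'file://' normalization performed above (illustrative value): an input reference such as
# "file:///tmp/data.json" is stripped of its 7-character scheme to become "/tmp/data.json" before being wrapped
# in 'ComplexDataInput' (for complex inputs) or cast to 'str' (for literals).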
def get_processes(request):
    # type: (PyramidRequest) -> AnyViewResponse
    """
    List registered processes (GetCapabilities).

    Optionally list both local and provider processes.
    """
    try:
        params = sd.GetProcessesQuery().deserialize(request.params)
    except colander.Invalid as ex:
        raise HTTPBadRequest(json={
            "code": "ProcessInvalidParameter",
            "description": "Process query parameters failed validation.",
            "error": colander.Invalid.__name__,
            "cause": str(ex),
            "value": repr_json(ex.value or dict(request.params), force_string=False),
        })

    detail = asbool(params.get("detail", True))
    ignore = asbool(params.get("ignore", True))
    try:
        # get local processes and filter according to schema validity
        # (previously deployed process schemas can become invalid because of modified schema definitions)
        results = get_processes_filtered_by_valid_schemas(request)
        processes, invalid_processes, paging, with_providers, total_processes = results
        if invalid_processes:
            raise HTTPServiceUnavailable(
                "Previously deployed processes are causing invalid schema integrity errors. "
                f"Manual cleanup of following processes is required: {invalid_processes}"
            )

        body = {"processes": processes if detail else [get_any_id(p) for p in processes]}  # type: JSON
        if not with_providers:
            paging = {"page": paging.get("page"), "limit": paging.get("limit")}  # remove other params
            body.update(paging)
        else:
            paging = {}  # disable to remove paging-related links
        try:
            body["links"] = get_process_list_links(request, paging, total_processes)
        except IndexError as exc:
            raise HTTPBadRequest(json={
                "description": str(exc),
                "cause": "Invalid paging parameters.",
                "error": type(exc).__name__,
                "value": repr_json(paging, force_string=False)
            })

        # if 'EMS/HYBRID' and '?providers=True', also fetch each provider's processes
        if with_providers:
            # param 'check' enforced because must fetch for listing of available processes (GetCapabilities)
            # when 'ignore' is not enabled, any failing definition should raise any derived 'ServiceException'
            services = get_provider_services(request, ignore=ignore, check=True)
            body.update({
                "providers": [svc.summary(request, ignore=ignore) if detail else {"id": svc.name}
                              for svc in services]
            })
            invalid_services = [False] * len(services)
            for i, provider in enumerate(services):
                # ignore failing parsing of the service description
                if body["providers"][i] is None:
                    invalid_services[i] = True
                    continue
                # attempt parsing available processes and ignore again failing items
                processes = provider.processes(request, ignore=ignore)
                if processes is None:
                    invalid_services[i] = True
                    continue
                total_processes += len(processes)
                body["providers"][i].update({
                    "processes": processes if detail else [get_any_id(proc) for proc in processes]
                })
            if any(invalid_services):
                LOGGER.debug("Invalid providers dropped due to failing parsing and ignore query: %s",
                             [svc.name for svc, status in zip(services, invalid_services) if status])
                body["providers"] = [svc for svc, ignore in zip(body["providers"], invalid_services) if not ignore]

        body["total"] = total_processes
        body["description"] = sd.OkGetProcessesListResponse.description
        LOGGER.debug("Process listing generated, validating schema...")
        body = sd.MultiProcessesListing().deserialize(body)
        return HTTPOk(json=body)

    except ServiceException as exc:
        LOGGER.debug("Error when listing provider processes using query parameter raised: [%s]", exc, exc_info=exc)
        raise HTTPServiceUnavailable(json={
            "description": "At least one provider could not list its processes. "
                           "Failing provider errors were requested to not be ignored.",
            "exception": fully_qualified_name(exc),
            "error": str(exc)
        })
    except HTTPException:
        raise
    except colander.Invalid as exc:
        raise HTTPBadRequest(json={
            "type": "InvalidParameterValue",
            "title": "Invalid parameter value.",
            "description": "Submitted request parameters are invalid or could not be processed.",
            "cause": clean_json_text_body(f"Invalid schema: [{exc.msg or exc!s}]"),
            "error": exc.__class__.__name__,
            "value": repr_json(exc.value, force_string=False),
        })
def get_eo_images_ids_from_payload(payload):
    return [get_any_id(i) for i in get_eo_images_inputs_from_payload(payload)]
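# Minimal usage sketch (hypothetical payload, assuming EO image inputs follow the usual
# {"id": ..., "href": ...} execute-input structure and are selected by 'get_eo_images_inputs_from_payload'):
#     example_payload = {
#         "inputs": [
#             {"id": "image-nir", "href": "https://example.com/scene-nir.tif"},
#             {"id": "image-red", "href": "https://example.com/scene-red.tif"},
#         ]
#     }
#     get_eo_images_ids_from_payload(example_payload)  # -> ["image-nir", "image-red"]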
def execute(self, workflow_inputs, out_dir, expected_outputs):
    # TODO: test
    visible = self.is_visible()
    if not visible:  # includes private visibility and non-existing cases
        if visible is None:
            LOGGER.info("Process [%s] access is unauthorized on [%s] - deploying as admin.",
                        self.process, self.url)
        elif visible is False:
            LOGGER.info("Process [%s] is not deployed on [%s] - deploying.", self.process, self.url)
        # TODO: Maybe always redeploy? What about cases of outdated deployed process?
        try:
            self.deploy()
        except Exception as exc:
            # FIXME: support for Spacebel, avoid conflict error incorrectly handled, remove 500 when fixed
            pass_http_error(exc, [HTTPConflict, HTTPInternalServerError])

    LOGGER.info("Process [%s] enforced to public visibility.", self.process)
    try:
        self.set_visibility(visibility=VISIBILITY_PUBLIC)
    # TODO: support for Spacebel, remove when visibility route properly implemented on ADES
    except Exception as exc:
        pass_http_error(exc, HTTPNotFound)

    self.update_status("Preparing execute request for remote ADES.",
                       REMOTE_JOB_PROGRESS_REQ_PREP, status.STATUS_RUNNING)
    LOGGER.debug("Execute process WPS request for [%s]", self.process)

    execute_body_inputs = []
    execute_req_id = "id"
    execute_req_input_val_href = "href"
    execute_req_input_val_data = "data"
    for workflow_input_key, workflow_input_value in workflow_inputs.items():
        if isinstance(workflow_input_value, list):
            for workflow_input_value_item in workflow_input_value:
                if isinstance(workflow_input_value_item, dict) and "location" in workflow_input_value_item:
                    execute_body_inputs.append({
                        execute_req_id: workflow_input_key,
                        execute_req_input_val_href: workflow_input_value_item["location"]
                    })
                else:
                    execute_body_inputs.append({
                        execute_req_id: workflow_input_key,
                        execute_req_input_val_data: workflow_input_value_item
                    })
        else:
            if isinstance(workflow_input_value, dict) and "location" in workflow_input_value:
                execute_body_inputs.append({
                    execute_req_id: workflow_input_key,
                    execute_req_input_val_href: workflow_input_value["location"]
                })
            else:
                execute_body_inputs.append({
                    execute_req_id: workflow_input_key,
                    execute_req_input_val_data: workflow_input_value
                })

    for exec_input in execute_body_inputs:
        if execute_req_input_val_href in exec_input and isinstance(exec_input[execute_req_input_val_href], str):
            if exec_input[execute_req_input_val_href].startswith("{0}://".format(OPENSEARCH_LOCAL_FILE_SCHEME)):
                exec_input[execute_req_input_val_href] = "file{0}".format(
                    exec_input[execute_req_input_val_href][len(OPENSEARCH_LOCAL_FILE_SCHEME):])
            elif exec_input[execute_req_input_val_href].startswith("file://"):
                exec_input[execute_req_input_val_href] = self.host_file(exec_input[execute_req_input_val_href])
                LOGGER.debug("Hosting intermediate input [%s] : [%s]",
                             exec_input[execute_req_id], exec_input[execute_req_input_val_href])

    execute_body_outputs = [
        {execute_req_id: output, "transmissionMode": EXECUTE_TRANSMISSION_MODE_REFERENCE}
        for output in expected_outputs
    ]
    self.update_status("Executing job on remote ADES.", REMOTE_JOB_PROGRESS_EXECUTION, status.STATUS_RUNNING)

    execute_body = dict(mode=EXECUTE_MODE_ASYNC,
                        response=EXECUTE_RESPONSE_DOCUMENT,
                        inputs=execute_body_inputs,
                        outputs=execute_body_outputs)
    request_url = self.url + process_jobs_uri.format(process_id=self.process)
    response = self.make_request(method="POST", url=request_url, json=execute_body, retry=True)
    if response.status_code != 201:
        raise Exception("Was expecting a 201 status code from the execute request : {0}".format(request_url))

    job_status_uri = response.headers["Location"]
    job_status = self.get_job_status(job_status_uri)
    job_status_value = status.map_status(job_status["status"])

    self.update_status("Monitoring job on remote ADES : {0}".format(job_status_uri),
                       REMOTE_JOB_PROGRESS_MONITORING, status.STATUS_RUNNING)

    while job_status_value not in status.JOB_STATUS_CATEGORIES[status.STATUS_CATEGORY_FINISHED]:
        sleep(5)
        job_status = self.get_job_status(job_status_uri)
        job_status_value = status.map_status(job_status["status"])

        LOGGER.debug(get_log_monitor_msg(job_status["jobID"], job_status_value,
                                         job_status.get("percentCompleted", 0),
                                         get_any_message(job_status), job_status.get("statusLocation")))
        self.update_status(
            get_job_log_msg(status=job_status_value,
                            message=get_any_message(job_status),
                            progress=job_status.get("percentCompleted", 0),
                            duration=job_status.get("duration", None)),  # get if available
            map_progress(job_status.get("percentCompleted", 0),
                         REMOTE_JOB_PROGRESS_MONITORING, REMOTE_JOB_PROGRESS_FETCH_OUT),
            status.STATUS_RUNNING)

    if job_status_value != status.STATUS_SUCCEEDED:
        LOGGER.debug(get_log_monitor_msg(job_status["jobID"], job_status_value,
                                         job_status.get("percentCompleted", 0),
                                         get_any_message(job_status), job_status.get("statusLocation")))
        raise Exception(job_status)

    self.update_status("Fetching job outputs from remote ADES.",
                       REMOTE_JOB_PROGRESS_FETCH_OUT, status.STATUS_RUNNING)
    results = self.get_job_results(job_status["jobID"])
    for result in results:
        if get_any_id(result) in expected_outputs:
            # This is where cwl expects the output file to be written
            # TODO: We will probably need to handle multiple output values...
            dst_fn = "/".join([out_dir.rstrip("/"), expected_outputs[get_any_id(result)]])

            # TODO: Should we handle other types than File reference?
            resp = request_extra("get", get_any_value(result), allow_redirects=True, settings=self.settings)
            LOGGER.debug("Fetching result output from [%s] to cwl output destination: [%s]",
                         get_any_value(result), dst_fn)
            with open(dst_fn, mode="wb") as dst_fh:
                dst_fh.write(resp.content)

    self.update_status("Execution on remote ADES completed.",
                       REMOTE_JOB_PROGRESS_COMPLETED, status.STATUS_SUCCEEDED)
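# Rough shape of the execute request body assembled above, with illustrative values and assuming the usual
# string values of the EXECUTE_* constants ("async", "document", "reference"):
#     {
#         "mode": "async",
#         "response": "document",
#         "inputs": [{"id": "image", "href": "https://example.com/scene.tif"},
#                    {"id": "threshold", "data": 0.5}],
#         "outputs": [{"id": "result", "transmissionMode": "reference"}]
#     }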
def get_results(
    job,                                # type: Job
    container,                          # type: AnySettingsContainer
    value_key=None,                     # type: Optional[str]
    schema=JobInputsOutputsSchema.OLD,  # type: JobInputsOutputsSchemaType
    link_references=False,              # type: bool
):                                      # type: (...) -> Tuple[ExecutionResults, HeadersTupleType]
    """
    Obtains the job results with extended full WPS output URL as applicable and according to configuration settings.

    :param job: job from which to retrieve results.
    :param container: any container giving access to instance settings (to resolve reference output location).
    :param value_key:
        If not specified, the returned values will have the appropriate ``data``/``href`` key according to the
        content. Otherwise, all values will have the specified key.
    :param schema: Selects which schema to employ for representing the output results (listing or mapping).
    :param link_references:
        If enabled, an output that was requested by reference instead of value will be returned as a ``Link``
        reference.
    :returns:
        Tuple with:
            - List or mapping of all outputs, each with minimally an ID and value under the requested key.
            - List of ``Link`` headers for reference outputs when requested. Empty otherwise.
    """
    settings = get_settings(container)
    wps_url = get_wps_output_url(settings)
    if not wps_url.endswith("/"):
        wps_url = wps_url + "/"
    schema = JobInputsOutputsSchema.get(str(schema).lower(), default=JobInputsOutputsSchema.OLD)
    strict = schema.endswith("+strict")
    schema = schema.split("+")[0]
    ogc_api = schema == JobInputsOutputsSchema.OGC
    outputs = {} if ogc_api else []
    fmt_key = "mediaType" if ogc_api else "mimeType"
    out_ref = convert_output_params_schema(job.outputs, JobInputsOutputsSchema.OGC) if link_references else {}
    references = {}
    for result in job.results:
        rtype = "data" if any(k in result for k in ["data", "value"]) else "href"
        value = get_any_value(result)
        out_key = rtype
        out_id = get_any_id(result)
        out_mode = out_ref.get(out_id, {}).get("transmissionMode")
        as_ref = link_references and out_mode == ExecuteTransmissionMode.REFERENCE
        if rtype == "href":
            # fix paths relative to instance endpoint,
            # but leave explicit links as is (eg: S3 bucket, remote HTTP, etc.)
            if value.startswith("/"):
                value = str(value).lstrip("/")
            if "://" not in value:
                value = wps_url + value
        elif ogc_api:
            out_key = "value"
        elif value_key:
            out_key = value_key
        output = {out_key: value}
        if rtype == "href":  # required for the rest to be there, other fields optional
            if "mimeType" not in result:
                result["mimeType"] = get_format(value, default=ContentType.TEXT_PLAIN).mime_type
            if ogc_api or not strict:
                output["type"] = result["mimeType"]
            if not ogc_api or not strict or as_ref:
                output["format"] = {fmt_key: result["mimeType"]}
                for field in ["encoding", "schema"]:
                    if field in result:
                        output["format"][field] = result[field]
        elif rtype != "href":  # literal data
            # FIXME: BoundingBox not implemented (https://github.com/crim-ca/weaver/issues/51)
            dtype = result.get("dataType", any2wps_literal_datatype(value, is_value=True) or "string")
            if ogc_api:
                output["dataType"] = {"name": dtype}
            else:
                output["dataType"] = dtype
        if ogc_api or as_ref:
            mapping = references if as_ref else outputs
            if out_id in mapping:
                output_list = mapping[out_id]
                if not isinstance(output_list, list):
                    output_list = [output_list]
                output_list.append(output)
                mapping[out_id] = output_list
            else:
                mapping[out_id] = output
        else:
            # if ordered insert supported by python version, insert ID first
            output = dict([("id", out_id)] + list(output.items()))  # noqa
            outputs.append(output)

    # needed to collect and aggregate outputs of same ID first in case of array
    # convert any requested link references using indices if needed
    headers = []
    for out_id, output in references.items():
        res_links = make_result_link(out_id, output, job.id, settings)
        headers.extend([("Link", link) for link in res_links])
    return outputs, headers
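# Rough illustration of the returned structures (assumed shapes, not validated against the schemas):
# for a job with a single file result "output.txt" and 'schema=JobInputsOutputsSchema.OGC' without strict mode,
# the call could return a mapping such as:
#     ({"output": {"href": "https://weaver.example.com/wpsoutputs/<job-id>/output.txt",
#                  "type": "text/plain",
#                  "format": {"mediaType": "text/plain"}}}, [])
# whereas the default OLD schema returns a list of {"id": ..., "href": ..., ...} entries instead of a mapping,
# and the headers list is only populated when 'link_references' is enabled for reference outputs.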
def execute_process(self, job_id, url, headers=None):
    from weaver.wps.service import get_pywps_service

    LOGGER.debug("Job execute process called.")
    settings = get_settings(app)
    task_logger = get_task_logger(__name__)
    load_pywps_config(settings)
    task_logger.debug("Job task setup.")

    # reset the connection because we are in a forked celery process
    db = get_db(app, reset_connection=True)
    store = db.get_store(StoreJobs)

    job = store.fetch_by_id(job_id)
    job.task_id = self.request.id
    job.progress = JOB_PROGRESS_SETUP
    job.save_log(logger=task_logger, message="Job task setup completed.")
    job = store.update_job(job)

    try:
        try:
            job.progress = JOB_PROGRESS_DESCRIBE
            job.save_log(logger=task_logger,
                         message="Execute WPS request for process [{!s}]".format(job.process))
            ssl_verify = get_ssl_verify_option("get", url, settings=settings)
            wps = WebProcessingService(url=url, headers=get_cookie_headers(headers), verify=ssl_verify)
            set_wps_language(wps, accept_language=job.accept_language)
            raise_on_xml_exception(wps._capabilities)  # noqa
        except Exception as ex:
            raise OWSNoApplicableCode("Failed to retrieve WPS capabilities. Error: [{}].".format(str(ex)))
        try:
            process = wps.describeprocess(job.process)
        except Exception as ex:
            raise OWSNoApplicableCode("Failed to retrieve WPS process description. Error: [{}].".format(str(ex)))

        # prepare inputs
        job.progress = JOB_PROGRESS_GET_INPUTS
        job.save_log(logger=task_logger, message="Fetching job input definitions.")
        complex_inputs = []
        for process_input in process.dataInputs:
            if WPS_COMPLEX_DATA in process_input.dataType:
                complex_inputs.append(process_input.identifier)

        try:
            wps_inputs = list()
            for process_input in job.inputs:
                input_id = get_any_id(process_input)
                process_value = get_any_value(process_input)
                # in case of array inputs, must repeat (id, value)
                input_values = process_value if isinstance(process_value, list) else [process_value]

                # we need to support file:// scheme but PyWPS doesn't like them so remove the scheme file://
                input_values = [val[7:] if str(val).startswith("file://") else val for val in input_values]

                # need to use ComplexDataInput structure for complex input
                # need to use literal String for anything else than complex
                # TODO: BoundingBox not supported
                wps_inputs.extend([
                    (input_id, ComplexDataInput(input_value) if input_id in complex_inputs else str(input_value))
                    for input_value in input_values
                ])
        except KeyError:
            wps_inputs = []

        # prepare outputs
        job.progress = JOB_PROGRESS_GET_OUTPUTS
        job.save_log(logger=task_logger, message="Fetching job output definitions.")
        wps_outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA) for o in process.processOutputs]

        mode = EXECUTE_MODE_ASYNC if job.execute_async else EXECUTE_MODE_SYNC
        job.progress = JOB_PROGRESS_EXECUTE_REQUEST
        job.save_log(logger=task_logger, message="Starting job process execution.")
        job.save_log(logger=task_logger,
                     message="Following updates could take a while until the Application Package answers...")

        wps_worker = get_pywps_service(environ=settings, is_worker=True)
        execution = wps_worker.execute_job(job.process,
                                           wps_inputs=wps_inputs, wps_outputs=wps_outputs,
                                           mode=mode, job_uuid=job.id)
        if not execution.process and execution.errors:
            raise execution.errors[0]

        # adjust status location
        wps_status_path = get_wps_local_status_location(execution.statusLocation, settings)
        job.progress = JOB_PROGRESS_EXECUTE_STATUS_LOCATION
        LOGGER.debug("WPS status location that will be queried: [%s]", wps_status_path)
        if not wps_status_path.startswith("http") and not os.path.isfile(wps_status_path):
            LOGGER.warning("WPS status location not resolved to local path: [%s]", wps_status_path)
        job.save_log(logger=task_logger, level=logging.DEBUG,
                     message="Updated job status location: [{}].".format(wps_status_path))

        job.status = map_status(STATUS_STARTED)
        job.status_message = execution.statusMessage or "{} initiation done.".format(str(job))
        job.status_location = wps_status_path
        job.request = execution.request
        job.response = execution.response
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_START
        job.save_log(logger=task_logger, message="Starting monitoring of job execution.")
        job = store.update_job(job)

        max_retries = 5
        num_retries = 0
        run_step = 0
        while execution.isNotComplete() or run_step == 0:
            if num_retries >= max_retries:
                raise Exception("Could not read status document after {} retries. Giving up.".format(max_retries))
            try:
                # NOTE:
                #   Don't actually log anything here until process is completed (success or fail) so that underlying
                #   WPS execution logs can be inserted within the current job log and appear continuously.
                #   Only update internal job fields in case they get referenced elsewhere.
                job.progress = JOB_PROGRESS_EXECUTE_MONITOR_LOOP
                execution = check_wps_status(location=wps_status_path, settings=settings,
                                             sleep_secs=wait_secs(run_step))
                job_msg = (execution.statusMessage or "").strip()
                job.response = execution.response
                job.status = map_status(execution.getStatus())
                job.status_message = "Job execution monitoring (progress: {}%, status: {})."\
                                     .format(execution.percentCompleted, job_msg or "n/a")
                # job.save_log(logger=task_logger)
                # job = store.update_job(job)

                if execution.isComplete():
                    job.mark_finished()
                    job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END
                    msg_progress = " (status: {})".format(job_msg) if job_msg else ""
                    if execution.isSucceded():
                        job.status = map_status(STATUS_SUCCEEDED)
                        job.status_message = "Job succeeded{}.".format(msg_progress)
                        wps_package.retrieve_package_job_log(execution, job)
                        job.save_log(logger=task_logger)
                        job_results = [ows2json_output(output, process, settings)
                                       for output in execution.processOutputs]
                        job.results = make_results_relative(job_results, settings)
                    else:
                        task_logger.debug("Job failed.")
                        job.status_message = "Job failed{}.".format(msg_progress)
                        wps_package.retrieve_package_job_log(execution, job)
                        job.save_log(errors=execution.errors, logger=task_logger)
                    task_logger.debug("Mapping Job references with generated WPS locations.")
                    map_locations(job, settings)

            except Exception as exc:
                num_retries += 1
                task_logger.debug("Exception raised: %s", repr(exc))
                job.status_message = "Could not read status XML document for {!s}. Trying again...".format(job)
                job.save_log(errors=execution.errors, logger=task_logger)
                sleep(1)
            else:
                # job.status_message = "Update {}...".format(str(job))
                # job.save_log(logger=task_logger)
                num_retries = 0
                run_step += 1
            finally:
                job = store.update_job(job)

    except Exception as exc:
        LOGGER.exception("Failed running [%s]", job)
        job.status = map_status(STATUS_FAILED)
        job.status_message = "Failed to run {!s}.".format(job)
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_ERROR
        exception_class = "{}.{}".format(type(exc).__module__, type(exc).__name__)
        errors = "{0}: {1!s}".format(exception_class, exc)
        job.save_log(errors=errors, logger=task_logger)
    finally:
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END
        job.status_message = "Job {}.".format(job.status)
        job.save_log(logger=task_logger)

        # Send email if requested
        if job.notification_email is not None:
            job.progress = JOB_PROGRESS_NOTIFY
            try:
                notify_job_complete(job, job.notification_email, settings)
                message = "Notification email sent successfully."
                job.save_log(logger=task_logger, message=message)
            except Exception as exc:
                exception_class = "{}.{}".format(type(exc).__module__, type(exc).__name__)
                exception = "{0}: {1!s}".format(exception_class, exc)
                message = "Couldn't send notification email ({})".format(exception)
                job.save_log(errors=message, logger=task_logger, message=message)

        job.progress = JOB_PROGRESS_DONE
        job.save_log(logger=task_logger, message="Job task complete.")
        job = store.update_job(job)

    return job.status