def make_result_link(result_id, result, job_id, settings):
    # type: (str, Union[ExecutionResultObject, ExecutionResultArray], AnyUUID, SettingsType) -> List[str]
    """
    Convert a result definition as ``value`` into the corresponding ``reference`` for output transmission.

    Each item of the result (a single definition, or every element of an array result) produces one
    ``Link`` header value. Literal data is first written to a plain text file under the WPS output directory,
    in a sub-directory named after the job, and the link points at that file. Items that are already file
    references (``href``) are linked as is.

    .. seealso::
        :rfc:`8288`: HTTP ``Link`` header specification.

    :param result_id: Output identifier, used as link relation (with ``.<index>`` suffix for array items).
    :param result: Single result definition or list of result definitions.
    :param job_id: Identifier of the job, used as sub-directory for files created from literal data.
    :param settings: Application settings used to resolve the WPS output URL and directory.
    :returns: List of ``Link`` header values, one per result item.
    """
    is_array = isinstance(result, list)
    values = result if is_array else [result]
    wps_url = get_wps_output_url(settings).strip("/")
    links = []
    for idx, value in enumerate(values):
        suffix = f".{idx}" if is_array else ""
        # inspect the current item, not the whole result: an array result is a list without 'href'/'value' key
        key = get_any_value(value, key=True)
        if key != "href":
            # literal data to be converted to link
            # plain text file must be created containing the raw literal data
            typ = ContentType.TEXT_PLAIN  # as per '/rec/core/process-execute-sync-document-ref'
            enc = "UTF-8"
            out = get_wps_output_dir(settings)
            val = get_any_value(value, data=True, file=False)
            # job identifier can be an UUID object, which 'os.path.join' does not accept
            loc = os.path.join(str(job_id), result_id + suffix + ".txt")
            url = f"{wps_url}/{loc}"
            path = os.path.join(out, loc)
            with open(path, mode="w", encoding=enc) as out_file:
                # literal can be a number or boolean, only text can be written
                out_file.write(str(val))
        else:
            fmt = get_field(value, "format", default={"mediaType": ContentType.TEXT_PLAIN})
            typ = get_field(fmt, "mime_type", search_variations=True, default=ContentType.TEXT_PLAIN)
            enc = get_field(fmt, "encoding", search_variations=True, default=None)
            url = get_any_value(value, data=False, file=True)  # should already include full path
            # compare the resolved media-type (the format definition itself is a mapping, never equal to it)
            if typ == ContentType.TEXT_PLAIN and not enc:  # only if text, otherwise binary content could differ
                enc = "UTF-8"  # default both omit/empty
        encoding = f"; charset={enc}" if enc else ""
        links.append(f"<{url}>; rel=\"{result_id}{suffix}\"; type={typ}{encoding}")
    return links
def get_results(self, monitor_reference):
    # type: (str) -> JobResults
    """
    Fetch the outputs produced by a successful job and normalize them to the listing representation.

    The ``/results`` endpoint of the monitored job is queried. Responses that nest their contents under an
    ``outputs`` field are unwrapped, and a mapping of ``{output-id: definition}`` is converted into a list of
    definitions that each carry their ``id``.

    :param monitor_reference: Job status location to which ``/results`` is appended.
    :returns: Parsed response contents, as a list of outputs whenever a mapping of outputs was detected.
    """
    # query '/results' rather than '/outputs' (the latter is handled below as a Weaver-specific listing)
    response = self.make_request(method="GET", url=monitor_reference + "/results", retry=True)
    response.raise_for_status()
    contents = response.json()

    if "outputs" in contents:
        nested = contents["outputs"]
        if isinstance(nested, dict):
            # mapping of output IDs nested under 'outputs'
            # an ID found directly in it means 'outputs' is itself the name of a specific output
            if get_any_id(nested) is None:
                contents = nested
        elif isinstance(nested, list):
            # listing of outputs nested under 'outputs' (i.e.: as Weaver-specific '/outputs' endpoint)
            if all(get_any_id(item) is not None for item in nested):
                contents = nested

    # anything that is not a mapping of output definitions is returned untouched
    if not isinstance(contents, dict):
        return contents
    if any(get_any_value(item) is None for item in contents.values()):
        return contents

    # rebuild the expected (old) list format for calling method
    listing = []
    for out_id, out_def in contents.items():
        out_def.update({"id": out_id})
        listing.append(out_def)
    return listing
def validate_outputs(self, job_id, result_payload, result_file_content):
    """
    Assert that the job produced a single file output at the expected URL and location on disk.

    :param job_id: Identifier of the job whose outputs are validated.
    :param result_payload: Parsed response body containing the ``outputs`` listing.
    :param result_file_content: Text expected to be found in the produced output file.
    :raises AssertionError: When any of the expected references, files or contents do not match.
    """
    # check that output is HTTP reference to file
    output_values = {out["id"]: get_any_value(out) for out in result_payload["outputs"]}
    assert len(output_values) == 1
    # force string representation since the identifier is also employed to build file system paths
    wps_uuid = str(self.job_store.fetch_by_id(job_id).wps_id)
    wps_out_path = "{}{}".format(self.settings["weaver.url"], self.settings["weaver.wps_output_path"])
    wps_output = "{}/{}/{}".format(wps_out_path, wps_uuid, self.out_file)
    assert output_values[self.out_key] == wps_output

    # check that actual output file was created in expected location along with XML job status
    wps_outdir = self.settings["weaver.wps_output_dir"]
    wps_out_file = os.path.join(wps_outdir, job_id, self.out_file)
    assert not os.path.exists(os.path.join(wps_outdir, self.out_file)), \
        "File is expected to be created in sub-directory of Job ID, not directly in WPS output directory."
    # job log, XML status and output directory can be retrieved with both Job UUID and underlying WPS UUID reference
    assert os.path.isfile(os.path.join(wps_outdir, "{}.log".format(wps_uuid)))
    assert os.path.isfile(os.path.join(wps_outdir, "{}.xml".format(wps_uuid)))
    assert os.path.isfile(os.path.join(wps_outdir, wps_uuid, self.out_file))
    assert os.path.isfile(os.path.join(wps_outdir, "{}.log".format(job_id)))
    assert os.path.isfile(os.path.join(wps_outdir, "{}.xml".format(job_id)))
    assert os.path.isfile(wps_out_file)

    # validate content
    # explicit encoding avoids comparison results that depend on the platform default
    with open(wps_out_file, mode="r", encoding="utf-8") as res_file:
        assert res_file.read() == result_file_content
def format_inputs(self, workflow_inputs):
    # type: (CWL_RuntimeInputList) -> OWS_InputDataValues
    """
    Translate the submitted :term:`CWL` workflow inputs into their :mod:`OWSLib.wps` equivalent for execution.

    Array inputs are expanded into repeated ``(id, value)`` pairs. Inputs that the remote process describes as
    complex data are wrapped into :class:`ComplexDataInput` with their media-type and encoding when available.

    :param workflow_inputs: input definitions (ID and value) submitted to the workflow.
    :returns: list of ``(id, value)`` tuples ready for submission to the remote WPS process.
    """
    # identifiers of all inputs that the remote process describes as complex data
    complex_ids = [
        described.identifier
        for described in self.wps_process.dataInputs
        if WPS_COMPLEX_DATA in described.dataType
    ]

    converted = []
    for item in workflow_inputs:
        item_id = get_any_id(item)
        item_value = get_any_value(item)
        if item_value is None:
            # optional input resolved as omitted
            continue

        # array inputs must repeat (id,value), single values are handled as an array of one item
        entries = item_value if isinstance(item_value, list) else [item_value]
        for entry in entries:
            media_type = None
            charset = None
            if isinstance(entry, dict):
                # complex input (File): employ its location and resolve the optional format
                entry_format = entry.get("format")  # format as namespace:link
                entry = entry["location"]
                if entry_format:
                    entry_format = get_format(entry_format, default=DEFAULT_FORMAT)  # format as content-type
                    media_type = entry_format.mime_type or None
                    charset = entry_format.encoding or None  # avoid empty string
            if isinstance(entry, (int, float)):
                # owslib only accepts strings, not numbers directly
                entry = str(entry)
            # TODO: BoundingBox not supported
            if item_id in complex_ids:
                # need to use ComplexDataInput structure for complex input
                entry = ComplexDataInput(entry, mimeType=media_type, encoding=charset)
            converted.append((item_id, entry))
    return converted
def validate_outputs(self, job_id, result_payload, outputs_payload, result_file_content):
    """
    Assert that ``/results`` and ``/outputs`` contents both reference the expected file, and that it exists.

    :param job_id: Identifier of the job whose outputs are validated.
    :param result_payload: Parsed ``/results`` body (mapping of output ID to definition).
    :param outputs_payload: Parsed ``/outputs`` body (with an ``outputs`` listing).
    :param result_file_content: Text expected to be found in the produced output file.
    :raises AssertionError: When any of the expected references, files or contents do not match.
    """
    # generic details shared by both representations
    job = self.job_store.fetch_by_id(job_id)
    wps_uuid = str(job.wps_id)
    base_url = self.settings["weaver.wps_output_url"]
    expected_ref = f"{base_url}/{wps_uuid}/{self.out_file}"

    # --- validate /results path format ---
    assert len(result_payload) == 1
    assert isinstance(result_payload, dict)
    assert isinstance(result_payload[self.out_key], dict)
    by_result = {}
    for res_id, res_def in result_payload.items():
        by_result[res_id] = get_any_value(res_def)
    assert by_result[self.out_key] == expected_ref

    # --- validate /outputs path format ---
    # the output must be an HTTP reference to the file
    by_output = {}
    for out_def in outputs_payload["outputs"]:
        by_output[out_def["id"]] = get_any_value(out_def)
    assert len(by_output) == 1
    assert by_output[self.out_key] == expected_ref

    # the file itself must be located under the job sub-directory, along with log and XML job status
    base_dir = self.settings["weaver.wps_output_dir"]
    job_file = os.path.join(base_dir, job_id, self.out_file)
    misplaced = os.path.join(base_dir, self.out_file)
    assert not os.path.exists(misplaced), \
        "File is expected to be created in sub-directory of Job ID, not directly in WPS output directory."

    # job log, XML status and output directory can be retrieved with both Job UUID and underlying WPS UUID reference
    for ref in (wps_uuid, job_id):
        assert os.path.isfile(os.path.join(base_dir, f"{ref}.log"))
        assert os.path.isfile(os.path.join(base_dir, f"{ref}.xml"))
        if ref is wps_uuid:
            assert os.path.isfile(os.path.join(base_dir, wps_uuid, self.out_file))
    assert os.path.isfile(job_file)

    # validate content
    with open(job_file, mode="r", encoding="utf-8") as handle:
        found = handle.read()
    assert found == result_file_content
def parse_wps_inputs(wps_process, job):
    """
    Match the submitted job input values against the WPS process description to build execution inputs.

    Inputs described as complex data are wrapped in :class:`ComplexDataInput`, anything else is converted to
    its literal string representation. Array values are expanded as repeated ``(id, value)`` pairs.

    :param wps_process: Described WPS process providing ``dataInputs`` definitions.
    :param job: Job holding the submitted inputs, either as mapping or as listing.
    :returns: List of ``(id, value)`` tuples, or an empty list if a :class:`KeyError` occurred while parsing.
    """

    def _without_file_scheme(item):
        # we need to support file:// scheme but PyWPS doesn't like them so remove the scheme file://
        # the reference is either nested within a dict definition or provided directly
        ref = get_any_value(item) if isinstance(item, dict) else item
        if str(ref).startswith("file://"):
            return ref[7:]
        return ref

    complex_inputs = [
        described.identifier
        for described in wps_process.dataInputs
        if WPS_COMPLEX_DATA in described.dataType
    ]

    try:
        wps_inputs = []
        # parse both dict and list type inputs
        if isinstance(job.inputs, dict):
            job_inputs = job.inputs.items()
        else:
            job_inputs = job.get("inputs", [])

        for entry in job_inputs:
            if isinstance(entry, tuple):
                input_id = entry[0]
                raw_value = entry[1]
            else:
                input_id = get_any_id(entry)
                raw_value = get_any_value(entry)

            # in case of array inputs, must repeat (id,value)
            candidates = raw_value if isinstance(raw_value, list) else [raw_value]
            cleaned = [_without_file_scheme(candidate) for candidate in candidates]

            # need to use ComplexDataInput structure for complex input
            # need to use literal String for anything else than complex
            # TODO: BoundingBox not supported
            for item in cleaned:
                if input_id in complex_inputs:
                    wps_inputs.append((input_id, ComplexDataInput(item)))
                else:
                    wps_inputs.append((input_id, str(item)))
    except KeyError:
        return []
    return wps_inputs
def _parse_inputs(inputs):
    # type: (Optional[Union[str, JSON]]) -> Union[OperationResult, JSON]
    """
    Parse inputs provided as file reference, CLI literals or JSON contents into execution input values.

    :param inputs:
        Either a path to a file to load, a list of ``key=value`` literals or a single-item list with a file
        path (as obtained from the CLI), or already loaded contents in any of the supported schemas
        (CWL, OGC, OLD).
    :returns:
        Parsed input values, or a failed :class:`OperationResult` describing why parsing was not possible.
        Errors are never raised, they are always reported through the returned result.
    """
    try:
        if isinstance(inputs, str):
            # loaded inputs could be mapping or listing format (any schema: CWL, OGC, OLD)
            inputs = load_file(inputs) if inputs != "" else []
        if not inputs or not isinstance(inputs, (dict, list)):
            return OperationResult(False, "No inputs or invalid schema provided.", inputs)
        if isinstance(inputs, list):
            # list of literals from CLI
            if any("=" in value for value in inputs):
                inputs = repr2json_input_values(inputs)
            # single empty item: must be checked before the file case below,
            # since an empty string also matches the 'no "=" found' condition and would never be reached
            elif len(inputs) == 1 and inputs[0] == "":
                inputs = []
            # list of single file from CLI (because of 'nargs')
            elif len(inputs) == 1 and "=" not in inputs[0]:
                inputs = load_file(inputs[0])
        if isinstance(inputs, list):
            inputs = {"inputs": inputs}  # OLD format provided directly
        # consider possible ambiguity if literal CWL input is named 'inputs'
        # - if value of 'inputs' is an object, it can collide with 'OGC' schema,
        #   unless 'value/href' are present or their sub-dict don't have CWL 'class'
        # - if value of 'inputs' is an array, it can collide with 'OLD' schema,
        #   unless 'value/href' (and 'id' technically) are present
        values = inputs.get("inputs", null)
        ambiguous_object = isinstance(values, dict) and get_any_value(values) is null and "class" not in values
        ambiguous_array = isinstance(values, list) and all(
            isinstance(v, dict) and get_any_value(v) is null for v in values
        )
        if values is null or ambiguous_object or ambiguous_array:
            values = cwl2json_input_values(inputs)
        if values is null:
            raise ValueError("Input values parsed as null. Could not properly detect employed schema.")
    except Exception as exc:
        # best-effort operation: any failure is reported through the result rather than raised to the caller
        return OperationResult(False, f"Failed inputs parsing with error: [{exc!s}].", inputs)
    return values
def get_job_inputs(request):
    # type: (Request) -> HTTPException
    """
    Retrieve the inputs of a job.

    Each input of the job resolved from the request is reported by its ``id`` and ``value``, together with
    the job links. The body is passed through the job inputs schema before being returned.
    """
    job = get_job(request)
    listing = []
    for item in job.inputs:
        listing.append({"id": get_any_id(item), "value": get_any_value(item)})
    body = {
        "inputs": listing,
        "links": job.links(request, self_link="inputs"),
    }
    body = sd.JobInputsSchema().deserialize(body)
    return HTTPOk(json=body)
def stage_results(self, results, expected_outputs, out_dir):
    # type: (JobResults, CWL_ExpectedOutputs, str) -> None
    """
    Stage the results of the remote :term:`Job` execution into the specified local output directory.

    The implementing remote :term:`Process` definition should call this operation after :meth:`execute`.
    Results whose ID is not part of ``expected_outputs`` are ignored.

    .. note::
        The :term:`CWL` runner expects the output file(s) to be written matching definition in
        ``expected_outputs``, but this definition could be a glob pattern to match multiple file and/or nested
        directories. We cannot rely on specific file names to be mapped, since glob can match many
        (eg: ``"*.txt"``).
    """
    for result in results:
        result_id = get_any_id(result)
        if result_id not in expected_outputs:
            continue

        # plan ahead when list of multiple output values could be supported
        references = get_any_value(result)
        if not isinstance(references, list):
            references = [references]

        target_dir = out_dir.rstrip("/")
        for reference in references:
            file_name = reference.rsplit("/", 1)[-1]
            target_path = "/".join([target_dir, file_name])

            # performance improvement:
            #   Bypass download if file can be resolved as local resource (already fetched or same server).
            #   Because CWL expects the file to be in specified 'out_dir', make a link for it to be found
            #   even though the file is stored in the full job output location instead (already staged by step).
            local_path = map_wps_output_location(reference, self.settings)
            use_link = bool(local_path)
            if use_link:
                LOGGER.info("Detected result [%s] from [%s] as local reference to this instance. "
                            "Skipping fetch and using local copy in output destination: [%s]",
                            result_id, reference, target_path)
                LOGGER.debug("Mapped result [%s] to local reference: [%s]", reference, local_path)
                source = local_path
            else:
                LOGGER.info("Fetching result [%s] from [%s] to CWL output destination: [%s]",
                            result_id, reference, target_path)
                source = reference
            fetch_file(source, target_dir, settings=self.settings, link=use_link)
def get_results(job, container):
    # type: (Job, AnySettingsContainer) -> JSON
    """
    Obtains the results with extended full WPS output URL as applicable and according to configuration settings.

    Results holding a ``data`` or ``value`` field are reported under ``data``. Any other result is reported
    under ``href``, prefixed by the WPS output URL when the reference does not contain a scheme.

    :returns: Mapping with an ``outputs`` listing of ``{"id": ..., "data"|"href": ...}`` items.
    """
    base_url = get_wps_output_url(container)
    if not base_url.endswith("/"):
        base_url += "/"

    def _as_output(result):
        is_literal = "data" in result or "value" in result
        content = get_any_value(result)
        if is_literal:
            return {"id": get_any_id(result), "data": content}
        if "://" not in content:
            # reference relative to the WPS outputs location
            content = base_url + str(content).lstrip("/")
        return {"id": get_any_id(result), "href": content}

    return {"outputs": [_as_output(result) for result in job.results]}
def execute(self, workflow_inputs, out_dir, expected_outputs):
    """
    Execute the process on the remote WPS-1 provider, monitor it until completion and fetch its outputs.

    :param workflow_inputs:
        Mapping of input IDs to submitted values. Values equal to ``"null"`` are skipped,
        lists are submitted as repeated inputs and dict values are resolved by their ``location``.
    :param out_dir: Directory where the expected output files are written.
    :param expected_outputs: Mapping of output IDs to the file name to write under ``out_dir``.
    :raises Exception:
        For any failure during the operation. The original error is chained as cause and the message is
        formatted as ``<module>.<class>: <message>``.
    """
    self.update_status("Preparing execute request for remote WPS1 provider.",
                       REMOTE_JOB_PROGRESS_REQ_PREP, status.STATUS_RUNNING)
    LOGGER.debug("Execute process WPS request for %s", self.process)
    try:
        try:
            wps = WebProcessingService(url=self.provider, headers=self.cookies, verify=self.verify)
            raise_on_xml_exception(wps._capabilities)  # noqa: W0212
        except Exception as ex:
            raise OWSNoApplicableCode(
                "Failed to retrieve WPS capabilities. Error: [{}].".format(str(ex))
            ) from ex
        try:
            process = wps.describeprocess(self.process)
        except Exception as ex:
            raise OWSNoApplicableCode(
                "Failed to retrieve WPS process description. Error: [{}].".format(str(ex))
            ) from ex

        # prepare inputs
        complex_inputs = [
            process_input.identifier
            for process_input in process.dataInputs
            if WPS_COMPLEX_DATA in process_input.dataType
        ]

        wps_inputs = []
        for input_key in workflow_inputs:
            input_val = workflow_inputs[input_key]
            # remove any 'null' input, should employ the 'default' of the remote WPS process
            if input_val == "null":
                continue

            # in case of array inputs, must repeat (id,value)
            # in case of complex input (File), obtain location, otherwise get data value
            if not isinstance(input_val, list):
                input_val = [input_val]

            for val in input_val:
                if isinstance(val, dict):
                    val = val["location"]

                # owslib only accepts strings, not numbers directly
                if isinstance(val, (int, float)):
                    val = str(val)

                if val.startswith("file://"):
                    # we need to host file starting with file:// scheme
                    val = self.host_file(val)

                # need to use ComplexDataInput structure for complex input
                # TODO: BoundingBox not supported
                if input_key in complex_inputs:
                    val = ComplexDataInput(val)
                wps_inputs.append((input_key, val))

        # prepare outputs
        outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA) for o in process.processOutputs
                   if o.identifier in expected_outputs]

        self.update_status("Executing job on remote WPS1 provider.",
                           REMOTE_JOB_PROGRESS_EXECUTION, status.STATUS_RUNNING)

        mode = EXECUTE_MODE_ASYNC
        execution = wps.execute(self.process, inputs=wps_inputs, output=outputs, mode=mode, lineage=True)
        if not execution.process and execution.errors:
            raise execution.errors[0]

        self.update_status("Monitoring job on remote WPS1 provider : [{0}]".format(self.provider),
                           REMOTE_JOB_PROGRESS_MONITORING, status.STATUS_RUNNING)

        max_retries = 5
        num_retries = 0
        run_step = 0
        job_id = "<undefined>"
        # always check the status at least once, then until the remote execution reports completion
        while execution.isNotComplete() or run_step == 0:
            if num_retries >= max_retries:
                raise Exception("Could not read status document after {} retries. Giving up.".format(max_retries))
            try:
                execution = check_wps_status(location=execution.statusLocation, verify=self.verify,
                                             sleep_secs=wait_secs(run_step))
                job_id = execution.statusLocation.replace(".xml", "").split("/")[-1]
                LOGGER.debug(get_log_monitor_msg(job_id, status.map_status(execution.getStatus()),
                                                 execution.percentCompleted, execution.statusMessage,
                                                 execution.statusLocation))
                self.update_status(get_job_log_msg(status=status.map_status(execution.getStatus()),
                                                   message=execution.statusMessage,
                                                   progress=execution.percentCompleted,
                                                   duration=None),  # get if available
                                   map_progress(execution.percentCompleted,
                                                REMOTE_JOB_PROGRESS_MONITORING,
                                                REMOTE_JOB_PROGRESS_FETCH_OUT),
                                   status.STATUS_RUNNING)
            except Exception as exc:
                # only consecutive failures count toward the retry limit (counter is reset on success)
                num_retries += 1
                LOGGER.debug("Exception raised: %r", exc)
                sleep(1)
            else:
                num_retries = 0
                run_step += 1

        if not execution.isSucceded():
            exec_msg = execution.statusMessage or "Job failed."
            LOGGER.debug(get_log_monitor_msg(job_id, status.map_status(execution.getStatus()),
                                             execution.percentCompleted, exec_msg, execution.statusLocation))
            raise Exception(exec_msg)

        self.update_status("Fetching job outputs from remote WPS1 provider.",
                           REMOTE_JOB_PROGRESS_FETCH_OUT, status.STATUS_RUNNING)

        results = [ows2json_output(output, process) for output in execution.processOutputs]
        for result in results:
            result_id = get_any_id(result)
            result_val = get_any_value(result)
            if result_id in expected_outputs:
                # This is where cwl expect the output file to be written
                # TODO We will probably need to handle multiple output value...
                dst_fn = "/".join([out_dir.rstrip("/"), expected_outputs[result_id]])

                # TODO Should we handle other type than File reference?
                resp = request_extra("get", result_val, allow_redirects=True, settings=self.settings)
                LOGGER.debug("Fetching result output from [%s] to cwl output destination: [%s]", result_val, dst_fn)
                with open(dst_fn, mode="wb") as dst_fh:
                    dst_fh.write(resp.content)

    except Exception as exc:
        exception_class = "{}.{}".format(type(exc).__module__, type(exc).__name__)
        errors = "{0}: {1!s}".format(exception_class, exc)
        LOGGER.exception(exc)
        # keep the original error as explicit cause of the generic exception raised to the caller
        raise Exception(errors) from exc

    self.update_status("Execution on remote WPS1 provider completed.",
                       REMOTE_JOB_PROGRESS_COMPLETED, status.STATUS_SUCCEEDED)
def stage_results(self, results, expected_outputs, out_dir):
    # type: (JobResults, CWL_ExpectedOutputs, str) -> None
    """
    Stage the results of the remote :term:`Job` execution into the specified local output directory.

    The implementing remote :term:`Process` definition should call this operation after :meth:`execute`.
    Results whose ID is not part of ``expected_outputs`` are ignored.

    .. note::
        The :term:`CWL` runner expects the output file(s) to be written matching definition in
        ``expected_outputs``, but this definition could be a glob pattern to match multiple file and/or nested
        directories. We cannot rely on specific file names to be mapped, since glob can match many
        (eg: ``"*.txt"``).

    .. seealso::
        Function :func:`weaver.processes.convert.any2cwl_io` defines a generic glob pattern using the output ID
        and expected file extension based on Content-Type format. Since the remote :term:`WPS` :term:`Process`
        doesn't necessarily produces file names with the output ID as expected to find them (could be anything),
        staging must patch locations to let :term:`CWL` runtime resolve the files according to glob definitions.

    .. warning::
        Only remote :term:`Provider` implementations (which auto-generate a pseudo :term:`CWL` to map components)
        that produce outputs with inconsistent file names as described above should set attribute
        :attr:`WpsProcessInterface.stage_output_id_nested` accordingly. For :term:`Process` that directly provide
        an actual :term:`CWL` :term:`Application Package` definition (e.g.: Docker application), auto-mapping
        of glob patterns should be avoided, as it is expected that the :term:`CWL` contains real mapping to be
        respected for correct execution and retrieval of outputs from the application.
    """
    for result in results:
        result_id = get_any_id(result)
        if result_id not in expected_outputs:
            continue

        # plan ahead when list of multiple output values could be supported
        references = get_any_value(result)
        if not isinstance(references, list):
            references = [references]

        # when requested, files are staged in a sub-directory named by the output ID
        dir_parts = [out_dir.rstrip("/")]
        if self.stage_output_id_nested:
            dir_parts.append(res_id := result_id)
        target_dir = "/".join(dir_parts)
        os.makedirs(target_dir, mode=0o700, exist_ok=True)

        for reference in references:
            file_name = reference.rsplit("/", 1)[-1]
            target_path = "/".join([target_dir, file_name])

            # performance improvement:
            #   Bypass download if file can be resolved as local resource (already fetched or same server).
            #   Because CWL expects the file to be in specified 'out_dir', make a link for it to be found
            #   even though the file is stored in the full job output location instead (already staged by step).
            local_path = map_wps_output_location(reference, self.settings)
            use_link = bool(local_path)
            if use_link:
                LOGGER.info("Detected result [%s] from [%s] as local reference to this instance. "
                            "Skipping fetch and using local copy in output destination: [%s]",
                            result_id, reference, target_path)
                LOGGER.debug("Mapped result [%s] to local reference: [%s]", reference, local_path)
                source = local_path
            else:
                LOGGER.info("Fetching result [%s] from [%s] to CWL output destination: [%s]",
                            result_id, reference, target_path)
                source = reference
            fetch_file(source, target_dir, settings=self.settings, link=use_link)
def parse_wps_inputs(wps_process, job):
    # type: (ProcessOWS, Job) -> List[Tuple[str, OWS_Input_Type]]
    """
    Parses expected WPS process inputs against submitted job input values considering supported process definitions.

    Inputs described as complex data are wrapped in :class:`ComplexDataInput`, along with the media-type and
    encoding submitted with the value when available. Anything else is converted to its literal string
    representation. Array values are expanded as repeated ``(id, value)`` pairs.

    :param wps_process: Described WPS process providing ``dataInputs`` definitions.
    :param job: Job holding the submitted inputs, either as mapping or as listing.
    :returns: List of ``(id, value)`` tuples, or an empty list if a :class:`KeyError` occurred while parsing.
    """
    complex_inputs = {}  # type: Dict[str, ComplexInput]
    for process_input in wps_process.dataInputs:
        if WPS_COMPLEX_DATA in process_input.dataType:
            complex_inputs[process_input.identifier] = process_input

    try:
        wps_inputs = []
        # parse both dict and list type inputs
        job_inputs = job.inputs.items() if isinstance(job.inputs, dict) else job.get("inputs", [])
        for job_input in job_inputs:
            if isinstance(job_input, tuple):
                input_id = job_input[0]
                input_val = job_input[1]
                job_input = input_val
            else:
                input_id = get_any_id(job_input)
                input_val = get_any_value(job_input)
            # in case of array inputs, must repeat (id,value)
            if isinstance(input_val, list):
                input_values = input_val
                input_details = input_val  # each value has its own metadata
            else:
                input_values = [input_val]
                input_details = [job_input]  # metadata directly in definition, not nested per array value

            # we need to support file:// scheme but PyWPS doesn't like them so remove the scheme file://
            cleaned_values = []
            for val in input_values:
                # file reference either nested in a dict definition, or provided directly
                ref = get_any_value(val) if isinstance(val, dict) else val
                cleaned_values.append(ref[7:] if str(ref).startswith("file://") else ref)

            for input_value, input_detail in zip(cleaned_values, input_details):
                # need to use ComplexDataInput structure for complex input
                if input_id in complex_inputs:
                    # if provided, pass down specified data input format to allow validation against supported formats
                    ctype = get_field(input_detail, "type", default=None)
                    encoding = None
                    if not ctype:
                        media_format = get_field(input_detail, "format", default=None)
                        if isinstance(media_format, dict):
                            # details are nested under the 'format' object, look there first
                            # previous lookup directly in the input definition is preserved as fallback
                            ctype = (
                                get_field(media_format, "mime_type", search_variations=True, default=None)
                                or get_field(input_detail, "mime_type", search_variations=True, default=None)
                            )
                            encoding = (
                                get_field(media_format, "encoding", search_variations=True, default=None)
                                or get_field(input_detail, "encoding", search_variations=True, default=None)
                            )
                    wps_inputs.append((input_id, ComplexDataInput(input_value, mimeType=ctype, encoding=encoding)))
                # need to use literal String for anything else than complex
                # FIXME: pre-validate allowed literal values?
                # TODO: BoundingBox not supported
                else:
                    wps_inputs.append((input_id, str(input_value)))
    except KeyError:
        wps_inputs = []
    return wps_inputs
def collect_statistics(process, settings=None, job=None, rss_start=None):
    # type: (Optional[psutil.Process], Optional[SettingsType], Optional[Job], Optional[int]) -> Optional[Statistics]
    """
    Collect any available execution statistics and store them in the :term:`Job` if provided.

    :param process: Process from which to obtain memory and resource usage, if any.
    :param settings: Application settings, used to resolve the WPS output directory of relative results.
    :param job: Job from which logs and results are inspected, and where statistics are stored.
    :param rss_start: RSS memory measured before execution, to report only the memory used by the execution.
    :returns: Collected statistics, or ``None`` if nothing could be collected or an error occurred.
    """
    try:
        mem_used = None
        if job:
            mem_line = next((line for line in job.logs if "cwltool" in line and "memory used" in line), None)
            if mem_line:
                mem_info = mem_line.split(":")[-1].strip()
                mem_used = parse_number_with_unit(mem_info, binary=True)

        stats = {}  # type: JSON
        if mem_used:
            stats["application"] = {
                # see: 'cwltool.job.JobBase.process_monitor', reported memory in logs uses 'rss'
                "usedMemory": apply_number_with_unit(mem_used, binary=True),
                "usedMemoryBytes": mem_used,
            }

        rss = None
        if process:
            proc_info = process.memory_full_info()
            rss = getattr(proc_info, "rss", 0)
            uss = getattr(proc_info, "uss", 0)
            vms = getattr(proc_info, "vms", 0)
            stats["process"] = {
                "rss": apply_number_with_unit(rss, binary=True),
                "rssBytes": rss,
                "uss": apply_number_with_unit(uss, binary=True),
                "ussBytes": uss,
                "vms": apply_number_with_unit(vms, binary=True),
                "vmsBytes": vms,
            }
            fields = [("usedThreads", "num_threads"), ("usedCPU", "cpu_num"), ("usedHandles", "num_handles")]
            for field, method in fields:
                # not every method is necessarily available on the process object, report 0 when missing
                func = getattr(process, method, None)
                stats["process"][field] = func() if func is not None else 0

        if rss_start and rss:
            # diff of RSS between start/end to consider only execution of the job steps
            # this more accurately reports used memory by the execution itself, omitting celery worker's base memory
            rss_diff = rss - rss_start
            stats["process"]["usedMemory"] = apply_number_with_unit(rss_diff, binary=True)
            stats["process"]["usedMemoryBytes"] = rss_diff

        total_size = 0
        if job:
            stats["outputs"] = {}
            for result in job.results:
                res_ref = get_any_value(result, file=True)
                if res_ref and isinstance(res_ref, str):
                    if res_ref.startswith(f"/{job.id}"):  # pseudo-relative reference
                        out_dir = get_wps_output_dir(settings)
                        res_ref = os.path.join(out_dir, res_ref[1:])
                    if os.path.isfile(res_ref):
                        res_stat = os.stat(res_ref)
                        res_id = get_any_id(result)
                        res_size = res_stat.st_size
                        stats["outputs"][res_id] = {
                            "size": apply_number_with_unit(res_size, binary=True),
                            "sizeBytes": res_size,
                        }
                        total_size += res_size
            # the 'process' section only exists if a process was provided
            # create it when missing, otherwise all collected statistics would be lost by the raised error
            proc_stats = stats.setdefault("process", {})
            proc_stats["totalSize"] = apply_number_with_unit(total_size, binary=True)
            proc_stats["totalSizeBytes"] = total_size

        if stats and job:
            job.statistics = stats
        return stats or None
    except Exception as exc:  # pragma: no cover
        # statistics are informative only, their collection must never fail the calling operation
        LOGGER.warning("Ignoring error that occurred during statistics collection [%s]", str(exc), exc_info=exc)
    return None
def get_job_results_response(job, container, headers=None):
    # type: (Job, AnySettingsContainer, Optional[AnyHeadersContainer]) -> AnyResponseType
    """
    Generates the :term:`OGC` compliant :term:`Job` results response according to submitted execution parameters.

    Parameters that impact the format of the response are:
        - Amount of outputs to be returned.
        - Parameter ``response: raw|document``
        - Parameter ``transmissionMode: value|reference`` per output if ``response: raw``.

    .. seealso::
        More details available for each combination:
        - https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response
        - https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7

    :param job: Job for which to generate the results response.
    :param container: Application settings.
    :param headers: Additional headers to provide in the response.
    :returns: Response with the results as JSON document, raw data, file contents and/or ``Link`` references.
    :raises HTTPNotImplemented: When multiple values would need to be returned by value in a raw response.
    """
    raise_job_dismissed(job, container)
    raise_job_bad_status(job, container)

    # when 'response=document', ignore 'transmissionMode=value|reference', respect it when 'response=raw'
    # See:
    #   - https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7 (/req/core/job-results-async-document)
    #   - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-document
    is_raw = job.execution_response == ExecuteResponse.RAW
    results, refs = get_results(job, container, value_key="value",
                                schema=JobInputsOutputsSchema.OGC,  # not strict to provide more format details
                                link_references=is_raw)
    headers = headers or {}
    # header names are case-insensitive: don't override a 'Location' provided with any letter case
    if not any(str(name).lower() == "location" for name in headers):
        headers["Location"] = job.status_url(container)

    if not is_raw:
        # note:
        #   Cannot add "links" field in response body because variable Output ID keys are directly at the root
        #   Possible conflict with an output that would be named "links".
        results = sd.Result().deserialize(results)
        return HTTPOk(json=results, headers=headers)

    if not results:  # avoid schema validation error if all by reference
        # Status code 204 for empty body
        # see:
        #   - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-ref
        refs.extend(headers.items())
        return HTTPNoContent(headers=refs)

    # raw response can be data-only value, link-only or a mix of them
    # at this point, at least one result is returned by value (empty results were handled above)
    # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-one
    out_vals = list(results.items())  # type: List[Tuple[str, ExecutionResultValue]]  # noqa
    out_info = out_vals[0][-1]        # type: ExecutionResultValue
    out_type = get_any_value(out_info, key=True)
    out_data = get_any_value(out_info)

    # FIXME: https://github.com/crim-ca/weaver/issues/376
    #  implement multipart, both for multi-output IDs and array-output under same ID
    if len(results) > 1 or (isinstance(out_data, list) and len(out_data) > 1):
        # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-multi
        raise HTTPNotImplemented(json={
            "code": "NotImplemented",
            "type": "NotImplemented",
            "detail": "Multipart results with 'transmissionMode=value' and 'response=raw' not implemented.",
        })

    # single value only
    out_data = out_data[0] if isinstance(out_data, list) else out_data
    if out_type == "href":
        out_path = map_wps_output_location(out_data, container, exists=True, url=False)
        out_type = out_info.get("type")  # noqa
        out_headers = get_file_headers(out_path, download_headers=True, content_headers=True, content_type=out_type)
        resp = FileResponse(out_path)
        resp.headers.update(out_headers)
        resp.headers.update(headers)
    else:
        resp = HTTPOk(body=out_data, charset="UTF-8", content_type=ContentType.TEXT_PLAIN, headers=headers)

    if refs:
        # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-ref
        # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-mixed-multi
        resp.headerlist.extend(refs)
    return resp
def get_results(
    job,                                # type: Job
    container,                          # type: AnySettingsContainer
    value_key=None,                     # type: Optional[str]
    schema=JobInputsOutputsSchema.OLD,  # type: JobInputsOutputsSchemaType
    link_references=False,              # type: bool
):                                      # type: (...) -> Tuple[ExecutionResults, HeadersTupleType]
    """
    Collects the results of a job, expanding relative output locations with the configured WPS output URL.

    :param job: job from which to retrieve results.
    :param container: any container giving access to instance settings (to resolve reference output location).
    :param value_key:
        Key under which literal values are reported for the listing representation.
        When omitted, ``data`` is used. File references are always reported under ``href``.
    :param schema:
        Selects the representation of the results (listing or mapping).
        A ``+strict`` suffix limits the fields reported for file references.
    :param link_references:
        When enabled, outputs requested with the ``reference`` transmission mode are removed from the
        returned results and provided as ``Link`` headers instead.
    :returns:
        Tuple of:
            - List or mapping of all outputs, each with minimally an ID and a value.
            - List of ``Link`` headers for outputs requested by reference. Empty if none apply.
    """
    settings = get_settings(container)
    base_url = get_wps_output_url(settings)
    if not base_url.endswith("/"):
        base_url += "/"

    schema = JobInputsOutputsSchema.get(str(schema).lower(), default=JobInputsOutputsSchema.OLD)
    strict = schema.endswith("+strict")
    schema = schema.split("+")[0]
    ogc_api = schema == JobInputsOutputsSchema.OGC

    outputs = {} if ogc_api else []
    fmt_key = "mediaType" if ogc_api else "mimeType"
    out_ref = {}
    if link_references:
        out_ref = convert_output_params_schema(job.outputs, JobInputsOutputsSchema.OGC)
    references = {}

    for result in job.results:
        is_literal = "data" in result or "value" in result
        value = get_any_value(result)
        out_id = get_any_id(result)
        out_mode = out_ref.get(out_id, {}).get("transmissionMode")
        as_ref = link_references and out_mode == ExecuteTransmissionMode.REFERENCE

        if is_literal:
            if ogc_api:
                out_key = "value"
            elif value_key:
                out_key = value_key
            else:
                out_key = "data"
            # FIXME: BoundingBox not implemented (https://github.com/crim-ca/weaver/issues/51)
            dtype = result.get("dataType", any2wps_literal_datatype(value, is_value=True) or "string")
            output = {out_key: value}
            output["dataType"] = {"name": dtype} if ogc_api else dtype
        else:
            # fix paths relative to instance endpoint,
            # but leave explicit links as is (eg: S3 bucket, remote HTTP, etc.)
            if value.startswith("/"):
                value = str(value).lstrip("/")
            if "://" not in value:
                value = base_url + value
            output = {"href": value}

            # media-type is required for the rest to be there, other fields optional
            if "mimeType" not in result:
                result["mimeType"] = get_format(value, default=ContentType.TEXT_PLAIN).mime_type
            media_type = result["mimeType"]
            if ogc_api or not strict:
                output["type"] = media_type
            if as_ref or not (ogc_api and strict):
                fmt = {fmt_key: media_type}
                for field in ("encoding", "schema"):
                    if field in result:
                        fmt[field] = result[field]
                output["format"] = fmt

        if ogc_api or as_ref:
            # mapping representation: repeated IDs are aggregated into a list (array output)
            mapping = references if as_ref else outputs
            if out_id not in mapping:
                mapping[out_id] = output
            else:
                collected = mapping[out_id]
                if not isinstance(collected, list):
                    collected = [collected]
                collected.append(output)
                mapping[out_id] = collected
        else:
            # listing representation: ID is inserted first
            outputs.append({"id": out_id, **output})

    # links are generated only once all outputs of a same ID were aggregated (array case),
    # since indices could be needed to convert the requested link references
    headers = [
        ("Link", link)
        for out_id, output in references.items()
        for link in make_result_link(out_id, output, job.id, settings)
    ]
    return outputs, headers
def get_results(job, container, value_key=None, ogc_api=False):
    # type: (Job, AnySettingsContainer, Optional[str], bool) -> Union[List[JSON], JSON]
    """
    Collects the results of a job, expanding relative output locations with the configured WPS output URL.

    :param job: job from which to retrieve results.
    :param container: any container giving access to instance settings (to resolve reference output location).
    :param value_key:
        Key under which literal values are reported when ``ogc_api`` is disabled.
        When omitted, ``data`` is used. File references are always reported under ``href``.
    :param ogc_api:
        If ``True``, results are formatted as a mapping of output ID to value (``OGC API - Processes`` format).
        Otherwise, they are returned as a list of outputs, each including its ``id``.
    :returns: list or mapping of all outputs, each with minimally an ID and a value.
    """
    base_url = get_wps_output_url(container)
    if not base_url.endswith("/"):
        base_url += "/"

    outputs = {} if ogc_api else []
    fmt_key = "mediaType" if ogc_api else "mimeType"

    for result in job.results:
        is_literal = "data" in result or "value" in result
        value = get_any_value(result)
        out_id = get_any_id(result)

        if is_literal:
            if ogc_api:
                out_key = "value"
            elif value_key:
                out_key = value_key
            else:
                out_key = "data"
            # FIXME: BoundingBox not implemented (https://github.com/crim-ca/weaver/issues/51)
            dtype = result.get("dataType", any2wps_literal_datatype(value, is_value=True) or "string")
            output = {out_key: value}
            output["dataType"] = {"name": dtype} if ogc_api else dtype
        else:
            # fix paths relative to instance endpoint,
            # but leave explicit links as is (eg: S3 bucket, remote HTTP, etc.)
            if value.startswith("/"):
                value = str(value).lstrip("/")
            if "://" not in value:
                value = base_url + value
            output = {"href": value}

            # media-type is required for the rest to be there, other fields optional
            if "mimeType" not in result:
                result["mimeType"] = get_format(value, default=CONTENT_TYPE_TEXT_PLAIN).mime_type
            fmt = {fmt_key: result["mimeType"]}
            for field in ("encoding", "schema"):
                if field in result:
                    fmt[field] = result[field]
            output["format"] = fmt

        if not ogc_api:
            # listing representation: ID is inserted first
            outputs.append({"id": out_id, **output})
        elif out_id not in outputs:
            outputs[out_id] = output
        else:
            # repeated IDs are aggregated into a list (array output)
            collected = outputs[out_id]
            if not isinstance(collected, list):
                collected = [collected]
            collected.append(output)
            outputs[out_id] = collected
    return outputs
def execute(self, workflow_inputs, out_dir, expected_outputs):
    """
    Runs the process on the remote ADES and retrieves its expected outputs.

    The process is deployed first when it is not visible, and its visibility is then set to public.
    The execution request is submitted, the job status is polled until it reaches a finished state,
    and each expected result is finally downloaded under ``out_dir``.

    :param workflow_inputs:
        Mapping of input ID to its value, or list of values.
        Dictionary values holding a ``location`` are sent as ``href``, anything else as ``data``.
    :param out_dir: directory where the fetched outputs are written.
    :param expected_outputs: mapping of output ID to the file name to write under ``out_dir``.
    :raises Exception:
        If the execute request does not respond with status code ``201``,
        or if the job finished with a status other than succeeded.
    """
    # TODO: test
    visible = self.is_visible()
    if not visible:  # includes private visibility and non-existing cases
        if visible is None:
            LOGGER.info("Process [%s] access is unauthorized on [%s] - deploying as admin.", self.process, self.url)
        elif visible is False:
            LOGGER.info("Process [%s] is not deployed on [%s] - deploying.", self.process, self.url)
        # TODO: Maybe always redeploy? What about cases of outdated deployed process?
        try:
            self.deploy()
        except Exception as exc:
            # FIXME: support for Spacebel, avoid conflict error incorrectly handled, remove 500 when fixed
            pass_http_error(exc, [HTTPConflict, HTTPInternalServerError])

    LOGGER.info("Process [%s] enforced to public visibility.", self.process)
    try:
        self.set_visibility(visibility=VISIBILITY_PUBLIC)
    # TODO: support for Spacebel, remove when visibility route properly implemented on ADES
    except Exception as exc:
        pass_http_error(exc, HTTPNotFound)

    self.update_status("Preparing execute request for remote ADES.",
                       REMOTE_JOB_PROGRESS_REQ_PREP, status.STATUS_RUNNING)
    LOGGER.debug("Execute process WPS request for [%s]", self.process)

    # single values are processed the same way as an array of one item
    body_inputs = []
    for input_id, input_value in workflow_inputs.items():
        input_items = input_value if isinstance(input_value, list) else [input_value]
        for item in input_items:
            if isinstance(item, dict) and "location" in item:
                body_inputs.append({"id": input_id, "href": item["location"]})
            else:
                body_inputs.append({"id": input_id, "data": item})

    # adjust references that the remote ADES would not be able to resolve as is
    for body_input in body_inputs:
        href = body_input.get("href")
        if not isinstance(href, str):
            continue
        if href.startswith("{0}://".format(OPENSEARCH_LOCAL_FILE_SCHEME)):
            body_input["href"] = "file{0}".format(href[len(OPENSEARCH_LOCAL_FILE_SCHEME):])
        elif href.startswith("file://"):
            body_input["href"] = self.host_file(href)
            LOGGER.debug("Hosting intermediate input [%s] : [%s]", body_input["id"], body_input["href"])

    body_outputs = [
        {"id": output, "transmissionMode": EXECUTE_TRANSMISSION_MODE_REFERENCE}
        for output in expected_outputs
    ]

    self.update_status("Executing job on remote ADES.", REMOTE_JOB_PROGRESS_EXECUTION, status.STATUS_RUNNING)
    execute_body = {
        "mode": EXECUTE_MODE_ASYNC,
        "response": EXECUTE_RESPONSE_DOCUMENT,
        "inputs": body_inputs,
        "outputs": body_outputs,
    }
    request_url = self.url + process_jobs_uri.format(process_id=self.process)
    response = self.make_request(method="POST", url=request_url, json=execute_body, retry=True)
    if response.status_code != 201:
        raise Exception("Was expecting a 201 status code from the execute request : {0}".format(request_url))

    job_status_uri = response.headers["Location"]
    job_status = self.get_job_status(job_status_uri)
    job_status_value = status.map_status(job_status["status"])

    self.update_status("Monitoring job on remote ADES : {0}".format(job_status_uri),
                       REMOTE_JOB_PROGRESS_MONITORING, status.STATUS_RUNNING)

    while job_status_value not in status.JOB_STATUS_CATEGORIES[status.STATUS_CATEGORY_FINISHED]:
        sleep(5)
        job_status = self.get_job_status(job_status_uri)
        job_status_value = status.map_status(job_status["status"])
        job_percent = job_status.get("percentCompleted", 0)
        job_message = get_any_message(job_status)

        LOGGER.debug(get_log_monitor_msg(job_status["jobID"], job_status_value, job_percent,
                                         job_message, job_status.get("statusLocation")))
        log_msg = get_job_log_msg(status=job_status_value,
                                  message=job_message,
                                  progress=job_percent,
                                  duration=job_status.get("duration", None))  # get if available
        log_progress = map_progress(job_percent, REMOTE_JOB_PROGRESS_MONITORING, REMOTE_JOB_PROGRESS_FETCH_OUT)
        self.update_status(log_msg, log_progress, status.STATUS_RUNNING)

    if job_status_value != status.STATUS_SUCCEEDED:
        LOGGER.debug(get_log_monitor_msg(job_status["jobID"],
                                         job_status_value,
                                         job_status.get("percentCompleted", 0),
                                         get_any_message(job_status),
                                         job_status.get("statusLocation")))
        raise Exception(job_status)

    self.update_status("Fetching job outputs from remote ADES.",
                       REMOTE_JOB_PROGRESS_FETCH_OUT, status.STATUS_RUNNING)
    for result in self.get_job_results(job_status["jobID"]):
        result_id = get_any_id(result)
        if result_id not in expected_outputs:
            continue
        # This is where cwl expect the output file to be written
        # TODO We will probably need to handle multiple output value...
        dst_fn = "/".join([out_dir.rstrip("/"), expected_outputs[result_id]])

        # TODO Should we handle other type than File reference?
        result_ref = get_any_value(result)
        resp = request_extra("get", result_ref, allow_redirects=True, settings=self.settings)
        LOGGER.debug("Fetching result output from [%s] to cwl output destination: [%s]", result_ref, dst_fn)
        with open(dst_fn, mode="wb") as dst_fh:
            dst_fh.write(resp.content)

    self.update_status("Execution on remote ADES completed.",
                       REMOTE_JOB_PROGRESS_COMPLETED, status.STATUS_SUCCEEDED)
def test_get_any_value():
    """
    Validates which value is resolved from the literal and reference fields of a definition.

    Covers the fallback to ``default``, the ``data``/``value`` literal fields, the ``href``/``reference``
    link fields, and the exclusion of either kind using ``data=False`` or ``file=False``.
    """
    link = "http://localhost/test.txt"

    # nothing to extract from an empty definition, the default is returned as is
    assert get_any_value({}) is None
    assert get_any_value({}, default=null) is null
    assert get_any_value({}, default=1) == 1

    # literal fields are resolved unless their lookup is disabled
    for literal_key in ["data", "value"]:
        assert get_any_value({literal_key: 2}) == 2
        assert get_any_value({literal_key: 2}, default=1) == 2
        assert get_any_value({literal_key: 2}, data=False) is None
        assert get_any_value({literal_key: 2}, default=1, data=False) == 1

    # reference fields are resolved unless their lookup is disabled
    for link_key in ["href", "reference"]:
        assert get_any_value({link_key: link}) == link
        assert get_any_value({link_key: link}, default=1) == link
        assert get_any_value({link_key: link}, file=False) is None
        assert get_any_value({link_key: link}, file=False, default=1) == 1

    # field named 'file' is not one of the resolved fields
    assert get_any_value({"file": link}) is None
    # nothing is resolved when both kinds of lookup are disabled
    assert get_any_value({"data": 1, "value": 2, "href": link}, file=False, data=False) is None
def execute_process(self, job_id, url, headers=None):
    """
    Executes the job identified by ``job_id`` and monitors it until completion.

    The job is fetched from the job store, the WPS capabilities and process description are requested
    from ``url``, the job inputs and outputs are converted for WPS execution, and the execution is
    submitted through the PyWPS service worker. The WPS status location is then polled until the
    execution completes, and the job status, logs and results are saved in the job store.
    A notification email is sent at the end if the job defines one.

    Any exception raised during these steps is caught: the job is marked as failed and the error is
    saved in its logs rather than being propagated.

    :param self: task instance; only ``self.request.id`` is read, to be stored as the job task ID
        (presumably a bound Celery task, the decorator is not visible from here - to be confirmed).
    :param job_id: identifier employed to fetch the job from the job store.
    :param url: WPS endpoint from which capabilities and process description are retrieved.
    :param headers: request headers from which cookie headers are extracted for the WPS requests.
    :returns: resulting status of the job.
    """
    from weaver.wps.service import get_pywps_service

    LOGGER.debug("Job execute process called.")
    settings = get_settings(app)
    task_logger = get_task_logger(__name__)
    load_pywps_config(settings)

    task_logger.debug("Job task setup.")

    # reset the connection because we are in a forked celery process
    db = get_db(app, reset_connection=True)
    store = db.get_store(StoreJobs)

    job = store.fetch_by_id(job_id)
    job.task_id = self.request.id
    job.progress = JOB_PROGRESS_SETUP
    job.save_log(logger=task_logger, message="Job task setup completed.")
    job = store.update_job(job)

    try:
        # any failure to obtain the capabilities or the process description
        # is reported as the same type of OWS error, with a distinct message
        try:
            job.progress = JOB_PROGRESS_DESCRIBE
            job.save_log(
                logger=task_logger,
                message="Execute WPS request for process [{!s}]".format(
                    job.process))
            ssl_verify = get_ssl_verify_option("get", url, settings=settings)
            wps = WebProcessingService(url=url,
                                       headers=get_cookie_headers(headers),
                                       verify=ssl_verify)
            set_wps_language(wps, accept_language=job.accept_language)
            raise_on_xml_exception(wps._capabilities)  # noqa
        except Exception as ex:
            raise OWSNoApplicableCode(
                "Failed to retrieve WPS capabilities. Error: [{}].".format(
                    str(ex)))
        try:
            process = wps.describeprocess(job.process)
        except Exception as ex:
            raise OWSNoApplicableCode(
                "Failed to retrieve WPS process description. Error: [{}].".
                format(str(ex)))

        # prepare inputs
        job.progress = JOB_PROGRESS_GET_INPUTS
        job.save_log(logger=task_logger,
                     message="Fetching job input definitions.")
        # collect IDs of inputs described as complex data by the process
        complex_inputs = []
        for process_input in process.dataInputs:
            if WPS_COMPLEX_DATA in process_input.dataType:
                complex_inputs.append(process_input.identifier)

        try:
            wps_inputs = list()
            for process_input in job.inputs:
                input_id = get_any_id(process_input)
                process_value = get_any_value(process_input)
                # in case of array inputs, must repeat (id,value)
                input_values = process_value if isinstance(
                    process_value, list) else [process_value]

                # we need to support file:// scheme but PyWPS doesn't like them so remove the scheme file://
                # (the 7 first characters dropped correspond to the length of 'file://')
                input_values = [
                    val[7:] if str(val).startswith("file://") else val
                    for val in input_values
                ]

                # need to use ComplexDataInput structure for complex input
                # need to use literal String for anything else than complex
                # TODO: BoundingBox not supported
                wps_inputs.extend([
                    (input_id, ComplexDataInput(input_value)
                     if input_id in complex_inputs else str(input_value))
                    for input_value in input_values
                ])
        except KeyError:
            # all inputs are dropped if a KeyError occurs while converting any of them
            wps_inputs = []

        # prepare outputs
        job.progress = JOB_PROGRESS_GET_OUTPUTS
        job.save_log(logger=task_logger,
                     message="Fetching job output definitions.")
        # pairs of (output ID, flag indicating if the output is complex data)
        wps_outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA)
                       for o in process.processOutputs]

        mode = EXECUTE_MODE_ASYNC if job.execute_async else EXECUTE_MODE_SYNC
        job.progress = JOB_PROGRESS_EXECUTE_REQUEST
        job.save_log(logger=task_logger,
                     message="Starting job process execution.")
        job.save_log(
            logger=task_logger,
            message=
            "Following updates could take a while until the Application Package answers..."
        )

        wps_worker = get_pywps_service(environ=settings, is_worker=True)
        execution = wps_worker.execute_job(job.process,
                                           wps_inputs=wps_inputs,
                                           wps_outputs=wps_outputs,
                                           mode=mode,
                                           job_uuid=job.id)
        # execution without resolved process but with errors is considered failed, first error is raised
        if not execution.process and execution.errors:
            raise execution.errors[0]

        # adjust status location
        wps_status_path = get_wps_local_status_location(
            execution.statusLocation, settings)
        job.progress = JOB_PROGRESS_EXECUTE_STATUS_LOCATION
        LOGGER.debug("WPS status location that will be queried: [%s]",
                     wps_status_path)
        # only warn, the monitoring below is attempted regardless
        if not wps_status_path.startswith("http") and not os.path.isfile(
                wps_status_path):
            LOGGER.warning(
                "WPS status location not resolved to local path: [%s]",
                wps_status_path)
        job.save_log(logger=task_logger,
                     level=logging.DEBUG,
                     message="Updated job status location: [{}].".format(
                         wps_status_path))

        job.status = map_status(STATUS_STARTED)
        job.status_message = execution.statusMessage or "{} initiation done.".format(
            str(job))
        job.status_location = wps_status_path
        job.request = execution.request
        job.response = execution.response
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_START
        job.save_log(logger=task_logger,
                     message="Starting monitoring of job execution.")
        job = store.update_job(job)

        max_retries = 5
        num_retries = 0
        run_step = 0
        # 'run_step == 0' ensures the status is checked at least once, even if execution is already complete
        while execution.isNotComplete() or run_step == 0:
            # counts consecutive failures only, since the counter is reset after each successful check
            if num_retries >= max_retries:
                raise Exception(
                    "Could not read status document after {} retries. Giving up."
                    .format(max_retries))
            try:
                # NOTE:
                #   Don't actually log anything here until process is completed (success or fail) so that underlying
                #   WPS execution logs can be inserted within the current job log and appear continuously.
                #   Only update internal job fields in case they get referenced elsewhere.
                job.progress = JOB_PROGRESS_EXECUTE_MONITOR_LOOP
                execution = check_wps_status(location=wps_status_path,
                                             settings=settings,
                                             sleep_secs=wait_secs(run_step))
                job_msg = (execution.statusMessage or "").strip()
                job.response = execution.response
                job.status = map_status(execution.getStatus())
                job.status_message = "Job execution monitoring (progress: {}%, status: {})."\
                    .format(execution.percentCompleted, job_msg or "n/a")
                # job.save_log(logger=task_logger)
                # job = store.update_job(job)

                if execution.isComplete():
                    job.mark_finished()
                    job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END
                    msg_progress = " (status: {})".format(
                        job_msg) if job_msg else ""
                    if execution.isSucceded():
                        job.status = map_status(STATUS_SUCCEEDED)
                        job.status_message = "Job succeeded{}.".format(
                            msg_progress)
                        wps_package.retrieve_package_job_log(execution, job)
                        job.save_log(logger=task_logger)
                        job_results = [
                            ows2json_output(output, process, settings)
                            for output in execution.processOutputs
                        ]
                        job.results = make_results_relative(
                            job_results, settings)
                    else:
                        task_logger.debug("Job failed.")
                        job.status_message = "Job failed{}.".format(
                            msg_progress)
                        wps_package.retrieve_package_job_log(execution, job)
                        job.save_log(errors=execution.errors,
                                     logger=task_logger)
                    task_logger.debug(
                        "Mapping Job references with generated WPS locations.")
                    map_locations(job, settings)

            except Exception as exc:
                # failed check is retried after a short pause, without incrementing the monitoring step
                num_retries += 1
                task_logger.debug("Exception raised: %s", repr(exc))
                job.status_message = "Could not read status XML document for {!s}. Trying again...".format(
                    job)
                job.save_log(errors=execution.errors, logger=task_logger)
                sleep(1)
            else:
                # job.status_message = "Update {}...".format(str(job))
                # job.save_log(logger=task_logger)
                num_retries = 0
                run_step += 1
            finally:
                # job is saved after every monitoring attempt, whether it succeeded or not
                job = store.update_job(job)

    except Exception as exc:
        # any error from the above steps marks the job as failed instead of raising from the task
        LOGGER.exception("Failed running [%s]", job)
        job.status = map_status(STATUS_FAILED)
        job.status_message = "Failed to run {!s}.".format(job)
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_ERROR
        exception_class = "{}.{}".format(
            type(exc).__module__,
            type(exc).__name__)
        errors = "{0}: {1!s}".format(exception_class, exc)
        job.save_log(errors=errors, logger=task_logger)
    finally:
        # always executed: the status message is replaced by the final job status before logging
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END
        job.status_message = "Job {}.".format(job.status)
        job.save_log(logger=task_logger)

        # Send email if requested
        if job.notification_email is not None:
            job.progress = JOB_PROGRESS_NOTIFY
            try:
                notify_job_complete(job, job.notification_email, settings)
                message = "Notification email sent successfully."
                job.save_log(logger=task_logger, message=message)
            except Exception as exc:
                # failure to notify is only logged, it does not modify the job status
                exception_class = "{}.{}".format(
                    type(exc).__module__,
                    type(exc).__name__)
                exception = "{0}: {1!s}".format(exception_class, exc)
                message = "Couldn't send notification email ({})".format(
                    exception)
                job.save_log(errors=message,
                             logger=task_logger,
                             message=message)

        job.progress = JOB_PROGRESS_DONE
        job.save_log(logger=task_logger, message="Job task complete.")
        job = store.update_job(job)

    return job.status