Example #1
0
def make_result_link(result_id, result, job_id, settings):
    # type: (str, Union[ExecutionResultObject, ExecutionResultArray], AnyUUID, SettingsType) -> List[str]
    """
    Convert a result definition as ``value`` into the corresponding ``reference`` for output transmission.

    A result provided as a list yields one link per item, each identified by ``<result_id>.<index>``.
    An item that is not already an ``href`` is written as UTF-8 plain text to
    ``<wps-output-dir>/<job_id>/<result_id><suffix>.txt`` and its link refers to that file under the WPS output URL.

    :param result_id: identifier of the result, used as link relation and as base name of any generated file.
    :param result: single result definition, or list of result definitions.
    :param job_id: identifier of the job, used as sub-directory of the WPS output location.
    :param settings: application settings from which the WPS output URL and directory are resolved.
    :returns: ``Link`` header values, one for each result item.

    .. seealso::
        :rfc:`8288`: HTTP ``Link`` header specification.
    """
    is_array = isinstance(result, list)
    values = result if is_array else [result]
    wps_url = get_wps_output_url(settings).strip("/")
    links = []
    for idx, value in enumerate(values):
        suffix = f".{idx}" if is_array else ""
        # inspect the current item rather than the whole result, which is a list for array results
        key = get_any_value(value, key=True)
        if key != "href":
            # literal data to be converted to link
            # plain text file must be created containing the raw literal data
            typ = ContentType.TEXT_PLAIN  # as per '/rec/core/process-execute-sync-document-ref'
            enc = "UTF-8"
            out = get_wps_output_dir(settings)
            val = get_any_value(value, data=True, file=False)
            # job ID could be provided as UUID object, but path operations only accept strings
            loc = os.path.join(str(job_id), result_id + suffix + ".txt")
            url = f"{wps_url}/{loc}"
            path = os.path.join(out, loc)
            with open(path, mode="w", encoding=enc) as out_file:
                out_file.write(val)
        else:
            # format details are defined per item, next to its reference
            fmt = get_field(value, "format", default={"mediaType": ContentType.TEXT_PLAIN})
            typ = get_field(fmt, "mime_type", search_variations=True, default=ContentType.TEXT_PLAIN)
            enc = get_field(fmt, "encoding", search_variations=True, default=None)
            url = get_any_value(value, data=False, file=True)  # should already include full path
            # compare the resolved media-type (not the format definition itself)
            if typ == ContentType.TEXT_PLAIN and not enc:  # only if text, otherwise binary content could differ
                enc = "UTF-8"  # default both omit/empty
        encoding = f"; charset={enc}" if enc else ""
        links.append(f"<{url}>; rel=\"{result_id}{suffix}\"; type={typ}{encoding}")
    return links
Example #2
0
    def get_results(self, monitor_reference):
        # type: (str) -> JobResults
        """
        Fetch the results of a job and normalize them toward a listing of outputs that each carry their ``id``.

        :param monitor_reference: location of the job status, to which ``/results`` is appended.
        :returns:
            List of output definitions when the response could be interpreted as a mapping of output ID to
            output definition, otherwise the (possibly un-nested) response contents as obtained.
        """
        # query the '/results' endpoint rather than '/outputs' (the latter being Weaver-specific, see below)
        response = self.make_request(method="GET", url=monitor_reference + "/results", retry=True)
        response.raise_for_status()
        contents = response.json()

        if "outputs" in contents:
            nested = contents["outputs"]
            # backward compatibility for ADES that nest results under 'outputs', either as:
            #   - a mapping of output IDs (but not a single output that happens to be named 'outputs')
            #   - a list of outputs that each provide their ID (i.e.: as Weaver-specific '/outputs' endpoint)
            if (
                (isinstance(nested, dict) and get_any_id(nested) is None)
                or (isinstance(nested, list) and all(get_any_id(item) is not None for item in nested))
            ):
                contents = nested

        # nothing to rebuild unless a mapping with a value for every output was obtained
        if not isinstance(contents, dict):
            return contents
        if any(get_any_value(item) is None for item in contents.values()):
            return contents

        # rebuild the expected (old) list format for calling method
        listing = []
        for key, item in contents.items():
            item.update(id=key)
            listing.append(item)
        return listing
Example #3
0
    def validate_outputs(self, job_id, result_payload, result_file_content):
        """
        Validate the reported output reference of a job and the files expected in the WPS output directory.

        :param job_id: identifier of the job that produced the outputs.
        :param result_payload: response contents with the ``outputs`` listing of the job.
        :param result_file_content: text expected to be found in the produced output file.
        """
        # the single reported output must be the HTTP reference to the file
        reported = {}
        for output in result_payload["outputs"]:
            reported[output["id"]] = get_any_value(output)
        assert len(reported) == 1
        wps_uuid = self.job_store.fetch_by_id(job_id).wps_id
        base_url = "{}{}".format(self.settings["weaver.url"], self.settings["weaver.wps_output_path"])
        expected_url = "{}/{}/{}".format(base_url, wps_uuid, self.out_file)
        assert reported[self.out_key] == expected_url

        # the file itself must be located under the job sub-directory, along with the XML job status
        out_dir = self.settings["weaver.wps_output_dir"]
        job_out_file = os.path.join(out_dir, job_id, self.out_file)
        stray_file = os.path.join(out_dir, self.out_file)
        assert not os.path.exists(stray_file), (
            "File is expected to be created in sub-directory of Job ID, not directly in WPS output directory."
        )
        # job log, XML status and output directory are reachable by both the WPS UUID and the Job UUID
        expected_locations = (
            ("{}.log".format(wps_uuid),),
            ("{}.xml".format(wps_uuid),),
            (wps_uuid, self.out_file),
            ("{}.log".format(job_id),),
            ("{}.xml".format(job_id),),
        )
        for parts in expected_locations:
            assert os.path.isfile(os.path.join(out_dir, *parts))
        assert os.path.isfile(job_out_file)

        # validate content
        with open(job_out_file) as stream:
            written = stream.read()
        assert written == result_file_content
Example #4
0
    def format_inputs(self, workflow_inputs):
        # type: (CWL_RuntimeInputList) -> OWS_InputDataValues
        """
        Convert submitted :term:`CWL` workflow inputs into corresponding :mod:`OWSLib.wps` representation for execution.

        Array inputs are expanded into repeated ``(id, value)`` pairs, and inputs resolved as ``None`` are dropped.

        :param workflow_inputs: input items (ID and value) submitted to the workflow.
        :returns: converted OWS inputs ready for submission to remote WPS process.
        """
        # identifiers of the process inputs that are described as complex data
        complex_ids = [
            wps_input.identifier
            for wps_input in self.wps_process.dataInputs
            if WPS_COMPLEX_DATA in wps_input.dataType
        ]

        ows_inputs = []
        for item in workflow_inputs:
            item_id = get_any_id(item)
            item_value = get_any_value(item)

            # optional inputs resolved as omitted are not submitted
            if item_value is None:
                continue

            entries = item_value if isinstance(item_value, list) else [item_value]
            is_complex = item_id in complex_ids
            for entry in entries:
                media_type = None
                charset = None
                if isinstance(entry, dict):
                    # complex input (File): use its location, with format converted from namespace:link
                    cwl_format = entry.get("format")
                    entry = entry["location"]
                    if cwl_format:
                        resolved = get_format(cwl_format, default=DEFAULT_FORMAT)  # format as content-type
                        media_type = resolved.mime_type or None
                        charset = resolved.encoding or None  # avoid empty string

                # owslib only accepts strings, not numbers directly
                if isinstance(entry, (int, float)):
                    entry = str(entry)

                # need to use ComplexDataInput structure for complex input
                # TODO: BoundingBox not supported
                if is_complex:
                    entry = ComplexDataInput(entry, mimeType=media_type, encoding=charset)
                ows_inputs.append((item_id, entry))
        return ows_inputs
Example #5
0
    def validate_outputs(self, job_id, result_payload, outputs_payload,
                         result_file_content):
        """
        Validate the output reference reported by both job result representations and the produced files.

        :param job_id: identifier of the job that produced the outputs.
        :param result_payload: response contents of ``/results``, a mapping of output ID to its definition.
        :param outputs_payload: response contents of ``/outputs``, with the listing under ``outputs``.
        :param result_file_content: text expected to be found in the produced output file.
        """
        # get generic details
        wps_uuid = str(self.job_store.fetch_by_id(job_id).wps_id)
        base_url = self.settings["weaver.wps_output_url"]
        expected_url = f"{base_url}/{wps_uuid}/{self.out_file}"

        # --- validate /results path format ---
        assert len(result_payload) == 1
        assert isinstance(result_payload, dict)
        assert isinstance(result_payload[self.out_key], dict)
        from_results = {key: get_any_value(result_payload[key]) for key in result_payload}
        assert from_results[self.out_key] == expected_url

        # --- validate /outputs path format ---

        # check that output is HTTP reference to file
        from_outputs = {}
        for output in outputs_payload["outputs"]:
            from_outputs[output["id"]] = get_any_value(output)
        assert len(from_outputs) == 1
        assert from_outputs[self.out_key] == expected_url

        # the file itself must be located under the job sub-directory, along with the XML job status
        out_dir = self.settings["weaver.wps_output_dir"]
        job_out_file = os.path.join(out_dir, job_id, self.out_file)
        stray_file = os.path.join(out_dir, self.out_file)
        assert not os.path.exists(stray_file), (
            "File is expected to be created in sub-directory of Job ID, not directly in WPS output directory."
        )
        # job log, XML status and output directory are reachable by both the WPS UUID and the Job UUID
        expected_locations = (
            (f"{wps_uuid}.log",),
            (f"{wps_uuid}.xml",),
            (wps_uuid, self.out_file),
            (f"{job_id}.log",),
            (f"{job_id}.xml",),
        )
        for parts in expected_locations:
            assert os.path.isfile(os.path.join(out_dir, *parts))
        assert os.path.isfile(job_out_file)

        # validate content
        with open(job_out_file, mode="r", encoding="utf-8") as stream:
            written = stream.read()
        assert written == result_file_content
Example #6
0
def parse_wps_inputs(wps_process, job):
    """
    Parses expected WPS process inputs against submitted job input values considering supported process definitions.

    :param wps_process: process description of which ``dataInputs`` indicate the complex inputs.
    :param job: job of which the submitted ``inputs`` (mapping or listing) are converted.
    :returns: list of ``(input ID, value)`` pairs, or an empty list if a ``KeyError`` occurred while parsing.
    """
    def _without_file_scheme(reference):
        # we need to support file:// scheme but PyWPS doesn't like them so remove the scheme file://
        return reference[7:] if str(reference).startswith("file://") else reference

    complex_ids = [
        wps_input.identifier
        for wps_input in wps_process.dataInputs
        if WPS_COMPLEX_DATA in wps_input.dataType
    ]

    try:
        parsed = []
        # parse both dict and list type inputs
        if isinstance(job.inputs, dict):
            submitted = job.inputs.items()
        else:
            submitted = job.get("inputs", [])
        for entry in submitted:
            if isinstance(entry, tuple):
                input_id, raw_value = entry[0], entry[1]
            else:
                input_id = get_any_id(entry)
                raw_value = get_any_value(entry)
            # in case of array inputs, must repeat (id,value)
            raw_values = raw_value if isinstance(raw_value, list) else [raw_value]

            for item in raw_values:
                # the value is either nested in a dict holding the reference, or is the reference itself
                reference = _without_file_scheme(get_any_value(item) if isinstance(item, dict) else item)
                # need to use ComplexDataInput structure for complex input
                # need to use literal String for anything else than complex
                # TODO: BoundingBox not supported
                if input_id in complex_ids:
                    parsed.append((input_id, ComplexDataInput(reference)))
                else:
                    parsed.append((input_id, str(reference)))
    except KeyError:
        parsed = []
    return parsed
Example #7
0
 def _parse_inputs(inputs):
     # type: (Optional[Union[str, JSON]]) -> Union[OperationResult, JSON]
     """
     Resolve inputs provided as file reference, CLI literals, listing or mapping into parsed input values.

     :param inputs: file reference, list of CLI arguments, or already loaded inputs contents.
     :returns: parsed input values, or a failed operation result describing why they could not be parsed.
     """
     try:
         if isinstance(inputs, str):
             # loaded inputs could be mapping or listing format (any schema: CWL, OGC, OLD)
             inputs = [] if inputs == "" else load_file(inputs)
         if not (inputs and isinstance(inputs, (dict, list))):
             return OperationResult(False, "No inputs or invalid schema provided.", inputs)
         if isinstance(inputs, list):
             has_literals = any("=" in item for item in inputs)
             if has_literals:
                 # list of literals from CLI
                 inputs = repr2json_input_values(inputs)
             elif len(inputs) == 1:
                 if "=" not in inputs[0]:
                     # list of single file from CLI (because of 'nargs')
                     inputs = load_file(inputs[0])
                 elif inputs[0] == "":
                     # NOTE(review): looks unreachable, an empty string never contains '=' - confirm intent
                     inputs = []
         if isinstance(inputs, list):
             inputs = {"inputs": inputs}  # OLD format provided directly
         # consider possible ambiguity if literal CWL input is named 'inputs'
         # - if value of 'inputs' is an object, it can collide with 'OGC' schema,
         #   unless 'value/href' are present or their sub-dict don't have CWL 'class'
         # - if value of 'inputs' is an array, it can collide with 'OLD' schema,
         #   unless 'value/href' (and 'id' technically) are present
         values = inputs.get("inputs", null)
         as_cwl = values is null
         if not as_cwl and isinstance(values, dict):
             as_cwl = get_any_value(values) is null and "class" not in values
         elif not as_cwl and isinstance(values, list):
             as_cwl = all(isinstance(item, dict) and get_any_value(item) is null for item in values)
         if as_cwl:
             values = cwl2json_input_values(inputs)
         if values is null:
             raise ValueError("Input values parsed as null. Could not properly detect employed schema.")
     except Exception as exc:
         return OperationResult(False, f"Failed inputs parsing with error: [{exc!s}].", inputs)
     return values
Example #8
0
def get_job_inputs(request):
    # type: (Request) -> HTTPException
    """
    Retrieve the inputs of a job.

    The response body lists the ID and value of each job input, along with the job links.
    """
    job = get_job(request)
    listing = []
    for job_input in job.inputs:
        listing.append({"id": get_any_id(job_input), "value": get_any_value(job_input)})
    body = {"inputs": listing}
    body["links"] = job.links(request, self_link="inputs")
    body = sd.JobInputsSchema().deserialize(body)
    return HTTPOk(json=body)
Example #9
0
    def stage_results(self, results, expected_outputs, out_dir):
        # type: (JobResults, CWL_ExpectedOutputs, str) -> None
        """
        Stage the results of the remote :term:`Job` execution into the specified local output directory.

        Intended to be called by the implementing remote :term:`Process` definition following :meth:`execute`.
        Results with an ID that is not part of ``expected_outputs`` are ignored.

        .. note::
            Files are staged using their source name. The ``expected_outputs`` definitions that the :term:`CWL`
            runner looks for could be glob patterns matching many files and/or nested directories
            (eg: ``"*.txt"``), therefore specific file names cannot be relied upon for mapping.
        """
        for result in results:
            res_id = get_any_id(result)
            if res_id in expected_outputs:
                # plan ahead when list of multiple output values could be supported
                references = get_any_value(result)
                if not isinstance(references, list):
                    references = [references]
                stage_dir = out_dir.rstrip("/")
                for reference in references:
                    file_name = reference.split("/")[-1]
                    target = stage_dir + "/" + file_name
                    # performance improvement:
                    #   When the file resolves as a local resource (already fetched or same server), skip the
                    #   download and link it instead, since CWL expects to find it in the specified 'out_dir'
                    #   although it is stored in the full job output location (already staged by step).
                    local_path = map_wps_output_location(reference, self.settings)
                    if local_path:
                        LOGGER.info(
                            "Detected result [%s] from [%s] as local reference to this instance. "
                            "Skipping fetch and using local copy in output destination: [%s]",
                            res_id, reference, target)
                        LOGGER.debug("Mapped result [%s] to local reference: [%s]", reference, local_path)
                        source, use_link = local_path, True
                    else:
                        LOGGER.info("Fetching result [%s] from [%s] to CWL output destination: [%s]",
                                    res_id, reference, target)
                        source, use_link = reference, False
                    fetch_file(source, stage_dir, settings=self.settings, link=use_link)
Example #10
0
def get_results(job, container):
    # type: (Job, AnySettingsContainer) -> JSON
    """
    Obtains the results with extended full WPS output URL as applicable and according to configuration settings.

    Results holding ``data`` or ``value`` are reported as ``data``, any other as ``href``.
    An ``href`` without URL scheme is prefixed by the WPS output URL.
    """
    base_url = get_wps_output_url(container)
    if not base_url.endswith("/"):
        base_url += "/"
    outputs = []
    for result in job.results:
        if "data" in result or "value" in result:
            kind = "data"
        else:
            kind = "href"
        value = get_any_value(result)
        is_relative = kind == "href" and "://" not in value
        if is_relative:
            value = base_url + str(value).lstrip("/")
        outputs.append({"id": get_any_id(result), kind: value})
    return {"outputs": outputs}
Example #11
0
    def execute(self, workflow_inputs, out_dir, expected_outputs):
        """
        Execute the process on the remote WPS-1 provider, monitor it until completion and fetch its outputs.

        Progress is reported through :meth:`update_status` at each step (request preparation, execution,
        monitoring, output retrieval and completion).

        :param workflow_inputs:
            Mapping of input ID to value. Values equal to ``"null"`` are not submitted, array values are
            submitted as repeated inputs, and dict values are replaced by their ``location``.
        :param out_dir: directory where the files of expected outputs are written.
        :param expected_outputs: mapping of expected output ID to the file name to write within ``out_dir``.
        :raises Exception:
            For any error that occurs during the operation, with a message formed of the qualified class name
            and the message of the original error.
        """
        self.update_status("Preparing execute request for remote WPS1 provider.",
                           REMOTE_JOB_PROGRESS_REQ_PREP, status.STATUS_RUNNING)
        LOGGER.debug("Execute process WPS request for %s", self.process)
        try:
            # both the capabilities and the process description must be retrievable before submitting anything
            try:
                wps = WebProcessingService(url=self.provider, headers=self.cookies, verify=self.verify)
                raise_on_xml_exception(wps._capabilities)  # noqa: W0212
            except Exception as ex:
                raise OWSNoApplicableCode("Failed to retrieve WPS capabilities. Error: [{}].".format(str(ex)))
            try:
                process = wps.describeprocess(self.process)
            except Exception as ex:
                raise OWSNoApplicableCode("Failed to retrieve WPS process description. Error: [{}].".format(str(ex)))

            # prepare inputs
            # (identifiers of inputs that the remote process describes as complex data)
            complex_inputs = []
            for process_input in process.dataInputs:
                if WPS_COMPLEX_DATA in process_input.dataType:
                    complex_inputs.append(process_input.identifier)

            # remove any 'null' input, should employ the 'default' of the remote WPS process
            inputs_provided_keys = filter(lambda i: workflow_inputs[i] != "null", workflow_inputs)

            wps_inputs = []
            for input_key in inputs_provided_keys:
                input_val = workflow_inputs[input_key]
                # in case of array inputs, must repeat (id,value)
                # in case of complex input (File), obtain location, otherwise get data value
                if not isinstance(input_val, list):
                    input_val = [input_val]

                input_values = []
                for val in input_val:
                    if isinstance(val, dict):
                        val = val["location"]

                    # owslib only accepts strings, not numbers directly
                    if isinstance(val, (int, float)):
                        val = str(val)

                    # NOTE(review): assumes 'val' is a string at this point (any other type fails here) - confirm
                    if val.startswith("file://"):
                        # we need to host file starting with file:// scheme
                        val = self.host_file(val)

                    input_values.append(val)

                # need to use ComplexDataInput structure for complex input
                # TODO: BoundingBox not supported
                for input_value in input_values:
                    if input_key in complex_inputs:
                        input_value = ComplexDataInput(input_value)

                    wps_inputs.append((input_key, input_value))

            # prepare outputs
            # (only the expected ones, each paired with a flag telling if it is exactly of complex data type)
            outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA) for o in process.processOutputs
                       if o.identifier in expected_outputs]

            self.update_status("Executing job on remote WPS1 provider.",
                               REMOTE_JOB_PROGRESS_EXECUTION, status.STATUS_RUNNING)

            mode = EXECUTE_MODE_ASYNC
            execution = wps.execute(self.process, inputs=wps_inputs, output=outputs, mode=mode, lineage=True)
            # without a process in the response, report the first error that was returned
            if not execution.process and execution.errors:
                raise execution.errors[0]

            self.update_status("Monitoring job on remote WPS1 provider : [{0}]".format(self.provider),
                               REMOTE_JOB_PROGRESS_MONITORING, status.STATUS_RUNNING)

            max_retries = 5
            num_retries = 0
            run_step = 0
            job_id = "<undefined>"
            # 'run_step == 0' enforces at least one status check even if execution is already complete
            while execution.isNotComplete() or run_step == 0:
                # only consecutive failures are counted, since the counter is reset on each successful check
                if num_retries >= max_retries:
                    raise Exception("Could not read status document after {} retries. Giving up.".format(max_retries))
                try:
                    execution = check_wps_status(location=execution.statusLocation, verify=self.verify,
                                                 sleep_secs=wait_secs(run_step))
                    # job ID is taken from the file name of the status location, without its '.xml' extension
                    job_id = execution.statusLocation.replace(".xml", "").split("/")[-1]
                    LOGGER.debug(get_log_monitor_msg(job_id, status.map_status(execution.getStatus()),
                                                     execution.percentCompleted, execution.statusMessage,
                                                     execution.statusLocation))
                    self.update_status(get_job_log_msg(status=status.map_status(execution.getStatus()),
                                                       message=execution.statusMessage,
                                                       progress=execution.percentCompleted,
                                                       duration=None),  # get if available
                                       map_progress(execution.percentCompleted,
                                                    REMOTE_JOB_PROGRESS_MONITORING, REMOTE_JOB_PROGRESS_FETCH_OUT),
                                       status.STATUS_RUNNING)
                except Exception as exc:
                    # failed status check is retried after a short pause, without advancing the monitoring step
                    num_retries += 1
                    LOGGER.debug("Exception raised: %r", exc)
                    sleep(1)
                else:
                    num_retries = 0
                    run_step += 1

            # NOTE(review): 'isSucceded' spelling is presumably the one defined by the execution object - confirm
            if not execution.isSucceded():
                exec_msg = execution.statusMessage or "Job failed."
                LOGGER.debug(get_log_monitor_msg(job_id, status.map_status(execution.getStatus()),
                                                 execution.percentCompleted, exec_msg, execution.statusLocation))
                raise Exception(execution.statusMessage or "Job failed.")

            self.update_status("Fetching job outputs from remote WPS1 provider.",
                               REMOTE_JOB_PROGRESS_FETCH_OUT, status.STATUS_RUNNING)

            results = [ows2json_output(output, process) for output in execution.processOutputs]
            for result in results:
                result_id = get_any_id(result)
                result_val = get_any_value(result)
                if result_id in expected_outputs:
                    # This is where cwl expect the output file to be written
                    # TODO We will probably need to handle multiple output value...
                    dst_fn = "/".join([out_dir.rstrip("/"), expected_outputs[result_id]])

                    # TODO Should we handle other type than File reference?

                    # result value is requested as a reference, and the response body is written as is to the file
                    resp = request_extra("get", result_val, allow_redirects=True, settings=self.settings)
                    LOGGER.debug("Fetching result output from [%s] to cwl output destination: [%s]", result_val, dst_fn)
                    with open(dst_fn, mode="wb") as dst_fh:
                        dst_fh.write(resp.content)

        except Exception as exc:
            # any failure is logged with traceback and raised again as generic error naming the original class
            exception_class = "{}.{}".format(type(exc).__module__, type(exc).__name__)
            errors = "{0}: {1!s}".format(exception_class, exc)
            LOGGER.exception(exc)
            raise Exception(errors)

        self.update_status("Execution on remote WPS1 provider completed.",
                           REMOTE_JOB_PROGRESS_COMPLETED, status.STATUS_SUCCEEDED)
Example #12
0
    def stage_results(self, results, expected_outputs, out_dir):
        # type: (JobResults, CWL_ExpectedOutputs, str) -> None
        """
        Stage the results of the remote :term:`Job` execution into the specified local output directory.

        Intended to be called by the implementing remote :term:`Process` definition following :meth:`execute`.
        Results with an ID that is not part of ``expected_outputs`` are ignored. When attribute
        :attr:`WpsProcessInterface.stage_output_id_nested` is set, files are staged under a sub-directory named
        by the result ID instead of directly in ``out_dir``.

        .. note::
            Files are staged using their source name. The ``expected_outputs`` definitions that the :term:`CWL`
            runner looks for could be glob patterns matching many files and/or nested directories
            (eg: ``"*.txt"``), therefore specific file names cannot be relied upon for mapping.

        .. seealso::
            Function :func:`weaver.processes.convert.any2cwl_io` defines a generic glob pattern from the output ID
            and the file extension expected for the Content-Type format. Because a remote :term:`WPS`
            :term:`Process` could name its files anything rather than by output ID, staging must patch locations
            for the :term:`CWL` runtime to resolve the files according to these glob definitions.

        .. warning::
            Attribute :attr:`WpsProcessInterface.stage_output_id_nested` should only be set by remote
            :term:`Provider` implementations (which auto-generate a pseudo :term:`CWL` to map components) that
            produce outputs with inconsistent file names as described above. A :term:`Process` that directly
            provides an actual :term:`CWL` :term:`Application Package` definition (e.g.: Docker application)
            should avoid auto-mapping of glob patterns, since its :term:`CWL` is expected to contain the real
            mapping to respect for correct execution and retrieval of outputs from the application.
        """
        for result in results:
            res_id = get_any_id(result)
            if res_id in expected_outputs:
                # plan ahead when list of multiple output values could be supported
                references = get_any_value(result)
                if not isinstance(references, list):
                    references = [references]
                stage_dir = out_dir.rstrip("/")
                if self.stage_output_id_nested:
                    stage_dir = stage_dir + "/" + res_id
                os.makedirs(stage_dir, mode=0o700, exist_ok=True)
                for reference in references:
                    file_name = reference.split("/")[-1]
                    target = stage_dir + "/" + file_name
                    # performance improvement:
                    #   When the file resolves as a local resource (already fetched or same server), skip the
                    #   download and link it instead, since CWL expects to find it in the specified 'out_dir'
                    #   although it is stored in the full job output location (already staged by step).
                    local_path = map_wps_output_location(reference, self.settings)
                    if local_path:
                        LOGGER.info(
                            "Detected result [%s] from [%s] as local reference to this instance. "
                            "Skipping fetch and using local copy in output destination: [%s]",
                            res_id, reference, target)
                        LOGGER.debug("Mapped result [%s] to local reference: [%s]", reference, local_path)
                        source, use_link = local_path, True
                    else:
                        LOGGER.info("Fetching result [%s] from [%s] to CWL output destination: [%s]",
                                    res_id, reference, target)
                        source, use_link = reference, False
                    fetch_file(source, stage_dir, settings=self.settings, link=use_link)
Example #13
0
def parse_wps_inputs(wps_process, job):
    # type: (ProcessOWS, Job) -> List[Tuple[str, OWS_Input_Type]]
    """
    Parses expected WPS process inputs against submitted job input values considering supported process definitions.

    Array inputs are expanded into repeated ``(id, value)`` pairs. Inputs that the process describes as complex
    data are wrapped in a complex data input, along with the media-type and encoding when they are specified.
    Any other input is converted to its literal string.

    :param wps_process: process description of which ``dataInputs`` indicate the complex inputs.
    :param job: job of which the submitted ``inputs`` (mapping or listing) are converted.
    :returns: list of ``(input ID, value)`` pairs, or an empty list if a ``KeyError`` occurred while parsing.
    """
    def _without_file_scheme(reference):
        # we need to support file:// scheme but PyWPS doesn't like them so remove the scheme file://
        return reference[7:] if str(reference).startswith("file://") else reference

    complex_inputs = {}  # type: Dict[str, ComplexInput]
    for process_input in wps_process.dataInputs:
        if WPS_COMPLEX_DATA in process_input.dataType:
            complex_inputs[process_input.identifier] = process_input

    try:
        wps_inputs = []
        # parse both dict and list type inputs
        job_inputs = job.inputs.items() if isinstance(job.inputs, dict) else job.get("inputs", [])
        for job_input in job_inputs:
            if isinstance(job_input, tuple):
                input_id = job_input[0]
                input_val = job_input[1]
                job_input = input_val
            else:
                input_id = get_any_id(job_input)
                input_val = get_any_value(job_input)
            # in case of array inputs, must repeat (id,value)
            if isinstance(input_val, list):
                input_values = input_val
                input_details = input_val  # each value has its own metadata
            else:
                input_values = [input_val]
                input_details = [job_input]  # metadata directly in definition, not nested per array value

            # the value is either nested in a dict holding the file reference, or is the reference itself
            input_values = [
                _without_file_scheme(get_any_value(val) if isinstance(val, dict) else val)
                for val in input_values
            ]

            for input_value, input_detail in zip(input_values, input_details):
                # need to use ComplexDataInput structure for complex input
                if input_id in complex_inputs:
                    # if provided, pass down specified data input format to allow validation against supported formats
                    ctype = get_field(input_detail, "type", default=None)
                    encoding = None
                    if not ctype:
                        media_format = get_field(input_detail, "format", default=None)
                        if isinstance(media_format, dict):
                            # media-type and encoding are defined within the nested format,
                            # the input definition itself is only looked up as fallback (previous behavior)
                            ctype = (
                                get_field(media_format, "mime_type", search_variations=True, default=None)
                                or get_field(input_detail, "mime_type", search_variations=True, default=None)
                            )
                            encoding = (
                                get_field(media_format, "encoding", search_variations=True, default=None)
                                or get_field(input_detail, "encoding", search_variations=True, default=None)
                            )
                    wps_inputs.append((input_id, ComplexDataInput(input_value, mimeType=ctype, encoding=encoding)))
                # need to use literal String for anything else than complex
                # FIXME: pre-validate allowed literal values?
                # TODO: BoundingBox not supported
                else:
                    wps_inputs.append((input_id, str(input_value)))
    except KeyError:
        wps_inputs = []
    return wps_inputs
Example #14
0
def collect_statistics(process, settings=None, job=None, rss_start=None):
    # type: (Optional[psutil.Process], Optional[SettingsType], Optional[Job], Optional[int]) -> Optional[Statistics]
    """
    Collect any available execution statistics and store them in the :term:`Job` if provided.

    Statistics are gathered on a best-effort basis from up to three sources:

    - ``application``: memory usage parsed from the first :term:`Job` log line that mentions
      both ``cwltool`` and ``memory used``.
    - ``process``: memory (``rss``, ``uss``, ``vms``) and resource counters read from the process handle.
    - ``outputs``: size of every :term:`Job` result file that exists on disk, with the cumulative size
      reported under ``process.totalSize``.

    :param process: Process handle from which to read memory and resource usage, if any.
    :param settings: Application settings employed to resolve the WPS output directory.
    :param job: Job from which logs and results are read, and into which the statistics are stored.
    :param rss_start:
        RSS (in bytes) measured before the execution. When provided along with ``process``,
        the difference with the current RSS is reported as the memory used by the execution.
    :returns:
        Collected statistics, or ``None`` if nothing could be collected or if an error occurred.
        Errors are logged as warnings and never propagated.
    """
    try:
        mem_used = None
        if job:
            mem_logs = [
                line for line in job.logs
                if "cwltool" in line and "memory used" in line
            ]
            if mem_logs:
                # only the text following the last ':' of the log line holds the reported amount
                mem_text = mem_logs[0].split(":")[-1].strip()
                mem_used = parse_number_with_unit(mem_text, binary=True)

        stats = {}  # type: JSON
        if mem_used:
            stats["application"] = {
                # see: 'cwltool.job.JobBase.process_monitor', reported memory in logs uses 'rss'
                "usedMemory": apply_number_with_unit(mem_used, binary=True),
                "usedMemoryBytes": mem_used,
            }

        rss = None
        if process:
            proc_info = process.memory_full_info()
            rss = getattr(proc_info, "rss", 0)
            uss = getattr(proc_info, "uss", 0)
            vms = getattr(proc_info, "vms", 0)
            stats["process"] = {
                "rss": apply_number_with_unit(rss, binary=True),
                "rssBytes": rss,
                "uss": apply_number_with_unit(uss, binary=True),
                "ussBytes": uss,
                "vms": apply_number_with_unit(vms, binary=True),
                "vmsBytes": vms,
            }
            fields = [("usedThreads", "num_threads"), ("usedCPU", "cpu_num"),
                      ("usedHandles", "num_handles")]
            for field, method in fields:
                # a counter whose accessor is missing on the process handle is reported as zero
                func = getattr(process, method, None)
                stats["process"][field] = func() if func is not None else 0

        if rss_start and rss:
            # diff of RSS between start/end to consider only execution of the job steps
            # this more accurately reports used memory by the execution itself, omitting celery worker's base memory
            rss_diff = rss - rss_start
            stats["process"]["usedMemory"] = apply_number_with_unit(
                rss_diff, binary=True)
            stats["process"]["usedMemoryBytes"] = rss_diff

        if job:
            total_size = 0
            stats["outputs"] = {}
            for result in job.results:
                res_ref = get_any_value(result, file=True)
                if not (res_ref and isinstance(res_ref, str)):
                    continue
                if res_ref.startswith(f"/{job.id}"):  # pseudo-relative reference
                    out_dir = get_wps_output_dir(settings)
                    res_ref = os.path.join(out_dir, res_ref[1:])
                if os.path.isfile(res_ref):
                    res_size = os.stat(res_ref).st_size
                    res_id = get_any_id(result)
                    stats["outputs"][res_id] = {
                        "size": apply_number_with_unit(res_size, binary=True),
                        "sizeBytes": res_size,
                    }
                    total_size += res_size
            # the 'process' section does not exist when no process handle was provided,
            # create it on demand rather than failing and discarding every collected statistic
            proc_stats = stats.setdefault("process", {})
            proc_stats["totalSize"] = apply_number_with_unit(total_size,
                                                             binary=True)
            proc_stats["totalSizeBytes"] = total_size

        if stats and job:
            job.statistics = stats
        return stats or None
    except Exception as exc:  # pragma: no cover
        # statistics are optional information, never let their collection fail the caller
        LOGGER.warning(
            "Ignoring error that occurred during statistics collection [%s]",
            str(exc),
            exc_info=exc)
        return None
Example #15
0
def get_job_results_response(job, container, headers=None):
    # type: (Job, AnySettingsContainer, Optional[AnyHeadersContainer]) -> AnyResponseType
    """
    Generates the :term:`OGC` compliant :term:`Job` results response according to submitted execution parameters.

    Parameters that impact the format of the response are:
        - Amount of outputs to be returned.
        - Parameter ``response: raw|document``
        - Parameter ``transmissionMode: value|reference`` per output if ``response: raw``.

    Resulting responses are:
        - ``response: document``: JSON body of all results.
        - ``response: raw`` with only references: empty body (HTTP 204) with ``Link`` headers.
        - ``response: raw`` with a single value: the file contents or the literal data as plain text,
          with ``Link`` headers for any other output requested by reference.

    .. seealso::
        More details available for each combination:
        - https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response
        - https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7

    :param job: Job for which to generate the results response.
    :param container: Application settings.
    :param headers:
        Additional headers to provide in the response.
        A ``Location`` header pointing at the job status is added unless one is already provided.
    :raises HTTPNotImplemented:
        When ``response: raw`` is requested with more than one output (or array item) to return by value.
    """
    raise_job_dismissed(job, container)
    raise_job_bad_status(job, container)

    # when 'response=document', ignore 'transmissionMode=value|reference', respect it when 'response=raw'
    # See:
    #   - https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7 (/req/core/job-results-async-document)
    #   - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-document
    is_raw = job.execution_response == ExecuteResponse.RAW
    results, refs = get_results(
        job,
        container,
        value_key="value",
        schema=JobInputsOutputsSchema.OGC,  # not strict to provide more format details
        link_references=is_raw)
    headers = headers or {}
    # header names are case-insensitive, do not override a 'Location' provided with another letter case
    if not any(str(name).lower() == "location" for name in headers):
        headers["Location"] = job.status_url(container)

    if not is_raw:
        # note:
        #   Cannot add "links" field in response body because variable Output ID keys are directly at the root
        #   Possible conflict with an output that would be named "links".
        results = sd.Result().deserialize(results)
        return HTTPOk(json=results, headers=headers)

    if not results:  # avoid schema validation error if all by reference
        # Status code 204 for empty body
        # see:
        #   - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-ref
        refs.extend(headers.items())
        return HTTPNoContent(headers=refs)

    # from here, at least one result is returned by value
    # raw response can be data-only value, or a mix of value and link references
    # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-one
    out_vals = list(results.items())  # type: List[Tuple[str, ExecutionResultValue]]  # noqa
    out_info = out_vals[0][-1]  # type: ExecutionResultValue
    out_type = get_any_value(out_info, key=True)
    out_data = get_any_value(out_info)

    # FIXME: https://github.com/crim-ca/weaver/issues/376
    #  implement multipart, both for multi-output IDs and array-output under same ID
    if len(results) > 1 or (isinstance(out_data, list) and len(out_data) > 1):
        # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-multi
        raise HTTPNotImplemented(
            json={
                "code": "NotImplemented",
                "type": "NotImplemented",
                "detail": "Multipart results with 'transmissionMode=value' and 'response=raw' not implemented.",
            })

    # single value only
    out_data = out_data[0] if isinstance(out_data, list) else out_data
    if out_type == "href":
        # file reference: respond with the contents of the file
        out_path = map_wps_output_location(out_data,
                                           container,
                                           exists=True,
                                           url=False)
        out_type = out_info.get("type")  # noqa
        out_headers = get_file_headers(out_path,
                                       download_headers=True,
                                       content_headers=True,
                                       content_type=out_type)
        resp = FileResponse(out_path)
        resp.headers.update(out_headers)
        resp.headers.update(headers)
    else:
        # literal data: respond with the value directly as plain text
        resp = HTTPOk(body=out_data,
                      charset="UTF-8",
                      content_type=ContentType.TEXT_PLAIN,
                      headers=headers)
    if refs:
        # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-ref
        # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-mixed-multi
        resp.headerlist.extend(refs)
    return resp
Example #16
0
def get_results(
        job,  # type: Job
        container,  # type: AnySettingsContainer
        value_key=None,  # type: Optional[str]
        schema=JobInputsOutputsSchema.OLD,  # type: JobInputsOutputsSchemaType
        link_references=False,  # type: bool
):  # type: (...) -> Tuple[ExecutionResults, HeadersTupleType]
    """
    Retrieves the job results, expanding references into full WPS output URL where applicable.

    :param job: job from which to retrieve results.
    :param container: any container giving access to instance settings (to resolve reference output location).
    :param value_key:
        Key to employ for literal values when the schema is not :term:`OGC`.
        If omitted, the ``data`` key is used. References always use the ``href`` key.
    :param schema:
        Selects which schema to employ for representing the output results (listing or mapping).
        A ``+strict`` suffix reduces the format details reported for references.
    :param link_references:
        If enabled, an output that was requested by reference instead of value will be returned as ``Link`` reference.
    :returns:
        Tuple with:
            - List or mapping of all outputs each with minimally an ID and value under the requested key.
            - List of ``Link`` headers for reference outputs when requested. Empty otherwise.
    """
    settings = get_settings(container)
    base_url = get_wps_output_url(settings)
    if not base_url.endswith("/"):
        base_url += "/"

    # resolve the representation requested by the schema
    schema = JobInputsOutputsSchema.get(str(schema).lower(),
                                        default=JobInputsOutputsSchema.OLD)
    strict = schema.endswith("+strict")
    schema = schema.split("+")[0]
    ogc_api = schema == JobInputsOutputsSchema.OGC
    outputs = {} if ogc_api else []
    fmt_key = "mediaType" if ogc_api else "mimeType"

    out_ref = {}
    if link_references:
        out_ref = convert_output_params_schema(job.outputs,
                                               JobInputsOutputsSchema.OGC)
    references = {}

    for result in job.results:
        is_href = not ("data" in result or "value" in result)
        value = get_any_value(result)
        out_id = get_any_id(result)
        out_mode = out_ref.get(out_id, {}).get("transmissionMode")
        as_ref = link_references and out_mode == ExecuteTransmissionMode.REFERENCE

        if is_href:
            out_key = "href"
            # fix paths relative to instance endpoint, but leave explicit links as is (eg: S3 bucket, remote HTTP, etc.)
            if value.startswith("/"):
                value = str(value).lstrip("/")
            if "://" not in value:
                value = base_url + value
        elif ogc_api:
            out_key = "value"
        elif value_key:
            out_key = value_key
        else:
            out_key = "data"

        output = {out_key: value}
        if is_href:
            # media-type is required for the rest to be there, other fields optional
            if "mimeType" not in result:
                guessed = get_format(value, default=ContentType.TEXT_PLAIN)
                result["mimeType"] = guessed.mime_type
            media_type = result["mimeType"]
            if ogc_api or not strict:
                output["type"] = media_type
            if as_ref or not (ogc_api and strict):
                output["format"] = {fmt_key: media_type}
                output["format"].update({
                    field: result[field]
                    for field in ["encoding", "schema"] if field in result
                })
        else:
            # literal data
            # FIXME: BoundingBox not implemented (https://github.com/crim-ca/weaver/issues/51)
            fallback = any2wps_literal_datatype(value, is_value=True) or "string"
            dtype = result.get("dataType", fallback)
            output["dataType"] = {"name": dtype} if ogc_api else dtype

        if not (ogc_api or as_ref):
            # listing representation: ID inserted first, followed by the other fields
            outputs.append({"id": out_id, **output})
            continue

        # mapping representation: repeated IDs are aggregated into a list (array output)
        mapping = references if as_ref else outputs
        if out_id not in mapping:
            mapping[out_id] = output
            continue
        grouped = mapping[out_id]
        if not isinstance(grouped, list):
            grouped = [grouped]
        grouped.append(output)
        mapping[out_id] = grouped

    # needed to collect and aggregate outputs of same ID first in case of array
    # convert any requested link references using indices if needed
    headers = []
    for out_id, output in references.items():
        for link in make_result_link(out_id, output, job.id, settings):
            headers.append(("Link", link))

    return outputs, headers
Example #17
0
def get_results(job, container, value_key=None, ogc_api=False):
    # type: (Job, AnySettingsContainer, Optional[str], bool) -> Union[List[JSON], JSON]
    """
    Retrieves the job results, expanding references into full WPS output URL where applicable.

    :param job: job from which to retrieve results.
    :param container: any container giving access to instance settings (to resolve reference output location).
    :param value_key:
        Key to employ for literal values when ``ogc_api`` is disabled.
        If omitted, the ``data`` key is used. References always use the ``href`` key.
    :param ogc_api:
        If ``True``, formats the results using the ``OGC API - Processes`` format
        (mapping of output ID to result, literal values under ``value``).
    :returns: list or mapping of all outputs each with minimally an ID and value under the requested key.
    """
    base_url = get_wps_output_url(container)
    if not base_url.endswith("/"):
        base_url += "/"
    outputs = {} if ogc_api else []
    fmt_key = "mediaType" if ogc_api else "mimeType"

    for result in job.results:
        is_href = not ("data" in result or "value" in result)
        value = get_any_value(result)
        out_id = get_any_id(result)

        if is_href:
            out_key = "href"
            # fix paths relative to instance endpoint, but leave explicit links as is (eg: S3 bucket, remote HTTP, etc.)
            if value.startswith("/"):
                value = str(value).lstrip("/")
            if "://" not in value:
                value = base_url + value
        elif ogc_api:
            out_key = "value"
        elif value_key:
            out_key = value_key
        else:
            out_key = "data"

        output = {out_key: value}
        if is_href:
            # media-type is required for the rest to be there, other fields optional
            if "mimeType" not in result:
                guessed = get_format(value, default=CONTENT_TYPE_TEXT_PLAIN)
                result["mimeType"] = guessed.mime_type
            output["format"] = {fmt_key: result["mimeType"]}
            output["format"].update({
                field: result[field]
                for field in ["encoding", "schema"] if field in result
            })
        else:
            # literal data
            # FIXME: BoundingBox not implemented (https://github.com/crim-ca/weaver/issues/51)
            fallback = any2wps_literal_datatype(value, is_value=True) or "string"
            dtype = result.get("dataType", fallback)
            output["dataType"] = {"name": dtype} if ogc_api else dtype

        if not ogc_api:
            # listing representation: ID inserted first, followed by the other fields
            outputs.append({"id": out_id, **output})
            continue

        # mapping representation: repeated IDs are aggregated into a list (array output)
        if out_id not in outputs:
            outputs[out_id] = output
            continue
        grouped = outputs[out_id]
        if not isinstance(grouped, list):
            grouped = [grouped]
        grouped.append(output)
        outputs[out_id] = grouped
    return outputs
Example #18
0
    def execute(self, workflow_inputs, out_dir, expected_outputs):
        """
        Runs the process on the remote ADES and retrieves its outputs.

        Steps performed, each reported with a status update:
            1. Deploy the process if it is not visible, then enforce its public visibility.
            2. Convert the workflow inputs into the execute request body.
            3. Submit the asynchronous execution and monitor the job until it finishes.
            4. Download every expected output into ``out_dir``.

        :param workflow_inputs:
            Mapping of input ID to value or list of values.
            Values that are a mapping with a ``location`` key are submitted by reference, others as data.
        :param out_dir: Directory where the fetched output files are written.
        :param expected_outputs: Mapping of output ID to the file name to write under ``out_dir``.
        :raises Exception:
            When the execute request is not answered with status code 201,
            or when the monitored job finishes with another status than succeeded.
        """
        # TODO: test
        visibility = self.is_visible()
        if not visibility:  # includes private visibility and non-existing cases
            if visibility is None:
                LOGGER.info(
                    "Process [%s] access is unauthorized on [%s] - deploying as admin.",
                    self.process, self.url)
            elif visibility is False:
                LOGGER.info(
                    "Process [%s] is not deployed on [%s] - deploying.",
                    self.process, self.url)
            # TODO: Maybe always redeploy? What about cases of outdated deployed process?
            try:
                self.deploy()
            except Exception as exc:
                # FIXME: support for Spacebel, avoid conflict error incorrectly handled, remove 500 when fixed
                pass_http_error(exc, [HTTPConflict, HTTPInternalServerError])

        LOGGER.info("Process [%s] enforced to public visibility.",
                    self.process)
        try:
            self.set_visibility(visibility=VISIBILITY_PUBLIC)
        # TODO: support for Spacebel, remove when visibility route properly implemented on ADES
        except Exception as exc:
            pass_http_error(exc, HTTPNotFound)

        self.update_status("Preparing execute request for remote ADES.",
                           REMOTE_JOB_PROGRESS_REQ_PREP, status.STATUS_RUNNING)
        LOGGER.debug("Execute process WPS request for [%s]", self.process)

        key_id = "id"
        key_href = "href"
        key_data = "data"

        # one request entry per input value, a list of values produces repeated entries of the same ID
        execute_body_inputs = []
        for input_id, input_value in workflow_inputs.items():
            input_items = input_value if isinstance(input_value, list) else [input_value]
            for item in input_items:
                if isinstance(item, dict) and "location" in item:
                    entry = {key_id: input_id, key_href: item["location"]}
                else:
                    entry = {key_id: input_id, key_data: item}
                execute_body_inputs.append(entry)

        # adjust references that employ a local scheme
        local_prefix = "{0}://".format(OPENSEARCH_LOCAL_FILE_SCHEME)
        for exec_input in execute_body_inputs:
            href = exec_input.get(key_href)
            if not isinstance(href, str):
                continue
            if href.startswith(local_prefix):
                # swap the scheme name for 'file', keeping the remainder of the reference
                exec_input[key_href] = "file{0}".format(
                    href[len(OPENSEARCH_LOCAL_FILE_SCHEME):])
            elif href.startswith("file://"):
                hosted = self.host_file(href)
                exec_input[key_href] = hosted
                LOGGER.debug("Hosting intermediate input [%s] : [%s]",
                             exec_input[key_id], hosted)

        execute_body_outputs = [
            {key_id: output, "transmissionMode": EXECUTE_TRANSMISSION_MODE_REFERENCE}
            for output in expected_outputs
        ]
        self.update_status("Executing job on remote ADES.",
                           REMOTE_JOB_PROGRESS_EXECUTION,
                           status.STATUS_RUNNING)

        execute_body = {
            "mode": EXECUTE_MODE_ASYNC,
            "response": EXECUTE_RESPONSE_DOCUMENT,
            "inputs": execute_body_inputs,
            "outputs": execute_body_outputs,
        }
        request_url = self.url + process_jobs_uri.format(
            process_id=self.process)
        response = self.make_request(method="POST",
                                     url=request_url,
                                     json=execute_body,
                                     retry=True)
        if response.status_code != 201:
            raise Exception(
                "Was expecting a 201 status code from the execute request : {0}"
                .format(request_url))

        job_status_uri = response.headers["Location"]
        job_status = self.get_job_status(job_status_uri)
        job_status_value = status.map_status(job_status["status"])

        self.update_status(
            "Monitoring job on remote ADES : {0}".format(job_status_uri),
            REMOTE_JOB_PROGRESS_MONITORING, status.STATUS_RUNNING)

        # poll the remote job until it reaches any finished status
        finished = status.JOB_STATUS_CATEGORIES[status.STATUS_CATEGORY_FINISHED]
        while job_status_value not in finished:
            sleep(5)
            job_status = self.get_job_status(job_status_uri)
            job_status_value = status.map_status(job_status["status"])

            LOGGER.debug(
                get_log_monitor_msg(job_status["jobID"], job_status_value,
                                    job_status.get("percentCompleted", 0),
                                    get_any_message(job_status),
                                    job_status.get("statusLocation")))
            percent = job_status.get("percentCompleted", 0)
            log_msg = get_job_log_msg(
                status=job_status_value,
                message=get_any_message(job_status),
                progress=percent,
                duration=job_status.get("duration", None))  # get if available
            progress = map_progress(percent, REMOTE_JOB_PROGRESS_MONITORING,
                                    REMOTE_JOB_PROGRESS_FETCH_OUT)
            self.update_status(log_msg, progress, status.STATUS_RUNNING)

        if job_status_value != status.STATUS_SUCCEEDED:
            LOGGER.debug(
                get_log_monitor_msg(job_status["jobID"], job_status_value,
                                    job_status.get("percentCompleted", 0),
                                    get_any_message(job_status),
                                    job_status.get("statusLocation")))
            raise Exception(job_status)

        self.update_status("Fetching job outputs from remote ADES.",
                           REMOTE_JOB_PROGRESS_FETCH_OUT,
                           status.STATUS_RUNNING)
        for result in self.get_job_results(job_status["jobID"]):
            result_id = get_any_id(result)
            if result_id not in expected_outputs:
                continue
            # This is where cwl expect the output file to be written
            # TODO We will probably need to handle multiple output value...
            dst_fn = "/".join(
                [out_dir.rstrip("/"), expected_outputs[result_id]])

            # TODO Should we handle other type than File reference?
            result_ref = get_any_value(result)
            resp = request_extra("get",
                                 result_ref,
                                 allow_redirects=True,
                                 settings=self.settings)
            LOGGER.debug(
                "Fetching result output from [%s] to cwl output destination: [%s]",
                result_ref, dst_fn)
            with open(dst_fn, mode="wb") as dst_fh:
                dst_fh.write(resp.content)

        self.update_status("Execution on remote ADES completed.",
                           REMOTE_JOB_PROGRESS_COMPLETED,
                           status.STATUS_SUCCEEDED)
Example #19
0
def test_get_any_value():
    """
    Validate which keys are looked up by ``get_any_value`` and when the default is returned instead.
    """
    url = "http://localhost/test.txt"

    # nothing to find, the default is returned
    assert get_any_value({}) is None
    assert get_any_value({}, default=null) is null
    assert get_any_value({}, default=1) == 1

    # literal data keys, ignored when 'data' is disabled
    for data_key in ("data", "value"):
        literal = {data_key: 2}
        assert get_any_value(literal) == 2
        assert get_any_value(literal, default=1) == 2
        assert get_any_value(literal, data=False) is None
        assert get_any_value(literal, default=1, data=False) == 1

    # file reference keys, ignored when 'file' is disabled
    for file_key in ("href", "reference"):
        link = {file_key: url}
        assert get_any_value(link) == url
        assert get_any_value(link, default=1) == url
        assert get_any_value(link, file=False) is None
        assert get_any_value(link, file=False, default=1) == 1

    # unknown key is never resolved
    assert get_any_value({"file": url}) is None
    # everything disabled, nothing can be resolved even if all keys are present
    everything = {"data": 1, "value": 2, "href": url}
    assert get_any_value(everything, file=False, data=False) is None
Example #20
0
def execute_process(self, job_id, url, headers=None):
    """
    Run the WPS process of a stored job and monitor it until it completes.

    The steps, in order, are:

    1. Load settings and the PyWPS configuration, fetch the job from the job store and tag it with the task ID.
    2. Retrieve the WPS capabilities and the process description from ``url``.
    3. Convert the job inputs to WPS ``(id, value)`` pairs and list the process outputs.
    4. Submit the execution through the PyWPS service worker.
    5. Poll the status location until the execution is complete, then store results (success) or errors (failure).
    6. Always finish by saving the final job state and, if requested by the job, sending a notification email.

    Any exception raised by steps 2 to 5 is caught, logged, and recorded on the job as a failed status;
    it is not propagated to the caller.

    :param self: bound task instance; only ``self.request.id`` is read (stored as the job's task ID).
        The in-code comment indicates this runs within a forked celery process.
    :param job_id: identifier used to fetch the job from the job store.
    :param url: WPS endpoint passed to ``WebProcessingService`` for capabilities and process description.
    :param headers: optional request headers, passed through ``get_cookie_headers`` before use.
    :returns: the final ``job.status`` value.
    """
    # NOTE(review): function-scope import, presumably to avoid a circular import at module load - confirm
    from weaver.wps.service import get_pywps_service

    LOGGER.debug("Job execute process called.")
    settings = get_settings(app)
    task_logger = get_task_logger(__name__)
    load_pywps_config(settings)

    task_logger.debug("Job task setup.")

    # reset the connection because we are in a forked celery process
    db = get_db(app, reset_connection=True)
    store = db.get_store(StoreJobs)

    # link the job to this task and persist the initial progress before doing any remote call
    job = store.fetch_by_id(job_id)
    job.task_id = self.request.id
    job.progress = JOB_PROGRESS_SETUP
    job.save_log(logger=task_logger, message="Job task setup completed.")
    job = store.update_job(job)

    # everything below is wrapped so that any failure marks the job as failed (see outer 'except')
    try:
        # capabilities and process description failures are both re-raised as 'OWSNoApplicableCode'
        try:
            job.progress = JOB_PROGRESS_DESCRIBE
            job.save_log(
                logger=task_logger,
                message="Execute WPS request for process [{!s}]".format(
                    job.process))
            ssl_verify = get_ssl_verify_option("get", url, settings=settings)
            wps = WebProcessingService(url=url,
                                       headers=get_cookie_headers(headers),
                                       verify=ssl_verify)
            set_wps_language(wps, accept_language=job.accept_language)
            raise_on_xml_exception(wps._capabilities)  # noqa
        except Exception as ex:
            raise OWSNoApplicableCode(
                "Failed to retrieve WPS capabilities. Error: [{}].".format(
                    str(ex)))
        try:
            process = wps.describeprocess(job.process)
        except Exception as ex:
            raise OWSNoApplicableCode(
                "Failed to retrieve WPS process description. Error: [{}].".
                format(str(ex)))

        # prepare inputs
        job.progress = JOB_PROGRESS_GET_INPUTS
        job.save_log(logger=task_logger,
                     message="Fetching job input definitions.")
        # collect identifiers of inputs described as complex data, they need a dedicated wrapper below
        complex_inputs = []
        for process_input in process.dataInputs:
            if WPS_COMPLEX_DATA in process_input.dataType:
                complex_inputs.append(process_input.identifier)

        # a KeyError anywhere while converting the inputs discards all of them (empty input list)
        try:
            wps_inputs = list()
            for process_input in job.inputs:
                input_id = get_any_id(process_input)
                process_value = get_any_value(process_input)
                # in case of array inputs, must repeat (id,value)
                input_values = process_value if isinstance(
                    process_value, list) else [process_value]

                # we need to support file:// scheme but PyWPS doesn't like them so remove the scheme file://
                input_values = [
                    val[7:] if str(val).startswith("file://") else val
                    for val in input_values
                ]

                # need to use ComplexDataInput structure for complex input
                # need to use literal String for anything else than complex
                # TODO: BoundingBox not supported
                wps_inputs.extend([
                    (input_id, ComplexDataInput(input_value)
                     if input_id in complex_inputs else str(input_value))
                    for input_value in input_values
                ])
        except KeyError:
            wps_inputs = []

        # prepare outputs
        job.progress = JOB_PROGRESS_GET_OUTPUTS
        job.save_log(logger=task_logger,
                     message="Fetching job output definitions.")
        # pairs of (output identifier, whether the output is complex data)
        wps_outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA)
                       for o in process.processOutputs]

        mode = EXECUTE_MODE_ASYNC if job.execute_async else EXECUTE_MODE_SYNC
        job.progress = JOB_PROGRESS_EXECUTE_REQUEST
        job.save_log(logger=task_logger,
                     message="Starting job process execution.")
        job.save_log(
            logger=task_logger,
            message=
            "Following updates could take a while until the Application Package answers..."
        )

        # submit the execution through the PyWPS service worker
        wps_worker = get_pywps_service(environ=settings, is_worker=True)
        execution = wps_worker.execute_job(job.process,
                                           wps_inputs=wps_inputs,
                                           wps_outputs=wps_outputs,
                                           mode=mode,
                                           job_uuid=job.id)
        # no process on the execution combined with reported errors: re-raise the first error
        if not execution.process and execution.errors:
            raise execution.errors[0]

        # adjust status location
        wps_status_path = get_wps_local_status_location(
            execution.statusLocation, settings)
        job.progress = JOB_PROGRESS_EXECUTE_STATUS_LOCATION
        LOGGER.debug("WPS status location that will be queried: [%s]",
                     wps_status_path)
        # only warn (do not fail) when the location is neither an HTTP reference nor an existing local file
        if not wps_status_path.startswith("http") and not os.path.isfile(
                wps_status_path):
            LOGGER.warning(
                "WPS status location not resolved to local path: [%s]",
                wps_status_path)
        job.save_log(logger=task_logger,
                     level=logging.DEBUG,
                     message="Updated job status location: [{}].".format(
                         wps_status_path))

        # record the submitted execution details and persist the job as started before monitoring
        job.status = map_status(STATUS_STARTED)
        job.status_message = execution.statusMessage or "{} initiation done.".format(
            str(job))
        job.status_location = wps_status_path
        job.request = execution.request
        job.response = execution.response
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_START
        job.save_log(logger=task_logger,
                     message="Starting monitoring of job execution.")
        job = store.update_job(job)

        # monitoring loop:
        #   - 'run_step == 0' forces at least one status check even if the execution already reports complete
        #   - 'num_retries' counts consecutive failed iterations and is reset after each successful one
        max_retries = 5
        num_retries = 0
        run_step = 0
        while execution.isNotComplete() or run_step == 0:
            # raised outside the inner 'try', so it is handled by the outer 'except' and fails the job
            if num_retries >= max_retries:
                raise Exception(
                    "Could not read status document after {} retries. Giving up."
                    .format(max_retries))
            try:
                # NOTE:
                #   Don't actually log anything here until process is completed (success or fail) so that underlying
                #   WPS execution logs can be inserted within the current job log and appear continuously.
                #   Only update internal job fields in case they get referenced elsewhere.
                job.progress = JOB_PROGRESS_EXECUTE_MONITOR_LOOP
                # NOTE(review): 'wait_secs(run_step)' presumably returns a delay based on the step count - confirm
                execution = check_wps_status(location=wps_status_path,
                                             settings=settings,
                                             sleep_secs=wait_secs(run_step))
                job_msg = (execution.statusMessage or "").strip()
                job.response = execution.response
                job.status = map_status(execution.getStatus())
                job.status_message = "Job execution monitoring (progress: {}%, status: {})."\
                                     .format(execution.percentCompleted, job_msg or "n/a")
                # job.save_log(logger=task_logger)
                # job = store.update_job(job)

                if execution.isComplete():
                    job.mark_finished()
                    job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END
                    msg_progress = " (status: {})".format(
                        job_msg) if job_msg else ""
                    if execution.isSucceded():
                        job.status = map_status(STATUS_SUCCEEDED)
                        job.status_message = "Job succeeded{}.".format(
                            msg_progress)
                        # insert the package logs before saving the final status message (see NOTE above)
                        wps_package.retrieve_package_job_log(execution, job)
                        job.save_log(logger=task_logger)
                        job_results = [
                            ows2json_output(output, process, settings)
                            for output in execution.processOutputs
                        ]
                        job.results = make_results_relative(
                            job_results, settings)
                    else:
                        # the failed status itself was already assigned from 'execution.getStatus()' above
                        task_logger.debug("Job failed.")
                        job.status_message = "Job failed{}.".format(
                            msg_progress)
                        wps_package.retrieve_package_job_log(execution, job)
                        job.save_log(errors=execution.errors,
                                     logger=task_logger)
                    task_logger.debug(
                        "Mapping Job references with generated WPS locations.")
                    map_locations(job, settings)

            except Exception as exc:
                # NOTE(review): this handler covers the whole monitoring step (including result conversion and
                #   location mapping), not only the status document read that the message refers to.
                num_retries += 1
                task_logger.debug("Exception raised: %s", repr(exc))
                job.status_message = "Could not read status XML document for {!s}. Trying again...".format(
                    job)
                job.save_log(errors=execution.errors, logger=task_logger)
                sleep(1)
            else:
                # job.status_message = "Update {}...".format(str(job))
                # job.save_log(logger=task_logger)
                num_retries = 0
                run_step += 1
            finally:
                # persist the job after every iteration, whether it succeeded or failed
                job = store.update_job(job)

    except Exception as exc:
        # any failure of the steps above marks the job as failed; the exception is logged but not re-raised
        LOGGER.exception("Failed running [%s]", job)
        job.status = map_status(STATUS_FAILED)
        job.status_message = "Failed to run {!s}.".format(job)
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_ERROR
        exception_class = "{}.{}".format(
            type(exc).__module__,
            type(exc).__name__)
        errors = "{0}: {1!s}".format(exception_class, exc)
        job.save_log(errors=errors, logger=task_logger)
    finally:
        # NOTE(review): this also runs after the 'except' above, so the error progress value set there
        #   is overwritten by the monitor end value - confirm this is intended.
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END
        job.status_message = "Job {}.".format(job.status)
        job.save_log(logger=task_logger)

        # Send email if requested
        if job.notification_email is not None:
            job.progress = JOB_PROGRESS_NOTIFY
            # a notification failure is only logged on the job, it does not change the job status
            try:
                notify_job_complete(job, job.notification_email, settings)
                message = "Notification email sent successfully."
                job.save_log(logger=task_logger, message=message)
            except Exception as exc:
                exception_class = "{}.{}".format(
                    type(exc).__module__,
                    type(exc).__name__)
                exception = "{0}: {1!s}".format(exception_class, exc)
                message = "Couldn't send notification email ({})".format(
                    exception)
                job.save_log(errors=message,
                             logger=task_logger,
                             message=message)

        # final persisted state of the job
        job.progress = JOB_PROGRESS_DONE
        job.save_log(logger=task_logger, message="Job task complete.")
        job = store.update_job(job)

    return job.status