Beispiel #1
0
def test_request_extra_zero_values():
    """
    Test that zero-value ``retries`` and ``backoff`` are not ignored.
    """
    def fake_request(*_, **__):
        response = Response()
        response.status_code = HTTPNotFound.code
        return response

    with mock.patch("requests.Session.request", side_effect=fake_request) as mocked_request:
        resp = request_extra("get", "http://whatever", retries=0, allowed_codes=[HTTPOk.code])
        assert resp.status_code == HTTPGatewayTimeout.code, "failing request with no retry should produce timeout"
        assert mocked_request.call_count == 1

    sleep_calls = {"called_count": 0, "called_with": []}

    def fake_sleep(delay):
        sleep_calls["called_count"] += 1
        sleep_calls["called_with"].append(delay)

    # disable request caching so every call goes through the retry logic
    with mock.patch("weaver.utils.get_settings", return_value={"cache.requests.enable": "false"}), \
         mock.patch("requests.Session.request", side_effect=fake_request) as mocked_request, \
         mock.patch("weaver.utils.time.sleep", side_effect=fake_sleep):
        # if backoff is not correctly handled as explicit zero, the default backoff value would be used
        # to calculate the delay between requests which should increase with backoff formula and retry count
        resp = request_extra("get", "http://whatever", backoff=0, retries=3, allowed_codes=[HTTPOk.code])
        assert resp.status_code == HTTPGatewayTimeout.code
        assert mocked_request.call_count == 4  # first called directly, then 3 times for each retry

    # since backoff factor multiplies all incrementally increasing delays between requests,
    # proper detection of input backoff=0 makes all sleep calls equal to zero
    assert not any(sleep_calls["called_with"])  # every recorded delay must be exactly zero
    assert sleep_calls["called_count"] == 3  # first direct call doesn't have any sleep from retry
Beispiel #2
0
def get_processes(request):
    """
    List registered processes (GetCapabilities). Optionally list both local and provider processes.

    :param request: incoming HTTP request used to resolve settings, query parameters, headers and cookies.
    :returns: HTTP 200 response with the JSON process listing.
    :raises HTTPServiceUnavailable: if previously deployed processes have invalid schema definitions.
    :raises HTTPBadRequest: if a schema validation error occurs while building the listing.
    """
    # 'detail=true' (default) returns full process summaries, otherwise only their identifiers
    detail = asbool(request.params.get("detail", True))
    try:
        # get local processes and filter according to schema validity
        # (previously deployed process schemas can become invalid because of modified schema definitions
        processes, invalid_processes = get_processes_filtered_by_valid_schemas(
            request)
        if invalid_processes:
            raise HTTPServiceUnavailable(
                "Previously deployed processes are causing invalid schema integrity errors. "
                "Manual cleanup of following processes is required: {}".format(
                    invalid_processes))
        response_body = {
            "processes":
            processes if detail else [get_any_id(p) for p in processes]
        }

        # if 'EMS' and '?providers=True', also fetch each provider's processes
        settings = get_settings(request)
        if get_weaver_configuration(settings) == WEAVER_CONFIGURATION_EMS:
            queries = parse_request_query(request)
            if "providers" in queries and asbool(
                    queries["providers"][0]) is True:
                prov_url = "{host}/providers".format(host=request.host_url)
                # forward the caller's headers/cookies so provider requests reuse the same auth context
                providers_response = request_extra("GET",
                                                   prov_url,
                                                   settings=settings,
                                                   headers=request.headers,
                                                   cookies=request.cookies)
                providers = providers_response.json()
                response_body.update({"providers": providers})
                # fetch the process list of each provider and merge it into its own entry
                for i, provider in enumerate(providers):
                    provider_id = get_any_id(provider)
                    proc_url = "{host}/providers/{prov}/processes".format(
                        host=request.host_url, prov=provider_id)
                    response = request_extra("GET",
                                             proc_url,
                                             settings=settings,
                                             headers=request.headers,
                                             cookies=request.cookies)
                    processes = response.json().get("processes", [])
                    response_body["providers"][i].update({
                        "processes":
                        processes
                        if detail else [get_any_id(p) for p in processes]
                    })
        return HTTPOk(json=response_body)
    except colander.Invalid as ex:
        raise HTTPBadRequest("Invalid schema: [{!s}]".format(ex))
Beispiel #3
0
 def _request_extra_various(_mime_type):
     """
     Attempts multiple request-retry variants to be as permissive as possible to sporadic/temporary failures.

     :param _mime_type: Media-type to resolve against the IANA namespace.
     :returns: Reference definition if the media-type was resolved, ``None`` otherwise.
     """
     _mime_type_url = "{}{}".format(
         IANA_NAMESPACE_DEFINITION[IANA_NAMESPACE], _mime_type)
     try:
         # first attempt: retrying request helper tolerates sporadic/temporary failures
         resp = request_extra(
             "get",
             _mime_type_url,
             retries=3,
             allowed_codes=[HTTPOk.code, HTTPNotFound.code])
         if resp.status_code == HTTPOk.code:
             return _make_if_ref(IANA_NAMESPACE_DEFINITION, IANA_NAMESPACE,
                                 _mime_type)
     except ConnectionError:
         pass
     try:
         # fallback: direct URL open, bounded by a timeout to avoid hanging indefinitely
         # (consistent with the other '_request_extra_various' variant in this file)
         resp = urlopen(
             _mime_type_url, timeout=1)  # nosec: B310 # is hardcoded HTTP(S)
         if resp.code == HTTPOk.code:
             return _make_if_ref(IANA_NAMESPACE_DEFINITION, IANA_NAMESPACE,
                                 _mime_type)
     except HTTPError:
         pass
     return None
Beispiel #4
0
def test_request_extra_intervals():
    """
    Verifies that ``intervals`` are used for calling the retry operations instead of ``backoff``/``retries``.
    """
    def fake_request(*_, **__):
        response = Response()
        response.status_code = HTTPNotFound.code
        return response

    sleep_tracker = {"called_count": 0, "called_with": []}

    def fake_sleep(delay):
        # only track the unrealistic delays injected below, ignoring unrelated sleep() calls
        if delay > 1e5:
            sleep_tracker["called_count"] += 1
            sleep_tracker["called_with"].append(delay)

    with mock.patch("weaver.utils.get_settings", return_value={"cache.requests.enable": "false"}), \
         mock.patch("requests.Session.request", side_effect=fake_request) as mocked_request, \
         mock.patch("weaver.utils.time.sleep", side_effect=fake_sleep):
        intervals = [1e6, 3e6, 5e6]  # random values that shouldn't normally be used with sleep() (too big)
        # values will not match if backoff/retries are not automatically corrected by internals parameter
        resp = request_extra("get", "http://whatever",
                             only_server_errors=False, intervals=intervals,
                             backoff=1000, retries=10)  # backoff/retries must be ignored here
        assert resp.status_code == HTTPGatewayTimeout.code
        assert mocked_request.call_count == 4  # first called directly, then 3 times, one for each interval
        # WARNING:
        #   cannot safely use mock counter since everything can increase it
        #   notably debugger/breakpoints that uses more calls to sleep()
        #   instead use our custom counter that employs unrealistic values
        assert sleep_tracker["called_count"] == 3  # first direct call doesn't have any sleep interval
        assert all(called == expect for called, expect in zip(sleep_tracker["called_with"], intervals))
Beispiel #5
0
    def capabilities(self, url=None):
        # type: (Optional[str]) -> OperationResult
        """
        List all available :term:`Process` on the instance.

        .. seealso::
            :ref:`proc_op_getcap`

        :param url: Instance URL if not already provided during client creation.
        """
        # 'detail=False' is not supported by non-Weaver instances, but saves the work when it is
        query = {"detail": False}
        proc_path = "{}/processes".format(self._get_url(url))
        resp = request_extra("GET",
                             proc_path,
                             params=query,
                             headers=self._headers,
                             settings=self._settings)
        result = self._parse_result(resp)
        processes = result.body.get("processes")
        # reduce full process summaries to plain identifiers when possible
        if isinstance(processes, list):
            if all(isinstance(proc, dict) for proc in processes):
                result.body = [get_any_id(proc) for proc in processes]
        return result
Beispiel #6
0
 def _query_features_paginated(self, params):
     # type: (Dict) -> Iterator[Tuple[Dict, str]]
     """
     Iterates over paginated OpenSearch query results until exhaustion or requested maximum.

     :param params: query parameters
     :returns: iterator of ``(feature, response_url)`` pairs, one per received feature.
     """
     start_index = 1  # OpenSearch 'startRecord' indexing starts at 1
     maximum_records = params.get("maximumRecords")
     template_url = self.get_template_url()
     base_url, query_params = self._prepare_query_url(template_url, params)
     while True:
         query_params["startRecord"] = start_index
         # retry with increasing delays (1..4s intervals) until an HTTP 200 is obtained
         response = request_extra("get",
                                  base_url,
                                  params=query_params,
                                  intervals=list(range(1, 5)),
                                  allowed_codes=[HTTPOk.code],
                                  settings=self.settings)
         if response.status_code != 200:
             break
         json_body = response.json()
         features = json_body.get("features", [])
         for feature in features:
             yield feature, response.url
         n_received_features = len(features)
         n_received_so_far = start_index + n_received_features - 1  # index starts at 1
         total_results = json_body["totalResults"]
         # stop on an empty page, on exhausted results, or when the requested maximum is reached
         if not n_received_features:
             break
         if n_received_so_far >= total_results:
             break
         if maximum_records and n_received_so_far >= maximum_records:
             break
         start_index += n_received_features
Beispiel #7
0
 def make_request(
         self,
         method,  # type: str
         url,  # type: str
         retry=False,  # type: Union[bool, int]
         cookies=None,  # type: Optional[AnyCookiesContainer]
         headers=None,  # type: Optional[AnyHeadersContainer]
         **kwargs,  # type: Any
 ):  # type: (...) -> AnyResponseType
     """
     Sends the request with additional parameter handling for the current process definition.

     :param method: HTTP method of the request.
     :param url: target URL of the request.
     :param retry: amount of retries to apply (boolean interpreted as 0/1).
     :param cookies: extra cookies merged with the authentication cookies.
     :param headers: extra headers merged with the process and authentication headers.
     """
     retry_count = 0 if retry is None else int(retry)
     # merge caller-provided cookies/headers with the process authentication context
     req_cookies = CaseInsensitiveDict(cookies or {})
     req_cookies.update(self.get_auth_cookies())
     req_headers = CaseInsensitiveDict(headers or {})
     req_headers.update(self.headers.copy())
     req_headers.update(self.get_auth_headers())
     return request_extra(method,
                          url=url,
                          settings=self.settings,
                          retries=retry_count,
                          headers=req_headers,
                          cookies=req_cookies,
                          **kwargs)
Beispiel #8
0
 def _request_extra_various(_mime_type):
     """
     Attempts multiple request-retry variants to be as permissive as possible to sporadic/temporary failures.
     """
     _mime_type_url = "{}{}".format(IANA_NAMESPACE_DEFINITION[IANA_NAMESPACE], _mime_type)
     # first attempt: lightweight HEAD request with retries and a short timeout
     try:
         resp = request_extra("head",
                              _mime_type_url,
                              retries=3,
                              timeout=0.5,
                              allow_redirects=True,
                              allowed_codes=[HTTPOk.code, HTTPNotFound.code])
         if resp.status_code == HTTPOk.code:
             return _make_if_ref(IANA_NAMESPACE_DEFINITION, IANA_NAMESPACE, _mime_type)
     except ConnectionError as exc:
         LOGGER.debug("Format request [%s] connection error: [%s]", _mime_type_url, exc)
     # second attempt: direct URL open with a bounded timeout
     try:
         resp = urlopen(_mime_type_url, timeout=1)  # nosec: B310 # is hardcoded HTTP(S)
         if resp.code == HTTPOk.code:
             return _make_if_ref(IANA_NAMESPACE_DEFINITION, IANA_NAMESPACE, _mime_type)
     except HTTPError:
         pass
     return None
Beispiel #9
0
def check_wps_status(
        location=None,  # type: Optional[str]
        response=None,  # type: Optional[XML]
        sleep_secs=2,  # type: int
        verify=True,  # type: bool
        settings=None,  # type: Optional[AnySettingsContainer]
):  # type: (...) -> WPSExecution
    """
    Run :func:`owslib.wps.WPSExecution.checkStatus` with additional exception handling.

    :param location: job URL or file path where to look for job status.
    :param response: WPS response document of job status.
    :param sleep_secs: number of seconds to sleep before returning control to the caller.
    :param verify: Flag to enable SSL verification.
    :param settings: Application settings to retrieve any additional request parameters as applicable.
    :return: OWSLib.wps.WPSExecution object.
    """
    def _retry_file():
        # fallback that resolves the status document from a local file reference
        LOGGER.warning(
            "Failed retrieving WPS status-location, attempting with local file."
        )
        out_path = get_wps_local_status_location(location, settings)
        if not out_path:
            raise HTTPNotFound(
                "Could not find file resource from [{}].".format(location))
        LOGGER.info("Resolved WPS status-location using local file reference.")
        # context manager guarantees the file handle is closed (was previously leaked)
        with open(out_path, mode="r", encoding="utf-8") as status_file:
            return status_file.read()

    execution = WPSExecution()
    if response:
        LOGGER.debug("Retrieving WPS status from XML response document...")
        xml = response
    elif location:
        # default to not-found so a failed/absent request also triggers the file fallback below
        xml_resp = HTTPNotFound()
        try:
            LOGGER.debug("Attempt to retrieve WPS status-location from URL...")
            xml_resp = request_extra("get",
                                     location,
                                     verify=verify,
                                     settings=settings)
            xml = xml_resp.content
        except Exception as ex:
            LOGGER.debug("Got exception during get status: [%r]", ex)
            xml = _retry_file()
        if xml_resp.status_code == HTTPNotFound.code:
            LOGGER.debug("Got not-found during get status: [%r]", xml)
            xml = _retry_file()
    else:
        raise Exception(
            "Missing status-location URL/file reference or response with XML object."
        )
    if isinstance(xml, str):
        # 'checkStatus' expects raw bytes; drop any undecodable characters
        xml = xml.encode("utf8", errors="ignore")
    execution.checkStatus(response=xml, sleepSecs=sleep_secs)
    if execution.response is None:
        raise Exception("Missing response, cannot check status.")
    if not isinstance(execution.response, lxml.etree._Element):  # noqa
        execution.response = lxml.etree.fromstring(execution.response)
    return execution
Beispiel #10
0
    def results(self, job_reference, out_dir=None, download=False, url=None):
        # type: (str, Optional[str], bool, Optional[str]) -> OperationResult
        """
        Obtain the results of a successful :term:`Job` execution.

        :param job_reference: Either the full :term:`Job` status URL or only its UUID.
        :param out_dir: Output directory where to store downloaded files if requested (default: CURDIR/JobID/<outputs>).
        :param download: Download any file reference found within results (CAUTION: could transfer lots of data!).
        :param url: Instance URL if not already provided during client creation.
        :returns: Result details and local paths if downloaded.
        """
        job_id, job_url = self._parse_job_ref(job_reference, url)
        # results can only be retrieved from a successfully completed job
        status = self.status(job_url)
        if not status.success:
            return OperationResult(
                False, "Cannot process results from incomplete or failed job.",
                status.body)
        # use results endpoint instead of outputs to be OGC-API compliant, should be able to target non-Weaver instance
        # with this endpoint, outputs IDs are directly at the root of the body
        result_url = f"{job_url}/results"
        resp = request_extra("GET",
                             result_url,
                             headers=self._headers,
                             settings=self._settings)
        res_out = self._parse_result(resp)
        outputs = res_out.body
        if not res_out.success or not isinstance(res_out.body, dict):
            return OperationResult(
                False, "Could not retrieve any output results from job.",
                outputs)
        if not download:
            # listing only: return the outputs body without fetching any file reference
            return OperationResult(True, "Listing job results.", outputs)

        # download file results
        if not any("href" in value for value in outputs.values()):
            return OperationResult(
                False,
                "Outputs were found but none are downloadable (only raw values?).",
                outputs)
        if not out_dir:
            out_dir = os.path.join(os.path.realpath(os.path.curdir), job_id)
        os.makedirs(out_dir, exist_ok=True)
        LOGGER.info("Will store job [%s] output results in [%s]", job_id,
                    out_dir)
        for output, value in outputs.items():
            # normalize single outputs into a list so both cases are processed uniformly
            is_list = True
            if not isinstance(value, list):
                value = [value]
                is_list = False
            for i, item in enumerate(value):
                if "href" in item:
                    # fetch the file and record its local path alongside the original reference
                    file_path = fetch_file(item["href"], out_dir, link=False)
                    if is_list:
                        outputs[output][i]["path"] = file_path
                    else:
                        outputs[output]["path"] = file_path
        return OperationResult(True, "Retrieved job results.", outputs)
Beispiel #11
0
    def monitor(self,
                job_reference,
                timeout=None,
                interval=None,
                wait_for_status=STATUS_SUCCEEDED,
                url=None):
        # type: (str, Optional[int], Optional[int], str, Optional[str]) -> OperationResult
        """
        Monitor the execution of a :term:`Job` until completion.

        .. seealso::
            :ref:`proc_op_monitor`

        :param job_reference: Either the full :term:`Job` status URL or only its UUID.
        :param timeout: timeout (seconds) of maximum wait time for monitoring if completion is not reached.
        :param interval: wait interval (seconds) between polling monitor requests.
        :param wait_for_status: monitor until the requested status is reached (default: job failed or succeeded).
        :param url: Instance URL if not already provided during client creation.
        :return: result of the successful or failed job, or timeout of monitoring process.
        """
        job_id, job_url = self._parse_job_ref(job_reference, url)
        remain = timeout = timeout or self.monitor_timeout
        delta = interval or self.monitor_interval
        LOGGER.info("Monitoring job [%s] for %ss at intervals of %ss.", job_id,
                    timeout, delta)
        # 'once' guarantees at least one status check even if 'remain' is already expired
        once = True
        body = None
        while remain >= 0 or once:
            resp = request_extra("GET",
                                 job_url,
                                 headers=self._headers,
                                 settings=self._settings)
            if resp.status_code != 200:
                return OperationResult(
                    False, "Could not find job with specified reference.",
                    {"job": job_reference})
            body = resp.json()
            status = body.get("status")
            if status == wait_for_status:
                return OperationResult(
                    True, f"Requested job status reached [{wait_for_status}].",
                    body)
            if status in JOB_STATUS_CATEGORIES[JOB_STATUS_CATEGORY_FINISHED]:
                # job is done, but in another final state than the requested one
                return OperationResult(
                    False,
                    "Requested job status not reached, but job has finished.",
                    body)
            # wait between polls and consume the remaining allowed monitoring time
            time.sleep(delta)
            remain -= delta
            once = False
        return OperationResult(
            False,
            f"Monitoring timeout reached ({timeout}s). Job did not complete in time.",
            body)
Beispiel #12
0
    def get_template_url(self):
        """
        Resolve the OpenSearch results template URL from the OSDD document.
        """
        resp = request_extra("get",
                             self.osdd_url,
                             params=self.params,
                             settings=self.settings)
        resp.raise_for_status()
        # locate the 'Url' element advertising the results template
        xml = lxml.etree.fromstring(resp.content)
        url_xpath = "//*[local-name() = 'Url'][@rel='results']"
        result_url = xml.xpath(url_xpath)[0]  # type: XML
        return result_url.attrib["template"]
Beispiel #13
0
    def get_user_auth_header(self):
        """
        Obtain the authorization header to employ against the ADES instance.

        Requests an OAuth2 access token using WSO2 credentials resolved from settings when token usage is enabled.

        :returns: ``Authorization`` header mapping with the Bearer token, or ``None`` value when unavailable.
        :raises HTTPUnauthorized: if the token endpoint does not respond with JSON content.
        """
        # TODO: find a better way to generalize this to Magpie credentials?
        if not asbool(self.settings.get("ades.use_auth_token", True)):
            return {}

        ades_usr = self.settings.get("ades.username", None)
        ades_pwd = self.settings.get("ades.password", None)
        ades_url = self.settings.get("ades.wso2_hostname", None)
        ades_client = self.settings.get("ades.wso2_client_id", None)
        ades_secret = self.settings.get("ades.wso2_client_secret", None)
        access_token = None
        if ades_usr and ades_pwd and ades_url and ades_client and ades_secret:
            ades_body = {
                "grant_type": "password",
                "client_id": ades_client,
                "client_secret": ades_secret,
                "username": ades_usr,
                "password": ades_pwd,
                "scope": "openid",
            }
            ades_headers = {
                "Content-Type": CONTENT_TYPE_APP_FORM,
                "Accept": CONTENT_TYPE_APP_JSON
            }
            ades_access_token_url = "{}/oauth2/token".format(ades_url)
            cred_resp = request_extra("post",
                                      ades_access_token_url,
                                      data=ades_body,
                                      headers=ades_headers,
                                      settings=self.settings)
            cred_resp.raise_for_status()
            # use an empty-string default so a missing 'Content-Type' header raises the
            # intended HTTPUnauthorized instead of a TypeError from 'in None'
            if CONTENT_TYPE_APP_JSON not in cred_resp.headers.get(
                    "Content-Type", ""):
                raise HTTPUnauthorized(
                    "Cannot retrieve valid access token using credential or ADES configurations."
                )
            access_token = cred_resp.json().get("access_token", None)
            if not access_token:
                warnings.warn(
                    "Could not retrieve valid access token although response is expected to contain one.",
                    MissingParameterWarning)
        else:
            warnings.warn(
                "Could not retrieve at least one of required login parameters: "
                "[ades.username, ades.password, ades.wso2_hostname, ades.wso2_client_id, ades.wso2_client_secret]",
                MissingParameterWarning)
        return {
            "Authorization":
            "Bearer {}".format(access_token) if access_token else None
        }
Beispiel #14
0
 def make_request(self, method, url, retry=False, **kwargs):
     # type: (str, str, Union[bool, int], Any) -> AnyResponseType
     """
     Sends the request with additional parameter handling for the current process definition.

     :param method: HTTP method of the request.
     :param url: target URL of the request.
     :param retry: amount of retries to apply (boolean interpreted as 0/1).
     """
     # treat an explicit 'None' the same as disabled retries
     retry_count = 0 if retry is None else int(retry)
     return request_extra(method,
                          url=url,
                          settings=self.settings,
                          retries=retry_count,
                          headers=self.headers,
                          cookies=self.cookies,
                          **kwargs)
Beispiel #15
0
    def _fetch_datatsets_from_alternates_links(self, alternate_links):
        # Resolve dataset links by loading the Atom alternate representation.
        for alt_link in alternate_links:
            if alt_link["type"] != "application/atom+xml":
                continue
            resp = request_extra("get",
                                 alt_link["href"],
                                 settings=self.settings)
            resp.raise_for_status()
            # extract every '<entry><link>' attribute set from the Atom feed
            xml = lxml.etree.fromstring(resp.content)
            entry_xpath = "//*[local-name() = 'entry']/*[local-name() = 'link']"
            entry_links = xml.xpath(entry_xpath)  # type: List[XML]
            return [entry.attrib for entry in entry_links]
        return []
Beispiel #16
0
    def dismiss(self, job_reference, url=None):
        """
        Dismiss pending or running :term:`Job`, or clear result artifacts from a completed :term:`Job`.

        :param job_reference: Either the full :term:`Job` status URL or only its UUID.
        :param url: Instance URL if not already provided during client creation.
        :returns: Obtained result from the operation.
        """
        job_id, job_url = self._parse_job_ref(job_reference, url)
        LOGGER.debug("Dismissing job: [%s]", job_id)
        response = request_extra("DELETE",
                                 job_url,
                                 headers=self._headers,
                                 settings=self._settings)
        return self._parse_result(response)
Beispiel #17
0
    def undeploy(self, process_id, url=None):
        # type: (str, Optional[str]) -> OperationResult
        """
        Undeploy an existing :term:`Process`.

        :param process_id: Identifier of the process to undeploy.
        :param url: Instance URL if not already provided during client creation.
        """
        proc_path = "{}/processes/{}".format(self._get_url(url), process_id)
        response = request_extra("DELETE",
                                 proc_path,
                                 headers=self._headers,
                                 settings=self._settings)
        return self._parse_result(response)
Beispiel #18
0
def test_request_extra_allowed_codes():
    """
    Verifies that ``allowed_codes`` only are considered as valid status instead of any non-error HTTP code.
    """
    # statuses are popped from the end of the list, so they are effectively returned in reverse order
    status_stack = {"codes": [HTTPCreated.code, HTTPOk.code, HTTPCreated.code]}

    def fake_request(*_, **__):
        response = Response()
        response.status_code = status_stack["codes"].pop()
        return response

    with mock.patch("requests.Session.request", side_effect=fake_request) as mocked:
        resp = request_extra("get", "http://whatever", retries=3, allowed_codes=[HTTPOk.code])
        # first response (201) is not allowed although non-error; retry obtains the allowed 200 and stops
        assert resp.status_code == HTTPOk.code
        assert mocked.call_count == 2
Beispiel #19
0
    def status(self, job_reference, url=None):
        """
        Obtain the status of a :term:`Job`.

        .. seealso::
            :ref:`proc_op_status`

        :param job_reference: Either the full :term:`Job` status URL or only its UUID.
        :param url: Instance URL if not already provided during client creation.
        :returns: retrieved status of the job.
        """
        job_id, job_url = self._parse_job_ref(job_reference, url)
        LOGGER.info("Getting job status: [%s]", job_id)
        response = request_extra("GET",
                                 job_url,
                                 headers=self._headers,
                                 settings=self._settings)
        return self._parse_result(response)
Beispiel #20
0
 def test_frontpage_format(self):
     """
     Validates the frontpage response schema and that every advertised link is reachable with the expected type.
     """
     resp = self.testapp.get(sd.api_frontpage_service.path,
                             headers=self.json_headers)
     assert resp.status_code == 200
     body = resp.json
     # response body must comply with the declared frontpage schema
     try:
         sd.FrontpageSchema().deserialize(body)
     except colander.Invalid as ex:
         body = json.dumps(body, indent=2, ensure_ascii=False)
         self.fail(
             "expected valid response format as defined in schema [{!s}] in\n{}"
             .format(ex, body))
     refs = [link["rel"] for link in body["links"]]
     assert len(body["links"]) == len(
         set(refs)), "Link relationships must all be unique"
     for link in body["links"]:
         path = link["href"]
         rtype = link["type"]
         # any XML variant is acceptable when the link advertises an XML content-type
         if rtype in CONTENT_TYPE_ANY_XML:
             rtype = CONTENT_TYPE_ANY_XML
         else:
             rtype = [rtype]
         rel = link["rel"]
         if "localhost" in path:
             # local links are resolved through the test application directly
             resp = self.testapp.get(
                 urlparse(path).path,
                 expect_errors=True)  # allow error for wps without queries
         else:
             # external links are fetched with retries to tolerate sporadic failures
             resp = request_extra("GET",
                                  path,
                                  retries=3,
                                  retry_after=True,
                                  ssl_verify=False,
                                  allow_redirects=True)
         code = resp.status_code
         test = "({}) [{}]".format(rel, path)
         assert code in [
             200, 400
         ], "Reference link expected to be found, got [{}] for {}".format(
             code, test)
         # compare only the media-type portion, ignoring any ';charset=...' parameters
         ctype = resp.headers.get("Content-Type", "").split(";")[0].strip()
         assert ctype in rtype, "Reference link content does not match [{}]!=[{}] for {}".format(
             ctype, rtype, test)
Beispiel #21
0
 def request_callback(request):
     # type: (AnyRequestType) -> Tuple[int, Dict[str, str], str]
     """
     Operation called when the file-server URL is matched against incoming requests that have been mocked.

     :param request: incoming mocked request to serve from the local directory.
     :returns: tuple of ``(status code, response headers, response content)``.
     """
     if (mock_head
             and request.method == "HEAD") or (mock_get
                                               and request.method == "GET"):
         # map the mocked server URL onto the corresponding local file path
         file_url = "file://{}".format(
             request.url.replace(url, directory, 1))
         resp = request_extra(request.method, file_url, settings=settings)
         if resp.status_code == 200:
             headers = resp.headers
             content = resp.content
             file_path = file_url.replace("file://", "")
             mime_type, encoding = mimetypes.guess_type(file_path)
             # emulate typical file-server response headers
             headers.update({
                 "Server":
                 "mocked_wps_output",
                 "Date":
                 str(datetime.datetime.utcnow()),
                 "Content-Type":
                 mime_type or CONTENT_TYPE_TEXT_PLAIN,
                 "Content-Encoding":
                 encoding or "",
                 "Last-Modified":
                 str(
                     datetime.datetime.fromtimestamp(
                         os.stat(file_path).st_mtime))
             })
             if request.method == "HEAD":
                 # HEAD responses must not carry a body nor its length
                 headers.pop("Content-Length", None)
                 content = ""
             if request.method == "GET":
                 headers.update({
                     "Content-Length":
                     str(headers.get("Content-Length", len(resp.content))),
                 })
             headers.update(headers_override or {})
             return resp.status_code, headers, content
     else:
         # requested method is not enabled for mocking
         return 405, {}, ""
     # method matched but the backing file could not be resolved
     return 404, {}, ""
Beispiel #22
0
 def make_request(self,
                  method,
                  url,
                  retry,
                  status_code_mock=None,
                  **kwargs):
     """
     Sends the request with a single optional retry on bad-gateway errors.

     :param method: HTTP method of the request.
     :param url: target URL of the request.
     :param retry: when enabled, retry once after a delay if a 502 Bad Gateway is returned.
     :param status_code_mock: substitute status code to report when a 502 persists.
     """
     response = request_extra(method,
                              url=url,
                              settings=self.settings,
                              headers=self.headers,
                              cookies=self.cookies,
                              verify=self.verify,
                              **kwargs)
     # TODO: Remove patch for Geomatys unreliable server
     if response.status_code == HTTPBadGateway.code and retry:
         sleep(10)
         # single recursive retry with 'retry=False' to avoid retrying indefinitely
         response = self.make_request(method, url, False, **kwargs)
     if response.status_code == HTTPBadGateway.code and status_code_mock:
         response.status_code = status_code_mock
     return response
Beispiel #23
0
    def describe(self, process_id, url=None):
        # type: (str, Optional[str]) -> OperationResult
        """
        Describe the specified :term:`Process`.

        .. seealso::
            :ref:`proc_op_describe`

        :param process_id: Identifier of the process to describe.
        :param url: Instance URL if not already provided during client creation.
        """
        proc_path = "{}/processes/{}".format(self._get_url(url), process_id)
        response = request_extra("GET",
                                 proc_path,
                                 headers=self._headers,
                                 settings=self._settings)
        # API response from this request can contain 'description' matching the process description
        # rather than a generic response 'description'. Enforce the provided message to avoid confusion.
        return self._parse_result(
            response, message="Process description successfully retrieved.")
Beispiel #24
0
def check_wps_status(
        location=None,  # type: Optional[str]
        response=None,  # type: Optional[xml_util.XML]
        sleep_secs=2,  # type: int
        verify=True,  # type: bool
        settings=None,  # type: Optional[AnySettingsContainer]
):  # type: (...) -> WPSExecution
    """
    Run :func:`owslib.wps.WPSExecution.checkStatus` with additional exception handling.

    :param location: job URL or file path where to look for job status.
    :param response: WPS response document of job status.
    :param sleep_secs: number of seconds to sleep before returning control to the caller.
    :param verify: flag to enable SSL verification.
    :param settings: application settings to retrieve any additional request parameters as applicable.
    :returns: OWSLib.wps.WPSExecution object.
    """
    def _retry_file():
        # type: () -> str
        # fallback: resolve the status document from the local WPS status location
        # when the HTTP status-location could not be fetched successfully
        LOGGER.warning(
            "Failed retrieving WPS status-location, attempting with local file."
        )
        out_path = get_wps_local_status_location(location, settings)
        if not out_path:
            raise HTTPNotFound(
                f"Could not find file resource from [{location}].")
        LOGGER.info("Resolved WPS status-location using local file reference.")
        with open(out_path, mode="r", encoding="utf-8") as f:
            return f.read()

    execution = WPSExecution()
    if response:
        # an explicit XML response document takes precedence over the status-location
        LOGGER.debug("Retrieving WPS status from XML response document...")
        xml_data = response
    elif location:
        # default to a 404-like response so that a request exception below
        # also falls through to the local-file retry path
        xml_resp = HTTPNotFound()
        xml_data = None
        try:
            LOGGER.debug(
                "Attempt to retrieve WPS status-location from URL [%s]...",
                location)
            xml_resp = request_extra("get",
                                     location,
                                     verify=verify,
                                     settings=settings)
            xml_data = xml_resp.content
        except Exception as ex:
            LOGGER.debug(
                "Got exception during get status: [%r]. Will retry with local reference.",
                ex)
        if xml_resp.status_code != HTTPOk.code:
            LOGGER.debug(
                "WPS XML status not found: [%r]. Retrying with local reference.",
                xml_data)
            xml_data = _retry_file()
    else:
        raise Exception(
            "Missing status-location URL/file reference or response with XML object."
        )
    if isinstance(xml_data, str):
        # OWSLib XML parsing expects bytes
        xml_data = xml_data.encode("utf8", errors="ignore")
    execution.checkStatus(response=xml_data, sleepSecs=sleep_secs)
    if execution.response is None:
        raise Exception("Missing response, cannot check status.")
    if not isinstance(execution.response, xml_util.XML):
        # normalize to a parsed XML element for downstream consumers
        execution.response = xml_util.fromstring(execution.response)
    return execution
Beispiel #25
0
    def test_frontpage_format(self):
        """
        Validate the API frontpage response against its schema and verify that each advertised
        link is reachable with a media-type matching the one it declares.
        """
        resp = self.testapp.get(sd.api_frontpage_service.path,
                                headers=self.json_headers)
        assert resp.status_code == 200
        body = resp.json
        try:
            sd.FrontpageSchema().deserialize(body)
        except colander.Invalid as ex:
            # dump the full body in the failure message to ease debugging of schema mismatches
            body = json.dumps(body, indent=2, ensure_ascii=False)
            self.fail(
                f"expected valid response format as defined in schema [{ex!s}] in\n{body}"
            )
        refs = [link["rel"] for link in body["links"]]
        assert len(body["links"]) == len(
            set(refs)), "Link relationships must all be unique"
        for link in body["links"]:
            path = link["href"]
            rtype = link["type"]
            if rtype in ContentType.ANY_XML:
                # any XML variant is acceptable when the link declares an XML media-type
                rtype = ContentType.ANY_XML
            else:
                rtype = [rtype]
            rel = link["rel"]

            # request endpoint to validate it is accessible
            if "localhost" in path:
                resp = self.testapp.get(
                    urlparse(path).path,
                    expect_errors=True)  # allow error for wps without queries
            else:
                resp = request_extra("GET",
                                     path,
                                     retries=3,
                                     retry_after=True,
                                     ssl_verify=False,
                                     allow_redirects=True)
            user_agent = get_header("user-agent", resp.request.headers)
            if resp.status_code == 403 and "python" in user_agent:
                # some sites will explicitly block bots, retry with mocked user-agent simulating human user access
                resp = request_extra("GET",
                                     path,
                                     headers={"User-Agent": "Mozilla"},
                                     retries=3,
                                     retry_after=True,
                                     ssl_verify=False,
                                     allow_redirects=True)

            # validate contents and expected media-type
            code = resp.status_code
            test = f"({rel}) [{path}]"
            assert code in [
                200, 400
            ], f"Reference link expected to be found, got [{code}] for {test}"

            # FIXME: patch broken content-type from reference websites
            #  (see https://github.com/opengeospatial/NamingAuthority/issues/183)
            ctype_header_links = {
                "http://schemas.opengis.net/wps/": ContentType.APP_XML
            }
            ctype = resp.headers.get("Content-Type", "").split(";")[0].strip()
            if not ctype:
                # no Content-Type returned by the server, substitute the known patched value
                for ref_link in ctype_header_links:
                    if path.startswith(ref_link):
                        ctype = ctype_header_links[ref_link]
                        break
            assert ctype in rtype, f"Reference link content does not match [{ctype}]!=[{rtype}] for {test}"
Beispiel #26
0
    def execute(self, workflow_inputs, out_dir, expected_outputs):
        """
        Execute the process on the remote ADES: deploy it if needed, make it publicly visible,
        submit an asynchronous execution, monitor the job until completion, then fetch the
        expected outputs into :paramref:`out_dir`.

        :param workflow_inputs: mapping of input IDs to values or file references (``location``).
        :param out_dir: local directory where expected output files are written.
        :param expected_outputs: mapping of output IDs to their destination file names.
        """
        # TODO: test
        visible = self.is_visible()
        if not visible:  # includes private visibility and non-existing cases
            if visible is None:
                LOGGER.info(
                    "Process [%s] access is unauthorized on [%s] - deploying as admin.",
                    self.process, self.url)
            elif visible is False:
                LOGGER.info(
                    "Process [%s] is not deployed on [%s] - deploying.",
                    self.process, self.url)
            # TODO: Maybe always redeploy? What about cases of outdated deployed process?
            try:
                self.deploy()
            except Exception as exc:
                # FIXME: support for Spacebel, avoid conflict error incorrectly handled, remove 500 when fixed
                pass_http_error(exc, [HTTPConflict, HTTPInternalServerError])

        LOGGER.info("Process [%s] enforced to public visibility.",
                    self.process)
        try:
            self.set_visibility(visibility=VISIBILITY_PUBLIC)
        # TODO: support for Spacebel, remove when visibility route properly implemented on ADES
        except Exception as exc:
            pass_http_error(exc, HTTPNotFound)

        self.update_status("Preparing execute request for remote ADES.",
                           REMOTE_JOB_PROGRESS_REQ_PREP, status.STATUS_RUNNING)
        LOGGER.debug("Execute process WPS request for [%s]", self.process)

        # build the execute request body inputs:
        # file references (with 'location') become 'href' entries, literals become 'data' entries
        execute_body_inputs = []
        execute_req_id = "id"
        execute_req_input_val_href = "href"
        execute_req_input_val_data = "data"
        for workflow_input_key, workflow_input_value in workflow_inputs.items(
        ):
            if isinstance(workflow_input_value, list):
                # array input: repeat the (id, value) entry for each item
                for workflow_input_value_item in workflow_input_value:
                    if isinstance(
                            workflow_input_value_item,
                            dict) and "location" in workflow_input_value_item:
                        execute_body_inputs.append({
                            execute_req_id:
                            workflow_input_key,
                            execute_req_input_val_href:
                            workflow_input_value_item["location"]
                        })
                    else:
                        execute_body_inputs.append({
                            execute_req_id:
                            workflow_input_key,
                            execute_req_input_val_data:
                            workflow_input_value_item
                        })
            else:
                if isinstance(workflow_input_value,
                              dict) and "location" in workflow_input_value:
                    execute_body_inputs.append({
                        execute_req_id:
                        workflow_input_key,
                        execute_req_input_val_href:
                        workflow_input_value["location"]
                    })
                else:
                    execute_body_inputs.append({
                        execute_req_id:
                        workflow_input_key,
                        execute_req_input_val_data:
                        workflow_input_value
                    })
        # normalize 'href' inputs: convert OpenSearch local-file scheme to 'file://',
        # and host plain 'file://' references so the remote ADES can fetch them
        for exec_input in execute_body_inputs:
            if execute_req_input_val_href in exec_input and isinstance(
                    exec_input[execute_req_input_val_href], str):
                if exec_input[execute_req_input_val_href].startswith(
                        "{0}://".format(OPENSEARCH_LOCAL_FILE_SCHEME)):
                    exec_input[execute_req_input_val_href] = "file{0}".format(
                        exec_input[execute_req_input_val_href]
                        [len(OPENSEARCH_LOCAL_FILE_SCHEME):])
                elif exec_input[execute_req_input_val_href].startswith(
                        "file://"):
                    exec_input[execute_req_input_val_href] = self.host_file(
                        exec_input[execute_req_input_val_href])
                    LOGGER.debug("Hosting intermediate input [%s] : [%s]",
                                 exec_input[execute_req_id],
                                 exec_input[execute_req_input_val_href])

        execute_body_outputs = [{
            execute_req_id:
            output,
            "transmissionMode":
            EXECUTE_TRANSMISSION_MODE_REFERENCE
        } for output in expected_outputs]
        self.update_status("Executing job on remote ADES.",
                           REMOTE_JOB_PROGRESS_EXECUTION,
                           status.STATUS_RUNNING)

        execute_body = dict(mode=EXECUTE_MODE_ASYNC,
                            response=EXECUTE_RESPONSE_DOCUMENT,
                            inputs=execute_body_inputs,
                            outputs=execute_body_outputs)
        request_url = self.url + process_jobs_uri.format(
            process_id=self.process)
        response = self.make_request(method="POST",
                                     url=request_url,
                                     json=execute_body,
                                     retry=True)
        if response.status_code != 201:
            raise Exception(
                "Was expecting a 201 status code from the execute request : {0}"
                .format(request_url))

        # async execution: job status URL is returned in the 'Location' header
        job_status_uri = response.headers["Location"]
        job_status = self.get_job_status(job_status_uri)
        job_status_value = status.map_status(job_status["status"])

        self.update_status(
            "Monitoring job on remote ADES : {0}".format(job_status_uri),
            REMOTE_JOB_PROGRESS_MONITORING, status.STATUS_RUNNING)

        # poll the remote job until it reaches a finished status category
        while job_status_value not in status.JOB_STATUS_CATEGORIES[
                status.STATUS_CATEGORY_FINISHED]:
            sleep(5)
            job_status = self.get_job_status(job_status_uri)
            job_status_value = status.map_status(job_status["status"])

            LOGGER.debug(
                get_log_monitor_msg(job_status["jobID"], job_status_value,
                                    job_status.get("percentCompleted", 0),
                                    get_any_message(job_status),
                                    job_status.get("statusLocation")))
            self.update_status(
                get_job_log_msg(status=job_status_value,
                                message=get_any_message(job_status),
                                progress=job_status.get("percentCompleted", 0),
                                duration=job_status.get(
                                    "duration", None)),  # get if available
                map_progress(job_status.get("percentCompleted",
                                            0), REMOTE_JOB_PROGRESS_MONITORING,
                             REMOTE_JOB_PROGRESS_FETCH_OUT),
                status.STATUS_RUNNING)

        if job_status_value != status.STATUS_SUCCEEDED:
            LOGGER.debug(
                get_log_monitor_msg(job_status["jobID"], job_status_value,
                                    job_status.get("percentCompleted", 0),
                                    get_any_message(job_status),
                                    job_status.get("statusLocation")))
            raise Exception(job_status)

        self.update_status("Fetching job outputs from remote ADES.",
                           REMOTE_JOB_PROGRESS_FETCH_OUT,
                           status.STATUS_RUNNING)
        results = self.get_job_results(job_status["jobID"])
        for result in results:
            if get_any_id(result) in expected_outputs:
                # This is where cwl expect the output file to be written
                # TODO We will probably need to handle multiple output value...
                dst_fn = "/".join([
                    out_dir.rstrip("/"), expected_outputs[get_any_id(result)]
                ])

                # TODO Should we handle other type than File reference?
                resp = request_extra("get",
                                     get_any_value(result),
                                     allow_redirects=True,
                                     settings=self.settings)
                LOGGER.debug(
                    "Fetching result output from [%s] to cwl output destination: [%s]",
                    get_any_value(result), dst_fn)
                with open(dst_fn, mode="wb") as dst_fh:
                    dst_fh.write(resp.content)

        self.update_status("Execution on remote ADES completed.",
                           REMOTE_JOB_PROGRESS_COMPLETED,
                           status.STATUS_SUCCEEDED)
Beispiel #27
0
def estimate_workflow_quote(quote, process):
    # type: (Quote, Process) -> Quote
    """
    Loop :term:`Workflow` sub-:term:`Process` steps to get their respective :term:`Quote`.

    Submits an asynchronous quotation request for each step, then polls each quotation
    endpoint until it completes, fails, or exhausts the retry budget.

    :param quote: Quote to update with the cumulative parameters of all step quotes.
    :param process: Workflow process whose step processes must each be quoted.
    :returns: Updated quote with cumulated price and duration of all steps.
    :raises QuoteEstimationError: If any step quote could not be obtained.
    """
    settings = get_settings()
    process_url = process.href(settings)
    quote_steps = []
    quote_params = []
    workflow_steps = get_package_workflow_steps(process_url)
    for step in workflow_steps:
        # retrieve quote from provider ADES
        # TODO: data source mapping
        process_step_url = get_process_location(step["reference"])
        process_quote_url = f"{process_step_url}/quotations"

        # FIXME: how to estimate data transfer if remote process (?)
        # FIXME: how to produce intermediate process inputs (?) - remove xfail in functional test once resolved
        # FIXME: must consider fan-out in case of parallel steps
        data = {"inputs": [], "outputs": []}
        resp = request_extra("POST",
                             process_quote_url,
                             json=data,
                             headers={"Prefer": "respond-async"})
        href = resp.headers.get("Location")
        status = QuoteStatus.SUBMITTED
        retry = 0
        abort = 3  # tolerated failed polling requests before giving up on this step
        while status != QuoteStatus.COMPLETED and abort > 0:
            wait = wait_secs(retry)
            retry += 1
            resp = request_extra("GET", href)
            if resp.status_code != 200:
                abort -= 1
                wait = 5
            else:
                body = resp.json()
                status = QuoteStatus.get(body.get("status"))
                if status == QuoteStatus.COMPLETED:
                    quote_steps.append(href)
                    quote_params.append(body)
                    break
                if status == QuoteStatus.FAILED or status is None:
                    LOGGER.error(
                        "Quote estimation for sub-process [%s] under [%s] failed.",
                        step["name"], process.id)
                    break
            # wait between polling attempts only while retries remain
            # (previous logic slept only when 'abort <= 0', i.e. right before
            # exiting the loop, which busy-polled the quotation endpoint)
            if abort > 0:
                time.sleep(wait)
    if len(workflow_steps) != len(quote_params):
        raise QuoteEstimationError(
            "Could not obtain intermediate quote estimations for all Workflow steps."
        )

    # FIXME: what if different currencies are defined (?)
    currency = "CAD"
    params = {
        "price": 0,
        "currency": currency,
        "seconds": 0,
        "steps": quote_steps,
    }
    # cumulate the price and estimated duration contributed by each step quote
    for step_params in quote_params:
        params["price"] += step_params["price"]
        params["seconds"] += step_params["estimatedSeconds"]

    quote.update(**params)
    return quote
Beispiel #28
0
    def execute(self,
                process_id,
                inputs=None,
                monitor=False,
                timeout=None,
                interval=None,
                url=None):
        # type: (str, Optional[Union[str, JSON]], bool, Optional[int], Optional[int], Optional[str]) -> OperationResult
        """
        Execute a :term:`Job` for the specified :term:`Process` with provided inputs.

        When submitting inputs with :term:`OGC API - Processes` schema, top-level ``inputs`` key is expected.
        Under it, either the mapping (key-value) or listing (id,value) representation are accepted.
        If ``inputs`` is not found, the alternative :term:`CWL` will be assumed.

        When submitting inputs with :term:`CWL` *job* schema, plain key-value(s) pairs are expected.
        All values should be provided directly under the key (including arrays), except for ``File``
        type that must include the ``class`` and ``path`` details.

        .. seealso::
            :ref:`proc_op_execute`

        :param process_id: Identifier of the process to execute.
        :param inputs:
            Literal :term:`JSON` or :term:`YAML` contents of the inputs submitted and inserted into the execution body,
            using either the :term:`OGC API - Processes` or :term:`CWL` format, or a file path/URL referring to them.
        :param monitor:
            Automatically perform :term:`Job` execution monitoring until completion or timeout to obtain final results.
            If requested, this operation will become blocking until either the completed status or timeout is reached.
        :param timeout:
            Monitoring timeout (seconds) if requested.
        :param interval:
            Monitoring interval (seconds) between job status polling requests.
        :param url: Instance URL if not already provided during client creation.
        :returns: results of the operation.
        """
        if isinstance(inputs, list) and all(
                isinstance(item, list) for item in inputs):
            inputs = [items for sub in inputs
                      for items in sub]  # flatten 2D->1D list
        values = self._parse_inputs(inputs)
        # parsing failures are reported as an OperationResult, forwarded as-is to the caller
        if isinstance(values, OperationResult):
            return values
        data = {
            # NOTE: since sync is not yet properly implemented in Weaver, simulate with monitoring after if requested
            # FIXME: support 'sync' (https://github.com/crim-ca/weaver/issues/247)
            "mode": EXECUTE_MODE_ASYNC,
            "inputs": values,
            # FIXME: support 'response: raw' (https://github.com/crim-ca/weaver/issues/376)
            "response": EXECUTE_RESPONSE_DOCUMENT,
            # FIXME: allow omitting 'outputs' (https://github.com/crim-ca/weaver/issues/375)
            # FIXME: allow 'transmissionMode: value/reference' selection (https://github.com/crim-ca/weaver/issues/377)
            "outputs": {}
        }
        # FIXME: since (https://github.com/crim-ca/weaver/issues/375) not implemented, auto-populate all the outputs
        base = self._get_url(url)
        result = self.describe(process_id, url=base)
        if not result.success:
            return OperationResult(
                False,
                "Could not obtain process description for execution.",
                body=result.body,
                headers=result.headers,
                code=result.code,
                text=result.text)
        outputs = result.body.get("outputs")
        for output_id in outputs:
            # use 'value' to have all outputs reported in body as 'value/href' rather than 'Link' headers
            data["outputs"][output_id] = {
                "transmissionMode": EXECUTE_TRANSMISSION_MODE_VALUE
            }

        LOGGER.info("Executing [%s] with inputs:\n%s", process_id,
                    _json2text(inputs))
        path = f"{base}/processes/{process_id}/execution"  # use OGC-API compliant endpoint (not '/jobs')
        resp = request_extra("POST",
                             path,
                             json=data,
                             headers=self._headers,
                             settings=self._settings)
        result = self._parse_result(resp)
        if not monitor or not result.success:
            return result
        # although Weaver returns "jobID" in the body for convenience,
        # employ the "Location" header to be OGC-API compliant
        job_url = resp.headers.get("Location", "")
        time.sleep(
            1
        )  # small delay to ensure process execution had a chance to start before monitoring
        return self.monitor(job_url, timeout=timeout, interval=interval)
Beispiel #29
0
    def deploy(
            self,
            process_id=None,  # type: Optional[str]
            body=None,  # type: Optional[Union[JSON, str]]
            cwl=None,  # type: Optional[Union[CWL, str]]
            wps=None,  # type: Optional[str]
            token=None,  # type: Optional[str]
            username=None,  # type: Optional[str]
            password=None,  # type: Optional[str]
            undeploy=False,  # type: bool
            url=None,  # type: Optional[str]
    ):  # type: (...) -> OperationResult
        """
        Deploy a new :term:`Process` with specified metadata and reference to an :term:`Application Package`.

        The referenced :term:`Application Package` must be one of:
        - :term:`CWL` body, local file or URL in :term:`JSON` or :term:`YAML` format
        - :term:`WPS` process URL with :term:`XML` response
        - :term:`WPS-REST` process URL with :term:`JSON` response
        - :term:`OGC API - Processes` process URL with :term:`JSON` response

        If the reference is resolved to be a :term:`Workflow`, all its underlying :term:`Process` steps must be
        available under the same URL that this client was initialized with.

        .. seealso::
            :ref:`proc_op_deploy`

        :param process_id:
            Desired process identifier.
            Can be omitted if already provided in body contents or file.
        :param body:
            Literal :term:`JSON` contents, either using string representation of actual Python objects forming the
            request body, or file path/URL to :term:`YAML` or :term:`JSON` contents of the request body.
            Other parameters (:paramref:`process_id`, :paramref:`cwl`) can override corresponding fields within the
            provided body.
        :param cwl:
            Literal :term:`JSON` or :term:`YAML` contents, either using string representation of actual Python objects,
            or file path/URL with contents of the :term:`CWL` definition of the :term:`Application package` to be
            inserted into the body.
        :param wps:
            URL to an existing :term:`WPS` process (WPS-1/2 or WPS-REST/OGC-API).
        :param token:
            Authentication token for accessing private Docker registry if :term:`CWL` refers to such image.
        :param username:
            Username to form the authentication token to a private Docker registry.
        :param password:
            Password to form the authentication token to a private Docker registry.
        :param undeploy:
            Perform undeploy step as applicable prior to deployment to avoid conflict with exiting :term:`Process`.
        :param url:
            Instance URL if not already provided during client creation.
        :returns: results of the operation.
        """
        # resolve the deployment body (literal contents or file/URL reference)
        result = self._parse_deploy_body(body, process_id)
        if not result.success:
            return result
        # copy to avoid mutating the client's default headers with the auth token
        headers = copy.deepcopy(self._headers)
        headers.update(self._parse_auth_token(token, username, password))
        data = result.body
        # resolve and insert the application package (CWL or WPS reference) into the body
        result = self._parse_deploy_package(data, cwl, wps, process_id,
                                            headers)
        if not result.success:
            return result
        p_id = result.message
        data = result.body
        base = self._get_url(url)
        if undeploy:
            LOGGER.debug("Performing requested undeploy of process: [%s]",
                         p_id)
            # 404 is acceptable: the process simply was not deployed yet
            result = self.undeploy(process_id=p_id, url=base)
            if result.code not in [200, 404]:
                return OperationResult(
                    False,
                    "Failed requested undeployment prior deployment.",
                    body=result.body,
                    text=result.text,
                    code=result.code,
                    headers=result.headers)
        path = f"{base}/processes"
        resp = request_extra("POST",
                             path,
                             json=data,
                             headers=headers,
                             settings=self._settings)
        return self._parse_result(resp)
Beispiel #30
0
    def execute(self, workflow_inputs, out_dir, expected_outputs):
        """
        Execute the process on a remote WPS-1 provider: describe it, submit an asynchronous
        execution with converted inputs, monitor the job until completion, then fetch the
        expected outputs into :paramref:`out_dir`.

        :param workflow_inputs: mapping of input IDs to values or file references (``location``).
        :param out_dir: local directory where expected output files are written.
        :param expected_outputs: mapping of output IDs to their destination file names.
        """
        self.update_status("Preparing execute request for remote WPS1 provider.",
                           REMOTE_JOB_PROGRESS_REQ_PREP, status.STATUS_RUNNING)
        LOGGER.debug("Execute process WPS request for %s", self.process)
        try:
            try:
                wps = WebProcessingService(url=self.provider, headers=self.cookies, verify=self.verify)
                raise_on_xml_exception(wps._capabilities)  # noqa: W0212
            except Exception as ex:
                raise OWSNoApplicableCode("Failed to retrieve WPS capabilities. Error: [{}].".format(str(ex)))
            try:
                process = wps.describeprocess(self.process)
            except Exception as ex:
                raise OWSNoApplicableCode("Failed to retrieve WPS process description. Error: [{}].".format(str(ex)))

            # prepare inputs
            # identify which inputs are complex so they can be wrapped in ComplexDataInput below
            complex_inputs = []
            for process_input in process.dataInputs:
                if WPS_COMPLEX_DATA in process_input.dataType:
                    complex_inputs.append(process_input.identifier)

            # remove any 'null' input, should employ the 'default' of the remote WPS process
            inputs_provided_keys = filter(lambda i: workflow_inputs[i] != "null", workflow_inputs)

            wps_inputs = []
            for input_key in inputs_provided_keys:
                input_val = workflow_inputs[input_key]
                # in case of array inputs, must repeat (id,value)
                # in case of complex input (File), obtain location, otherwise get data value
                if not isinstance(input_val, list):
                    input_val = [input_val]

                input_values = []
                for val in input_val:
                    if isinstance(val, dict):
                        val = val["location"]

                    # owslib only accepts strings, not numbers directly
                    if isinstance(val, (int, float)):
                        val = str(val)

                    if val.startswith("file://"):
                        # we need to host file starting with file:// scheme
                        val = self.host_file(val)

                    input_values.append(val)

                # need to use ComplexDataInput structure for complex input
                # TODO: BoundingBox not supported
                for input_value in input_values:
                    if input_key in complex_inputs:
                        input_value = ComplexDataInput(input_value)

                    wps_inputs.append((input_key, input_value))

            # prepare outputs
            # each output is requested as (identifier, as_reference) where reference applies to complex data
            outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA) for o in process.processOutputs
                       if o.identifier in expected_outputs]

            self.update_status("Executing job on remote WPS1 provider.",
                               REMOTE_JOB_PROGRESS_EXECUTION, status.STATUS_RUNNING)

            mode = EXECUTE_MODE_ASYNC
            execution = wps.execute(self.process, inputs=wps_inputs, output=outputs, mode=mode, lineage=True)
            if not execution.process and execution.errors:
                raise execution.errors[0]

            self.update_status("Monitoring job on remote WPS1 provider : [{0}]".format(self.provider),
                               REMOTE_JOB_PROGRESS_MONITORING, status.STATUS_RUNNING)

            # poll the remote status document, tolerating up to 'max_retries'
            # consecutive failures before giving up on the job
            max_retries = 5
            num_retries = 0
            run_step = 0
            job_id = "<undefined>"
            while execution.isNotComplete() or run_step == 0:
                if num_retries >= max_retries:
                    raise Exception("Could not read status document after {} retries. Giving up.".format(max_retries))
                try:
                    execution = check_wps_status(location=execution.statusLocation, verify=self.verify,
                                                 sleep_secs=wait_secs(run_step))
                    job_id = execution.statusLocation.replace(".xml", "").split("/")[-1]
                    LOGGER.debug(get_log_monitor_msg(job_id, status.map_status(execution.getStatus()),
                                                     execution.percentCompleted, execution.statusMessage,
                                                     execution.statusLocation))
                    self.update_status(get_job_log_msg(status=status.map_status(execution.getStatus()),
                                                       message=execution.statusMessage,
                                                       progress=execution.percentCompleted,
                                                       duration=None),  # get if available
                                       map_progress(execution.percentCompleted,
                                                    REMOTE_JOB_PROGRESS_MONITORING, REMOTE_JOB_PROGRESS_FETCH_OUT),
                                       status.STATUS_RUNNING)
                except Exception as exc:
                    num_retries += 1
                    LOGGER.debug("Exception raised: %r", exc)
                    sleep(1)
                else:
                    # successful status check resets the consecutive-failure counter
                    num_retries = 0
                    run_step += 1

            if not execution.isSucceded():
                exec_msg = execution.statusMessage or "Job failed."
                LOGGER.debug(get_log_monitor_msg(job_id, status.map_status(execution.getStatus()),
                                                 execution.percentCompleted, exec_msg, execution.statusLocation))
                raise Exception(execution.statusMessage or "Job failed.")

            self.update_status("Fetching job outputs from remote WPS1 provider.",
                               REMOTE_JOB_PROGRESS_FETCH_OUT, status.STATUS_RUNNING)

            results = [ows2json_output(output, process) for output in execution.processOutputs]
            for result in results:
                result_id = get_any_id(result)
                result_val = get_any_value(result)
                if result_id in expected_outputs:
                    # This is where cwl expect the output file to be written
                    # TODO We will probably need to handle multiple output value...
                    dst_fn = "/".join([out_dir.rstrip("/"), expected_outputs[result_id]])

                    # TODO Should we handle other type than File reference?

                    resp = request_extra("get", result_val, allow_redirects=True, settings=self.settings)
                    LOGGER.debug("Fetching result output from [%s] to cwl output destination: [%s]", result_val, dst_fn)
                    with open(dst_fn, mode="wb") as dst_fh:
                        dst_fh.write(resp.content)

        except Exception as exc:
            # wrap any failure with the fully-qualified exception class name for traceability
            exception_class = "{}.{}".format(type(exc).__module__, type(exc).__name__)
            errors = "{0}: {1!s}".format(exception_class, exc)
            LOGGER.exception(exc)
            raise Exception(errors)

        self.update_status("Execution on remote WPS1 provider completed.",
                           REMOTE_JOB_PROGRESS_COMPLETED, status.STATUS_SUCCEEDED)