Beispiel #1
0
def to_owslib(value, data_type, encoding=None, mimetype=None, schema=None):
    """Wrap a raw value in the matching OWSLib input object.

    OWSLib only understands literal data, complex data and bounding-box data;
    anything that is not complex or bounding-box is sent as a literal string.
    """
    if data_type == "ComplexData":
        return ComplexDataInput(value, encoding=encoding, mimeType=mimetype, schema=schema)
    if data_type == "BoundingBoxData":
        # TODO: return BoundingBoxDataInput(data=value, crs=crs, dimensions=2)
        return value
    # LiteralData: owslib expects plain strings
    return str(value)
    def execute_process(self):
        """Collect inputs from the dialog widgets and launch the WPS Execute request.

        Vector layers are exported to a temporary GML file whose content is sent
        inline as ComplexDataInput; field combos, date edits and line edits are
        sent as plain strings. The request itself runs in a background
        ExecuteProcess thread and reports completion via its statusChanged signal.
        Shows an error in the QGIS message bar and returns early when no layer is
        selected or the layer type is unsupported.
        """
        self.setCursor(Qt.WaitCursor)
        # Async call: https://ouranosinc.github.io/pavics-sdi/tutorials/wps_with_python.html
        myinputs = []
        for param, widget in self.input_items.items():
            if isinstance(widget, QgsMapLayerComboBox):
                # TODO check input type and export into it (GML, GeoPackage, etc.)
                layer = widget.currentLayer()
                if layer is None:
                    iface.messageBar().pushMessage(
                        self.tr("Error"),
                        self.tr("There is not any layer"),
                        level=Qgis.Critical)
                    return
                if layer.type() == QgsMapLayer.VectorLayer:
                    tmp_ext = '.gml'
                    tmp_frmt = 'GML'
                else:
                    iface.messageBar().pushMessage(
                        "Error",
                        "Unsupported map layer type",
                        level=Qgis.Critical)
                    return

                # export the selected layer to a temp GML file and send its content inline
                tmp_file = QgsProcessingUtils.generateTempFilename(
                    self.process_identifier + '_' + param) + tmp_ext
                QgsVectorFileWriter.writeAsVectorFormat(layer,
                                                        tmp_file,
                                                        fileEncoding="UTF-8",
                                                        driverName=tmp_frmt)
                with open(tmp_file) as fd:
                    cdi = ComplexDataInput(fd.read())
                myinputs.append((param, cdi))
            elif isinstance(widget, QgsFieldComboBox):
                myinputs.append((param, widget.currentField()))
            elif isinstance(widget, QgsDateTimeEdit):
                myinputs.append((param, widget.date().toString('yyyy-MM-dd')))
            else:
                # TODO check also other types than just QLineEdit
                if widget.text() != 'None':
                    myinputs.append((param, widget.text()))
        self.appendLogMessage(
            self.tr("Executing {} process ...".format(
                self.process_identifier)))
        # run the request in a background thread; results arrive via signal
        self.executeProcess = ExecuteProcess()
        self.executeProcess.setUrl(self.service_url)
        self.executeProcess.setIdentifier(self.process_identifier)
        #         print(myinputs)
        self.executeProcess.setInputs(myinputs)
        self.executeProcess.statusChanged.connect(
            self.on_execute_process_response)
        self.executeProcess.start()
Beispiel #3
0
    def execute(self, appstruct):
        """Submit the WPS process execution as a background task.

        Inputs from *appstruct* are converted to owslib structures; complex
        inputs given as references are also remembered in the user's cart.

        :param appstruct: deserialized form data for the process inputs.
        :return: the id of the started job.
        """
        inputs = appstruct_to_inputs(self.request, appstruct)
        # owslib requires dedicated wrappers for complex and bounding-box inputs
        complex_inpts = {}
        bbox_inpts = []
        for inpt in self.process.dataInputs:
            if 'ComplexData' in inpt.dataType:
                complex_inpts[inpt.identifier] = inpt
            elif 'BoundingBoxData' in inpt.dataType:
                bbox_inpts.append(inpt.identifier)
        new_inputs = []
        for inpt in inputs:
            identifier = inpt[0]
            value = inpt[1]
            if identifier in complex_inpts:
                new_inputs.append((identifier, ComplexDataInput(value)))
                if is_reference(value):
                    if value not in self.request.cart:
                        # remember the referenced document in the user's cart
                        if complex_inpts[identifier].supportedValues:
                            mime_type = complex_inpts[
                                identifier].supportedValues[0].mimeType
                        else:
                            mime_type = None
                        LOGGER.debug("add input to cart: %s %s", identifier,
                                     mime_type)
                        self.request.cart.add_item(
                            value,
                            abstract="Automatically added in process execution.",
                            mime_type=mime_type)
            elif identifier in bbox_inpts:
                new_inputs.append((identifier, BoundingBoxDataInput(value)))
            else:
                new_inputs.append(inpt)
        inputs = new_inputs
        # prepare outputs: (identifier, as_reference) tuples
        outputs = []
        for output in self.process.processOutputs:
            outputs.append(
                (output.identifier, output.dataType == 'ComplexData'))

        from phoenix.tasks.execute import execute_process
        # NOTE: 'async' is a reserved word since Python 3.7 and is invalid as a
        # keyword argument; the task keyword is 'use_async' (see sibling usage).
        result = execute_process.delay(
            userid=self.request.unauthenticated_userid,
            url=self.wps.url,
            service_name=self.service_name,
            identifier=self.process.identifier,
            inputs=inputs,
            outputs=outputs,
            use_async=appstruct.get('_async_check', True))
        self.request.registry.notify(JobStarted(self.request, result.id))
        return result.id
Beispiel #4
0
def build_input(arg, is_data, process_inputs, sh):
    """Build one (identifier, value) WPS input tuple from a "key=value" argument.

    :param arg: raw "key=value" string.
    :param is_data: whether the value is a path to a data file to be uploaded.
    :param process_inputs: mapping of input identifier -> process input description.
    :param sh: StorageHub client used to upload data files.
    :return: (key, value) tuple suitable for owslib execution, or None to skip
             the argument (empty value or unknown identifier).
    """
    k, v = arg.split("=", 1)
    if not v:
        # skip those not specified, hopefully there will be some sane default
        return None
    inp = process_inputs.get(k)
    if not inp:
        # weird, not one of the declared inputs, ignore
        return None
    clean_v = util.restore_text(v)
    if is_data:
        file_url = None
        # really data? check if HTML of a previous dataminer run
        if magic.from_file(clean_v, mime=True) == "text/html":
            # html, try to read it and get the output description
            parser = CallerHTMLParser()
            # use a context manager so the file handle is not leaked
            with open(clean_v) as html_fd:
                parser.feed(html_fd.read())
            outputs = parser.caller_dataminer_data()
            # try to guess which one is the right input
            if outputs:
                for out in outputs.get("outputs", []):
                    # discard 'Log of the computation.csv'
                    if out["descriptor"] == "Log of the computation":
                        continue
                    logging.info("Assuming %s as the right input for the process", out)
                    file_url = out["url"]
                    break
        if not file_url:
            # we are here, so the html was not working as expected so just copy
            # to the StorageHub
            file_url = sh.upload_file(k, clean_v)
        return (k, ComplexDataInput(file_url, mimeType="text/xml"))
    else:
        if inp.dataType == "ComplexData":
            # assume text/xml is fine always?
            return (k, ComplexDataInput(clean_v, mimeType="text/xml"))
        else:
            # let's assume just taking the value is ok
            return (k, clean_v)
Beispiel #5
0
def test_wps_request8():
    """Build an asynchronous WPS Execute request and compare it to the cached XML."""
    # 'async' is a reserved word since Python 3.7, so it cannot be used as a
    # keyword argument; owslib renamed the parameter to 'mode' with SYNC/ASYNC.
    from owslib.wps import ASYNC

    # Process input/output arguments
    processid = "wordcount"
    textdoc = ComplexDataInput("Alice was beginning to get very tired ...")
    inputs = [("text", textdoc), ]
    outputs = [("output", True), ]

    # Build XML request for WPS process execution (async, with lineage)
    execution = WPSExecution()
    requestElement = execution.buildRequest(processid, inputs, output=outputs, mode=ASYNC, lineage=True)
    request = etree.tostring(requestElement)

    # Compare to cached XML request; close the file instead of leaking the handle
    with open(resource_file('wps_EmuExecuteRequest8.xml'), 'rb') as req_file:
        _request = req_file.read()
    assert compare_xml(request, _request) is True
Beispiel #6
0
def execute_process(process_name):
    """Flask view: execute *process_name* on the WPS with inputs from the request body.

    Literal inputs are read from DATA_PROPERTY and passed through as (key, value)
    tuples; geo inputs from GEO_DATA_PROPERTY are wrapped as GeoJSON
    ComplexDataInput objects.

    :return: JSON response with the parsed process outputs.
    """
    data = json.loads(request.data)
    process_data = []
    if Constants.DATA_PROPERTY in data:  # membership test; '.keys()' was redundant
        # dict items are already (key, value) tuples
        process_data.extend(data[Constants.DATA_PROPERTY].items())
    if Constants.GEO_DATA_PROPERTY in data:
        # geo inputs must be wrapped for owslib as complex (GeoJSON) data
        process_data.extend(
            (k, ComplexDataInput(v, Constants.GEOJSON_FORMAT))
            for k, v in data[Constants.GEO_DATA_PROPERTY].items()
        )
    execution = wps.execute(process_name, process_data)
    outputs = execution.processOutputs
    return jsonify(parse_outputs(outputs))
Beispiel #7
0
def test_wps_request7():
    """Build a synchronous WPS Execute request and compare it to the cached XML."""
    # Process input/output arguments
    processid = "wordcount"
    textdoc = ComplexDataInput("http://emu.readthedocs.org/en/latest/index.html")
    inputs = [("text", textdoc), ]
    outputs = [("output", True)]

    # Build XML request for WPS process execution, sync request
    execution = WPSExecution()
    requestElement = execution.buildRequest(processid, inputs, output=outputs, mode=SYNC, lineage=False)
    request = etree.tostring(requestElement)

    # Compare to cached XML request; close the file instead of leaking the handle
    with open(resource_file('wps_EmuExecuteRequest7.xml'), 'rb') as req_file:
        _request = req_file.read()
    print(request)
    assert compare_xml(request, _request) is True
Beispiel #8
0
    def execute(self, appstruct):
        """Submit the WPS process execution as a background task.

        Inputs from *appstruct* are converted to owslib structures before the
        task is queued.

        :param appstruct: deserialized form data for the process inputs.
        :return: the id of the started job.
        """
        inputs = appstruct_to_inputs(self.request, appstruct)
        # owslib requires dedicated wrappers for complex and bounding-box inputs
        complex_inpts = {}
        bbox_inpts = []
        for inpt in self.process.dataInputs:
            if 'ComplexData' in inpt.dataType:
                complex_inpts[inpt.identifier] = inpt
            elif 'BoundingBoxData' in inpt.dataType:
                bbox_inpts.append(inpt.identifier)
        new_inputs = []
        for inpt in inputs:
            identifier = inpt[0]
            value = inpt[1]
            if identifier in complex_inpts:
                new_inputs.append((identifier, ComplexDataInput(value)))
            elif identifier in bbox_inpts:
                # default CRS84 (WGS84 lon/lat) since the form provides no CRS
                crs = 'urn:ogc:def:crs:OGC:2:84'
                new_inputs.append((identifier, BoundingBoxDataInput(value, crs=crs)))
            else:
                new_inputs.append(inpt)
        inputs = new_inputs
        # prepare outputs: (identifier, as_reference) tuples
        outputs = []
        for output in self.process.processOutputs:
            outputs.append(
                (output.identifier, output.dataType == 'ComplexData'))

        from phoenix.tasks.execute import execute_process
        result = execute_process.delay(
            userid=self.request.unauthenticated_userid,
            url=self.wps.url,
            service_name=self.service.title,
            identifier=self.process.identifier,
            inputs=inputs,
            outputs=outputs,
            use_async=appstruct.get('_async_check', True))

        # give the job a chance to start
        sleep(1)
        self.request.registry.notify(JobStarted(self.request, result.id))
        # lazy %-style args: only formatted when DEBUG logging is enabled
        LOGGER.debug('wps url=%s', self.wps.url)
        LOGGER.debug('request inputs = %s', inputs)
        return result.id
Beispiel #9
0
 def _build_wps_inputs(self):
     """Wrap raw (id, value) pairs in the owslib input objects the process expects."""
     process = self.wps.describeprocess(self.identifier)
     # sort declared inputs by the wrapper they require
     complex_ids = []
     bbox_ids = []
     for data_input in process.dataInputs:
         if 'ComplexData' in data_input.dataType:
             complex_ids.append(data_input.identifier)
         elif 'BoundingBoxData' in data_input.dataType:
             bbox_ids.append(data_input.identifier)
     inputs = []
     for inpt in self.wps_inputs:
         LOGGER.debug("input=%s", inpt)
         key = inpt[0]
         if key in complex_ids:
             inputs.append((key, ComplexDataInput(inpt[1])))
         elif key in bbox_ids:
             inputs.append((key, BoundingBoxDataInput(inpt[1])))
         else:
             inputs.append(inpt)
     return inputs
Beispiel #10
0
def parse_wps_inputs(wps_process, job):
    """
    Parses expected WPS process inputs against submitted job input values considering supported process definitions.

    :param wps_process: owslib process description whose ``dataInputs`` declare the expected inputs.
    :param job: submitted job; its inputs may be a dict or a list of tuples/dicts.
    :return: list of ``(identifier, value)`` tuples ready for owslib execution.
    """
    complex_inputs = []
    for process_input in wps_process.dataInputs:
        if WPS_COMPLEX_DATA in process_input.dataType:
            complex_inputs.append(process_input.identifier)

    file_scheme = "file://"

    def _strip_file_scheme(val):
        # PyWPS rejects 'file://' references, so pass the plain filesystem path;
        # dict values carry the actual reference under one of the known keys
        val = get_any_value(val) if isinstance(val, dict) else val
        return val[len(file_scheme):] if str(val).startswith(file_scheme) else val

    try:
        wps_inputs = []
        # parse both dict and list type inputs
        job_inputs = job.inputs.items() if isinstance(job.inputs, dict) else job.get("inputs", [])
        for process_input in job_inputs:
            if isinstance(process_input, tuple):
                input_id = process_input[0]
                process_value = process_input[1]
            else:
                input_id = get_any_id(process_input)
                process_value = get_any_value(process_input)
            # in case of array inputs, must repeat (id, value)
            input_values = process_value if isinstance(process_value, list) else [process_value]
            input_values = [_strip_file_scheme(val) for val in input_values]

            # need to use ComplexDataInput structure for complex input
            # need to use literal String for anything else than complex
            # TODO: BoundingBox not supported
            wps_inputs.extend([
                (input_id, ComplexDataInput(input_value) if input_id in complex_inputs else str(input_value))
                for input_value in input_values])
    except KeyError:
        # malformed job inputs: fall back to an empty input list
        wps_inputs = []
    return wps_inputs
Beispiel #11
0
def test_smoke_execute_c3s_cmip6_orchestrate_metadata(wps, tmp_path):
    """Smoke test: run the C3S CMIP6 orchestrate workflow and check output metadata."""
    workflow_input = [("workflow", ComplexDataInput(WF_C3S_CMIP6))]
    urls = wps.execute("orchestrate", workflow_input)
    assert len(urls) == 1
    assert "rlds_Amon_INM-CM5-0_ssp245_r1i1p1f1_gr1_avg-t.nc" in urls[0]
    ds = open_dataset(urls[0], tmp_path)
    assert "rlds" in ds.variables
    # no fill value may leak into the bounds variables
    assert "_FillValue" not in ds.lat_bnds.encoding
    assert "_FillValue" not in ds.lon_bnds.encoding
    # assert "_FillValue" not in ds.time_bnds.encoding
    # nor into the coordinate variables
    # assert "_FillValue" not in ds.time.encoding
    assert "_FillValue" not in ds.lat.encoding
    assert "_FillValue" not in ds.lon.encoding
    # assert "_FillValue" not in ds.height.encoding
    # bounds variables must not carry a 'coordinates' attribute
    assert "coordinates" not in ds.lat_bnds.encoding
    assert "coordinates" not in ds.lon_bnds.encoding
 def execute_process(self):
     """Collect inputs from the dialog widgets and start the WPS execution thread."""
     myinputs = []
     for name in self.input_items:
         widget = self.input_items[name]
         print(name)
         print(widget)
         if isinstance(widget, QgsMapLayerComboBox):
             # TODO check input type and export into it (GML, GeoPackage, etc.)
             cdi = ComplexDataInput(
                 'http://rain.fsv.cvut.cz/geodata/test.gml')
             myinputs.append((name, cdi))
         else:
             # TODO check also other types than just QLineEdit
             text = widget.text()
             if text != 'None':
                 myinputs.append((name, text))
     self.textEditLog.append(
         QApplication.translate("WPS", "Executing process ...", None))
     # run the request in a background thread; results arrive via signal
     self.executeProcess = ExecuteProcess()
     self.executeProcess.setUrl(self.lineEditWpsUrl.text())
     self.executeProcess.setIdentifier(self.comboBoxProcesses.currentText())
     self.executeProcess.setInputs(myinputs)
     self.executeProcess.statusChanged.connect(
         self.on_execute_process_response)
     self.executeProcess.start()
Beispiel #13
0
def complex_input_with_content():
    """Execute the wordcount process, passing the document content inline
    via ComplexDataInput.
    """
    print("\ncomplex_input_with_content ...")

    wps = WebProcessingService('http://localhost:8094/wps', verbose=verbose)

    processid = 'wordcount'
    # inline text content (alice in wonderland)
    textdoc = ComplexDataInput("ALICE was beginning to get very tired ...")
    inputs = [("text", textdoc)]
    # (output identifier, asReference attribute) tuples
    outputs = [("output", True)]

    execution = wps.execute(processid, inputs, output=outputs)
    monitorExecution(execution)

    # report final status
    print('percent complete', execution.percentCompleted)
    print('status message', execution.statusMessage)

    for output in execution.processOutputs:
        print('identifier=%s, dataType=%s, data=%s, reference=%s' % (output.identifier, output.dataType, output.data, output.reference))
Beispiel #14
0
def complex_input_with_reference():
    """Execute the wordcount process, passing the document as a URL reference
    via ComplexDataInput.
    """
    print("\ncomplex_input_with_reference ...")

    wps = WebProcessingService('http://localhost:8094/wps', verbose=verbose)

    processid = 'wordcount'
    # document passed by reference (alice in wonderland)
    textdoc = ComplexDataInput("http://www.gutenberg.org/files/28885/28885-h/28885-h.htm")
    inputs = [("text", textdoc)]
    # (output identifier, asReference attribute) tuples
    outputs = [("output", True)]

    execution = wps.execute(processid, inputs, output=outputs)
    monitorExecution(execution)

    # report final status
    print('percent complete', execution.percentCompleted)
    print('status message', execution.statusMessage)

    for output in execution.processOutputs:
        print('identifier=%s, dataType=%s, data=%s, reference=%s' % (output.identifier, output.dataType, output.data, output.reference))
Beispiel #15
0
processid = 'org.n52.wps.python.algorithm.QuakeMLProcess'
inputs = [("lonmin", "288"), ("lonmax", "292"), ("latmin", "-70"),
          ("latmax", "-10"), ("mmin", "6.6"), ("mmax", "8.5"), ("zmin", "5"),
          ("zmax", "140"), ("p", "0.1"), ("etype", "deaggregation"),
          ("tlon", "-71.5730623712764"), ("tlat", "-33.1299174879672")]
output = "selected-rows"
execution = wps.execute(processid, inputs, output)

from owslib.wps import monitorExecution
monitorExecution(execution)

print(execution.processOutputs[0].reference)

from owslib.wps import ComplexDataInput
quakeMLInput = ComplexDataInput(value=execution.processOutputs[0].reference)

quakeMLInput.schema = "http://quakeml.org/xmlns/quakeml/1.2/QuakeML-1.2.xsd"
quakeMLInput.mimeType = "text/xml"

processid2 = 'org.n52.wps.python.algorithm.ShakemapProcess'
inputs2 = [("quakeml-input", quakeMLInput)]
output2 = "shakemap-output"
execution2 = wps.execute(processid2, inputs2, output2)

monitorExecution(execution2)

print(execution2.processOutputs[0].reference)

import urllib.request
contents = urllib.request.urlopen(
Beispiel #16
0
    def _adapt(self, input_value, input_desc, expecting_reference):
        """
        Try to fit the input_value to the needs of the downstream task input requirements
        This can involve returning the data or its reference, parsing the reference to return its content or even
        loading a json structure (or a reference of it) and returning its content
        The function will raise an exception if the input_value cannot met the input requirements
        :param input_value: output data as returned by the upstream task.
                            This object come from the WPSExecution.processOutputs array of (ows.wps.Output) object
        :param input_desc: input description as declared by the current task
        :param expecting_reference: indicate if the current task expects the input_value as reference or not
        :return: The data in the required form
                 The data will be feed to the WebProcessingService.execute function which expect an array of inputs
                 where the input can be :
                    - LiteralData inputs are expressed as simple (key,value) tuples where key is the input identifier,
                                  value is the value
                    - ComplexData inputs are expressed as (key, object) tuples, where key is the input identifier,
                                  and the object must contain a 'getXml()' method that returns an XML infoset to be
                                  included in the WPS request
                                  (ows.wps.ComplexDataInput or ows.wps.BoundingBoxDataInput)
        """
        expecting_complex = input_desc.dataType == 'ComplexData'

        output_data = None
        output_datatype = [
            input_value.dataType,
        ]

        if input_value.reference:
            # If a reference is available and we expect a reference consider it as the data from this point
            if expecting_reference:
                output_data = input_value.reference

                # Append the string datatype since a reference can be considered as a string too
                output_datatype.append('string')

            # If we expect the data read the reference
            else:
                output_data = self._read_reference(input_value.reference)

        # process output data are append into a list and
        # WPS standard v1.0.0 specify that Output data field has zero or one value
        elif input_value.data:
            output_data = input_value.data[0]

        # At this point raise an exception if we don't have data in wps_output.data
        if not output_data:
            raise TaskPE._get_exception(self.name, input_value, input_desc)

        # Consider the validation completed if the dataType match for non-complex data or
        # if the mimetype match for complex data
        supported_mimetypes = [
            value.mimeType for value in input_desc.supportedValues
        ] if expecting_complex else []
        if input_desc.dataType in output_datatype and \
           (not expecting_complex or input_value.mimeType in supported_mimetypes):
            return [
                output_data,
            ]

        # Remain cases are either datatypes or complex data mimetypes mismatching...

        # Before raising an exception we will check for a specific case that we can handle:
        # json array that could be fed into the downstream wps wanting an array of data.
        # If this specific case is detected we will simply send the json content to the downstream wps without further
        # validation since the json content type cannot be verified.
        take_array = input_desc.maxOccurs > 1
        if take_array and 'ComplexData' in output_datatype and input_value.mimeType == 'application/json':
            # If the json data is referenced and hasn't already been read, read it now
            if input_value.reference and expecting_reference:
                output_data = self._read_reference(input_value.reference)

            json_data = json.loads(output_data)
            if isinstance(json_data, list):
                array = []
                for value in json_data:
                    # Be a good guy and set the mimeType to something expected...
                    # BUGFIX: pass the mimeType string of the first supported format,
                    # not the format object itself (matches the extraction above)
                    array.append(
                        ComplexDataInput(value,
                                         mimeType=input_desc.supportedValues[0].mimeType
                                         ) if expecting_complex else value)
                return array

        # Cannot do anything else
        raise TaskPE._get_exception(self.name, input_value, input_desc)
Beispiel #17
0
    def execute(self, workflow_inputs, out_dir, expected_outputs):
        """Execute this step's process on a remote WPS-1 provider and fetch its outputs.

        :param workflow_inputs: mapping of input id -> value; dict values carry the
                                file reference under 'location', list values repeat
                                the id, and the literal string "null" omits the input
        :param out_dir: local directory where the expected output files are written
        :param expected_outputs: mapping of output id -> relative destination filename
        :raises Exception: on any failure, wrapped with the qualified exception name
        """
        self.update_status("Preparing execute request for remote WPS1 provider.",
                           REMOTE_JOB_PROGRESS_REQ_PREP, status.STATUS_RUNNING)
        LOGGER.debug("Execute process WPS request for %s", self.process)
        try:
            try:
                wps = WebProcessingService(url=self.provider, headers=self.cookies, verify=self.verify)
                raise_on_xml_exception(wps._capabilities)  # noqa: W0212
            except Exception as ex:
                raise OWSNoApplicableCode("Failed to retrieve WPS capabilities. Error: [{}].".format(str(ex)))
            try:
                process = wps.describeprocess(self.process)
            except Exception as ex:
                raise OWSNoApplicableCode("Failed to retrieve WPS process description. Error: [{}].".format(str(ex)))

            # prepare inputs
            complex_inputs = []
            for process_input in process.dataInputs:
                if WPS_COMPLEX_DATA in process_input.dataType:
                    complex_inputs.append(process_input.identifier)

            # remove any 'null' input, should employ the 'default' of the remote WPS process
            inputs_provided_keys = filter(lambda i: workflow_inputs[i] != "null", workflow_inputs)

            wps_inputs = []
            for input_key in inputs_provided_keys:
                input_val = workflow_inputs[input_key]
                # in case of array inputs, must repeat (id,value)
                # in case of complex input (File), obtain location, otherwise get data value
                if not isinstance(input_val, list):
                    input_val = [input_val]

                input_values = []
                for val in input_val:
                    if isinstance(val, dict):
                        val = val["location"]

                    # owslib only accepts strings, not numbers directly
                    if isinstance(val, (int, float)):
                        val = str(val)

                    if val.startswith("file://"):
                        # we need to host file starting with file:// scheme
                        val = self.host_file(val)

                    input_values.append(val)

                # need to use ComplexDataInput structure for complex input
                # TODO: BoundingBox not supported
                for input_value in input_values:
                    if input_key in complex_inputs:
                        input_value = ComplexDataInput(input_value)

                    wps_inputs.append((input_key, input_value))

            # prepare outputs
            outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA) for o in process.processOutputs
                       if o.identifier in expected_outputs]

            self.update_status("Executing job on remote WPS1 provider.",
                               REMOTE_JOB_PROGRESS_EXECUTION, status.STATUS_RUNNING)

            mode = EXECUTE_MODE_ASYNC
            execution = wps.execute(self.process, inputs=wps_inputs, output=outputs, mode=mode, lineage=True)
            if not execution.process and execution.errors:
                raise execution.errors[0]

            self.update_status("Monitoring job on remote WPS1 provider : [{0}]".format(self.provider),
                               REMOTE_JOB_PROGRESS_MONITORING, status.STATUS_RUNNING)

            # poll the remote status document until completion, tolerating up to
            # max_retries consecutive read failures before giving up
            max_retries = 5
            num_retries = 0
            run_step = 0
            job_id = "<undefined>"
            while execution.isNotComplete() or run_step == 0:
                if num_retries >= max_retries:
                    raise Exception("Could not read status document after {} retries. Giving up.".format(max_retries))
                try:
                    execution = check_wps_status(location=execution.statusLocation, verify=self.verify,
                                                 sleep_secs=wait_secs(run_step))
                    job_id = execution.statusLocation.replace(".xml", "").split("/")[-1]
                    LOGGER.debug(get_log_monitor_msg(job_id, status.map_status(execution.getStatus()),
                                                     execution.percentCompleted, execution.statusMessage,
                                                     execution.statusLocation))
                    self.update_status(get_job_log_msg(status=status.map_status(execution.getStatus()),
                                                       message=execution.statusMessage,
                                                       progress=execution.percentCompleted,
                                                       duration=None),  # get if available
                                       map_progress(execution.percentCompleted,
                                                    REMOTE_JOB_PROGRESS_MONITORING, REMOTE_JOB_PROGRESS_FETCH_OUT),
                                       status.STATUS_RUNNING)
                except Exception as exc:
                    num_retries += 1
                    LOGGER.debug("Exception raised: %r", exc)
                    sleep(1)
                else:
                    # successful read resets the retry budget
                    num_retries = 0
                    run_step += 1

            if not execution.isSucceded():
                exec_msg = execution.statusMessage or "Job failed."
                LOGGER.debug(get_log_monitor_msg(job_id, status.map_status(execution.getStatus()),
                                                 execution.percentCompleted, exec_msg, execution.statusLocation))
                raise Exception(execution.statusMessage or "Job failed.")

            self.update_status("Fetching job outputs from remote WPS1 provider.",
                               REMOTE_JOB_PROGRESS_FETCH_OUT, status.STATUS_RUNNING)

            results = [ows2json_output(output, process) for output in execution.processOutputs]
            for result in results:
                result_id = get_any_id(result)
                result_val = get_any_value(result)
                if result_id in expected_outputs:
                    # This is where cwl expect the output file to be written
                    # TODO We will probably need to handle multiple output value...
                    dst_fn = "/".join([out_dir.rstrip("/"), expected_outputs[result_id]])

                    # TODO Should we handle other type than File reference?

                    resp = request_extra("get", result_val, allow_redirects=True, settings=self.settings)
                    LOGGER.debug("Fetching result output from [%s] to cwl output destination: [%s]", result_val, dst_fn)
                    with open(dst_fn, mode="wb") as dst_fh:
                        dst_fh.write(resp.content)

        except Exception as exc:
            # wrap any failure with its fully-qualified exception name for traceability
            exception_class = "{}.{}".format(type(exc).__module__, type(exc).__name__)
            errors = "{0}: {1!s}".format(exception_class, exc)
            LOGGER.exception(exc)
            raise Exception(errors)

        self.update_status("Execution on remote WPS1 provider completed.",
                           REMOTE_JOB_PROGRESS_COMPLETED, status.STATUS_SUCCEEDED)
Beispiel #18
0
    def wps_execute(self, version, accept):
        """Submit a WPS Execute request and validate the resulting job end to end.

        :param version: WPS interface version, "1.0.0" (GET/KVP) or "2.0.0" (POST/XML)
        :param accept: Accept header content type driving the response parsing (XML or JSON)
        :raises ValueError: for any other *version* value
        """
        wps_url = get_wps_url(self.settings)
        if version == "1.0.0":
            test_content = "Test file in Docker - WPS KVP"
            wps_method = "GET"
        elif version == "2.0.0":
            test_content = "Test file in Docker - WPS XML"
            wps_method = "POST"
        else:
            raise ValueError("Invalid WPS version: {}".format(version))
        test_content += " {} request - Accept {}".format(wps_method, accept.split("/")[-1].upper())

        with contextlib.ExitStack() as stack_exec:
            # setup: temporary input file holding the test content
            dir_name = tempfile.gettempdir()
            tmp_file = stack_exec.enter_context(tempfile.NamedTemporaryFile(dir=dir_name, mode="w", suffix=".txt"))
            tmp_file.write(test_content)
            tmp_file.seek(0)
            for mock_exec in mocked_execute_process():
                stack_exec.enter_context(mock_exec)

            # execute
            if version == "1.0.0":
                # WPS-1: inputs encoded directly as KVP query parameters
                wps_inputs = ["file={}@mimeType={}".format(tmp_file.name, CONTENT_TYPE_TEXT_PLAIN)]
                wps_params = {
                    "service": "WPS",
                    "request": "Execute",
                    "version": version,
                    "identifier": self.process_id,
                    "DataInputs": wps_inputs,
                }
                wps_headers = {"Accept": accept}
                wps_data = None
            else:
                # WPS-2: build the Execute request XML document with owslib
                wps_inputs = [("file", ComplexDataInput(tmp_file.name, mimeType=CONTENT_TYPE_TEXT_PLAIN))]
                wps_outputs = [(self.out_key, True)]  # as reference
                wps_exec = WPSExecution(version=version, url=wps_url)
                wps_req = wps_exec.buildRequest(self.process_id, wps_inputs, wps_outputs)
                wps_data = xml_util.tostring(wps_req)
                wps_headers = {"Accept": accept, "Content-Type": CONTENT_TYPE_APP_XML}
                wps_params = None
            resp = mocked_sub_requests(self.app, wps_method, wps_url,
                                       params=wps_params, data=wps_data, headers=wps_headers, only_local=True)
            assert resp.status_code in [200, 201], (
                "Failed with: [{}]\nTest: [{}]\nReason:\n{}".format(resp.status_code, test_content, resp.text)
            )

            # parse response status
            if accept == CONTENT_TYPE_APP_XML:
                assert resp.content_type in CONTENT_TYPE_ANY_XML, test_content
                xml_body = xml_util.fromstring(str2bytes(resp.text))
                status_url = xml_body.get("statusLocation")
                job_id = status_url.split("/")[-1].split(".")[0]
            elif accept == CONTENT_TYPE_APP_JSON:
                assert resp.content_type == CONTENT_TYPE_APP_JSON, test_content
                status_url = resp.json["location"]
                job_id = resp.json["jobID"]
            assert status_url
            assert job_id

            if accept == CONTENT_TYPE_APP_XML:
                wps_out_url = self.settings["weaver.wps_output_url"]
                weaver_url = self.settings["weaver.url"]
                assert status_url == f"{wps_out_url}/{job_id}.xml", "Status URL should be XML file for WPS-1 request"
                # remap to employ JSON monitor method (could be done with XML parsing otherwise)
                status_url = f"{weaver_url}/jobs/{job_id}"

            # job monitoring
            results = self.monitor_job(status_url)
            outputs = self.get_outputs(status_url)

            # validate XML status is updated accordingly
            wps_xml_status = os.path.join(self.settings["weaver.wps_output_dir"], job_id + ".xml")
            assert os.path.isfile(wps_xml_status)
            with open(wps_xml_status, "r") as status_file:
                assert "ProcessSucceeded" in status_file.read()

        self.validate_outputs(job_id, results, outputs, test_content)
Beispiel #19
0
def parse_wps_inputs(wps_process, job):
    # type: (ProcessOWS, Job) -> List[Tuple[str, OWS_Input_Type]]
    """
    Parses expected WPS process inputs against submitted job input values considering supported process definitions.
    """
    complex_inputs = {}  # type: Dict[str, ComplexInput]
    for wps_input in wps_process.dataInputs:
        if WPS_COMPLEX_DATA in wps_input.dataType:
            complex_inputs[wps_input.identifier] = wps_input

    def _resolve_value(value):
        # PyWPS doesn't like the 'file://' scheme, so strip it and submit the plain path
        data = get_any_value(value) if isinstance(value, dict) else value
        return data[7:] if str(data).startswith("file://") else data

    def _resolve_format(detail):
        # extract (mime-type, encoding) from the submitted input metadata, if any was provided
        ctype = get_field(detail, "type", default=None)
        encoding = None
        if not ctype:
            media_format = get_field(detail, "format", default=None)
            if isinstance(media_format, dict):
                ctype = get_field(detail, "mime_type", search_variations=True, default=None)
                encoding = get_field(detail, "encoding", search_variations=True, default=None)
        return ctype, encoding

    try:
        wps_inputs = []
        # submitted inputs can be either a mapping {id: value} or a listing of {id, value} definitions
        if isinstance(job.inputs, dict):
            job_inputs = job.inputs.items()
        else:
            job_inputs = job.get("inputs", [])
        for job_input in job_inputs:
            if isinstance(job_input, tuple):
                input_id = job_input[0]
                input_val = job_input[1]
                job_input = input_val
            else:
                input_id = get_any_id(job_input)
                input_val = get_any_value(job_input)
            # array inputs must be submitted as repeated (id, value) pairs
            if isinstance(input_val, list):
                input_values = input_val
                input_details = input_val  # each value has its own metadata
            else:
                input_values = [input_val]
                input_details = [job_input]  # metadata directly in definition, not nested per array value

            input_values = [_resolve_value(val) for val in input_values]

            for input_value, input_detail in zip(input_values, input_details):
                if input_id in complex_inputs:
                    # complex input must employ the ComplexDataInput structure
                    # if provided, pass down specified data input format to allow validation against supported formats
                    ctype, encoding = _resolve_format(input_detail)
                    wps_inputs.append((input_id, ComplexDataInput(input_value, mimeType=ctype, encoding=encoding)))
                else:
                    # anything other than complex is handled as a literal string
                    # FIXME: pre-validate allowed literal values?
                    # TODO: BoundingBox not supported
                    wps_inputs.append((input_id, str(input_value)))
    except KeyError:
        wps_inputs = []
    return wps_inputs
Beispiel #20
0
    def wps_execute(self, version, accept):
        """
        Runs a WPS ``Execute`` request (KVP for 1.0.0, XML payload for 2.0.0) and validates job results.
        """
        wps_url = get_wps_url(self.settings)
        if version == "1.0.0":
            test_content = "Test file in Docker - WPS KVP"
            wps_method = "GET"
        elif version == "2.0.0":
            test_content = "Test file in Docker - WPS XML"
            wps_method = "POST"
        else:
            raise ValueError(f"Invalid WPS version: {version}")
        test_content += f" {wps_method} request - Accept {accept.split('/')[-1].upper()}"

        with contextlib.ExitStack() as stack:
            # setup: temporary input file holding the test content, plus execution mocks
            tmp_dir = tempfile.gettempdir()
            tmp_file = stack.enter_context(tempfile.NamedTemporaryFile(dir=tmp_dir, mode="w", suffix=".txt"))  # noqa
            tmp_file.write(test_content)
            tmp_file.seek(0)
            for mock_exec in mocked_execute_process():
                stack.enter_context(mock_exec)

            # build the request contents according to the requested WPS version
            if version == "1.0.0":
                wps_inputs = [f"file={tmp_file.name}@mimeType={CONTENT_TYPE_TEXT_PLAIN}"]
                wps_params = {
                    "service": "WPS",
                    "request": "Execute",
                    "version": version,
                    "identifier": self.process_id,
                    "DataInputs": wps_inputs,
                }
                wps_headers = {"Accept": accept}
                wps_data = None
            else:
                wps_inputs = [("file", ComplexDataInput(tmp_file.name, mimeType=CONTENT_TYPE_TEXT_PLAIN))]
                wps_outputs = [(self.out_key, True)]  # as reference
                wps_exec = WPSExecution(version=version, url=wps_url)
                wps_req = wps_exec.buildRequest(self.process_id, wps_inputs, wps_outputs)
                wps_data = lxml.etree.tostring(wps_req)
                wps_headers = {"Accept": accept, "Content-Type": CONTENT_TYPE_APP_XML}
                wps_params = None
            resp = mocked_sub_requests(self.app, wps_method, wps_url,
                                       params=wps_params, data=wps_data, headers=wps_headers, only_local=True)
            assert resp.status_code in (200, 201), (
                f"Failed with: [{resp.status_code}]\nTest: [{test_content}]\nReason:\n{resp.text}"
            )

            # parse response status according to the requested content-type
            if accept == CONTENT_TYPE_APP_XML:
                assert resp.content_type in CONTENT_TYPE_ANY_XML, test_content
                xml_body = lxml.etree.fromstring(str2bytes(resp.text))
                status_url = xml_body.get("statusLocation")
                job_id = status_url.split("/")[-1]
            elif accept == CONTENT_TYPE_APP_JSON:
                assert resp.content_type == CONTENT_TYPE_APP_JSON, test_content
                status_url = resp.json["location"]
                job_id = resp.json["jobID"]
            assert status_url
            assert job_id

            # job monitoring
            result = self.monitor_job(status_url)

        self.validate_outputs(job_id, result, test_content)
Beispiel #21
0
def execute_workflow(self, userid, url, service_name, workflow, caption=None):
    """
    Celery task that executes the 'workflow' WPS process at ``url`` and monitors it until completion.

    A job entry is registered in the mongodb store and continuously updated with the
    execution status, progress, duration and logs until the workflow succeeds or fails.

    :param userid: identifier of the user submitting the workflow.
    :param url: WPS endpoint where the 'workflow' process is available.
    :param service_name: service name stored with the job entry.
    :param workflow: workflow definition, serialized to JSON for the 'workflow' input.
    :param caption: optional caption stored with the job entry.
    :returns: final job status string (e.g. 'ProcessFailed' on error).
    """
    registry = app.conf['PYRAMID_REGISTRY']
    db = mongodb(registry)
    job = add_job(db,
                  userid=userid,
                  task_id=self.request.id,
                  is_workflow=True,
                  service_name=service_name,
                  process_id=workflow['worker']['identifier'],
                  caption=caption)

    try:
        # generate and run dispel workflow
        # TODO: fix owslib wps for unicode/yaml parameters
        headers = wps_headers(userid)
        # TODO: handle access token in workflow
        logger.debug('workflow=%s', workflow)
        inputs = [(
            'workflow',
            ComplexDataInput(
                # TODO: pywps-4 expects base64 encoding when not set to ''
                dump_json(workflow),
                mimeType="text/yaml",
                encoding=""))]
        outputs = [('output', True), ('logfile', True)]

        wps = WebProcessingService(url=url,
                                   skip_caps=True,
                                   verify=False,
                                   headers=headers)
        execution = wps.execute(identifier='workflow',
                                inputs=inputs,
                                output=outputs,
                                lineage=True)
        job['status_location'] = execution.statusLocation
        job['response'] = etree.tostring(execution.response)

        logger.debug("job init done %s ...", self.request.id)
        # poll the status document until the execution completes or retries are exhausted
        run_step = 0
        num_retries = 0
        while execution.isNotComplete():
            if num_retries >= 5:
                raise Exception(
                    "Could not read status document after 5 retries. Giving up."
                )
            try:
                execution = check_status(url=execution.statusLocation,
                                         verify=False,
                                         sleep_secs=wait_secs(run_step))
                job['response'] = etree.tostring(execution.response)
                job['status'] = execution.getStatus()
                job['status_message'] = execution.statusMessage
                job['progress'] = execution.percentCompleted
                duration = datetime.now() - job.get('created', datetime.now())
                job['duration'] = str(duration).split('.')[0]
                if execution.isComplete():
                    job['finished'] = datetime.now()
                    if execution.isSucceded():
                        # relay the worker status location from the 'output' result document
                        for output in execution.processOutputs:
                            if 'output' == output.identifier:
                                # FIXME(security): yaml.load on remote content is unsafe and deprecated;
                                # prefer yaml.safe_load unless the document needs custom YAML tags.
                                # NOTE(review): urllib.urlopen is Python 2 only; under Python 3 this
                                # must be urllib.request.urlopen — confirm target interpreter.
                                result = yaml.load(
                                    urllib.urlopen(output.reference))
                                job['worker_status_location'] = result[
                                    'worker']['status_location']
                        job['progress'] = 100
                        save_log(job)
                    else:
                        job['status_message'] = '\n'.join(
                            error.text for error in execution.errors)
                        for error in execution.errors:
                            save_log(job, error)
                else:
                    save_log(job)
            except Exception:
                # transient status-read failure: retry up to 5 times before giving up
                num_retries += 1
                logger.exception(
                    "Could not read status xml document for job %s. Trying again ...",
                    self.request.id)
                sleep(1)
            else:
                logger.debug("update job %s ...", self.request.id)
                num_retries = 0
                run_step += 1
                db.jobs.update({'identifier': job['identifier']}, job)
    except Exception as exc:
        logger.exception("Failed to run Job")
        job['status'] = "ProcessFailed"
        # BUG FIX: exceptions have no '.message' attribute in Python 3 (deprecated since 2.6);
        # format the exception itself instead of 'exc.message'.
        job['status_message'] = "Failed to run Job. %s" % exc
    finally:
        # persist the final job state regardless of outcome
        save_log(job)
        db.jobs.update({'identifier': job['identifier']}, job)

    registry.notify(JobFinished(job))
    return job['status']
# 1. GetCapabilities query: list the processes offered by the server
wps = WebProcessingService('https://rain1.fsv.cvut.cz/services/wps', skip_caps=True)
wps.getcapabilities()
print("Test 1: GetCapabilities -> list of processes:")
for proc in wps.processes:
    print(proc.identifier)

process_id = 'd-rain-csv'

# 2. DescribeProcess query (disabled): would list the process inputs/outputs
# described = wps.describeprocess(process_id)
# for described_input in described.dataInputs:
#     print(described_input.identifier)
# for described_output in described.processOutputs:
#     print(described_output.identifier)

# 3. Execute query: run the process and fetch its CSV result
print("Test 3: Execute")
request_inputs = [
    ("input", ComplexDataInput('http://rain.fsv.cvut.cz/geodata/test.gml')),
    ("keycolumn", "HLGP_ID"),
    ("return_period", "N2,N5,N10"),
    ("rainlength", "120"),
]
execution = wps.execute(process_id, request_inputs)
output_file = '/tmp/output.csv'
execution.getOutput(output_file)
with open(output_file) as result_file:
    print(result_file.readlines())
o
Beispiel #23
0
def execute_process(self, job_id, url, headers=None):
    """
    Celery task that runs the WPS execution for the submitted job and monitors it until completion.

    The job is fetched from the store, its inputs/outputs are resolved against the WPS process
    description at ``url``, the execution is dispatched to the local PyWPS worker, and the status
    document is polled until success or failure. Job status, progress and logs are persisted to
    the store at every stage. Returns the final job status.

    ``headers`` presumably carry the request cookies forwarded to the remote WPS — confirm with caller.
    """
    from weaver.wps.service import get_pywps_service

    LOGGER.debug("Job execute process called.")
    settings = get_settings(app)
    task_logger = get_task_logger(__name__)
    load_pywps_config(settings)

    task_logger.debug("Job task setup.")

    # reset the connection because we are in a forked celery process
    db = get_db(app, reset_connection=True)
    store = db.get_store(StoreJobs)

    job = store.fetch_by_id(job_id)
    job.task_id = self.request.id
    job.progress = JOB_PROGRESS_SETUP
    job.save_log(logger=task_logger, message="Job task setup completed.")
    job = store.update_job(job)

    try:
        try:
            job.progress = JOB_PROGRESS_DESCRIBE
            job.save_log(
                logger=task_logger,
                message="Execute WPS request for process [{!s}]".format(
                    job.process))
            ssl_verify = get_ssl_verify_option("get", url, settings=settings)
            wps = WebProcessingService(url=url,
                                       headers=get_cookie_headers(headers),
                                       verify=ssl_verify)
            set_wps_language(wps, accept_language=job.accept_language)
            # reject immediately if the capabilities document is an XML exception report
            raise_on_xml_exception(wps._capabilities)  # noqa
        except Exception as ex:
            raise OWSNoApplicableCode(
                "Failed to retrieve WPS capabilities. Error: [{}].".format(
                    str(ex)))
        try:
            process = wps.describeprocess(job.process)
        except Exception as ex:
            raise OWSNoApplicableCode(
                "Failed to retrieve WPS process description. Error: [{}].".
                format(str(ex)))

        # prepare inputs
        job.progress = JOB_PROGRESS_GET_INPUTS
        job.save_log(logger=task_logger,
                     message="Fetching job input definitions.")
        # identifiers of process inputs declared as complex data by the process description
        complex_inputs = []
        for process_input in process.dataInputs:
            if WPS_COMPLEX_DATA in process_input.dataType:
                complex_inputs.append(process_input.identifier)

        try:
            wps_inputs = list()
            for process_input in job.inputs:
                input_id = get_any_id(process_input)
                process_value = get_any_value(process_input)
                # in case of array inputs, must repeat (id,value)
                input_values = process_value if isinstance(
                    process_value, list) else [process_value]

                # we need to support file:// scheme but PyWPS doesn't like them so remove the scheme file://
                input_values = [
                    val[7:] if str(val).startswith("file://") else val
                    for val in input_values
                ]

                # need to use ComplexDataInput structure for complex input
                # need to use literal String for anything else than complex
                # TODO: BoundingBox not supported
                wps_inputs.extend([
                    (input_id, ComplexDataInput(input_value)
                     if input_id in complex_inputs else str(input_value))
                    for input_value in input_values
                ])
        # a missing id/value in any submitted input aborts parsing; fall back to no inputs
        except KeyError:
            wps_inputs = []

        # prepare outputs
        job.progress = JOB_PROGRESS_GET_OUTPUTS
        job.save_log(logger=task_logger,
                     message="Fetching job output definitions.")
        # (identifier, as_reference) pairs; complex outputs are requested by reference
        wps_outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA)
                       for o in process.processOutputs]

        mode = EXECUTE_MODE_ASYNC if job.execute_async else EXECUTE_MODE_SYNC
        job.progress = JOB_PROGRESS_EXECUTE_REQUEST
        job.save_log(logger=task_logger,
                     message="Starting job process execution.")
        job.save_log(
            logger=task_logger,
            message=
            "Following updates could take a while until the Application Package answers..."
        )

        wps_worker = get_pywps_service(environ=settings, is_worker=True)
        execution = wps_worker.execute_job(job.process,
                                           wps_inputs=wps_inputs,
                                           wps_outputs=wps_outputs,
                                           mode=mode,
                                           job_uuid=job.id)
        # submission failed outright if no process was returned but errors were reported
        if not execution.process and execution.errors:
            raise execution.errors[0]

        # adjust status location
        wps_status_path = get_wps_local_status_location(
            execution.statusLocation, settings)
        job.progress = JOB_PROGRESS_EXECUTE_STATUS_LOCATION
        LOGGER.debug("WPS status location that will be queried: [%s]",
                     wps_status_path)
        if not wps_status_path.startswith("http") and not os.path.isfile(
                wps_status_path):
            LOGGER.warning(
                "WPS status location not resolved to local path: [%s]",
                wps_status_path)
        job.save_log(logger=task_logger,
                     level=logging.DEBUG,
                     message="Updated job status location: [{}].".format(
                         wps_status_path))

        job.status = map_status(STATUS_STARTED)
        job.status_message = execution.statusMessage or "{} initiation done.".format(
            str(job))
        job.status_location = wps_status_path
        job.request = execution.request
        job.response = execution.response
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_START
        job.save_log(logger=task_logger,
                     message="Starting monitoring of job execution.")
        job = store.update_job(job)

        # poll the status document until completion; 'run_step == 0' forces at least one iteration
        max_retries = 5
        num_retries = 0
        run_step = 0
        while execution.isNotComplete() or run_step == 0:
            if num_retries >= max_retries:
                raise Exception(
                    "Could not read status document after {} retries. Giving up."
                    .format(max_retries))
            try:
                # NOTE:
                #   Don't actually log anything here until process is completed (success or fail) so that underlying
                #   WPS execution logs can be inserted within the current job log and appear continuously.
                #   Only update internal job fields in case they get referenced elsewhere.
                job.progress = JOB_PROGRESS_EXECUTE_MONITOR_LOOP
                execution = check_wps_status(location=wps_status_path,
                                             settings=settings,
                                             sleep_secs=wait_secs(run_step))
                job_msg = (execution.statusMessage or "").strip()
                job.response = execution.response
                job.status = map_status(execution.getStatus())
                job.status_message = "Job execution monitoring (progress: {}%, status: {})."\
                                     .format(execution.percentCompleted, job_msg or "n/a")
                # job.save_log(logger=task_logger)
                # job = store.update_job(job)

                if execution.isComplete():
                    job.mark_finished()
                    job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END
                    msg_progress = " (status: {})".format(
                        job_msg) if job_msg else ""
                    if execution.isSucceded():
                        job.status = map_status(STATUS_SUCCEEDED)
                        job.status_message = "Job succeeded{}.".format(
                            msg_progress)
                        wps_package.retrieve_package_job_log(execution, job)
                        job.save_log(logger=task_logger)
                        job_results = [
                            ows2json_output(output, process, settings)
                            for output in execution.processOutputs
                        ]
                        job.results = make_results_relative(
                            job_results, settings)
                    else:
                        task_logger.debug("Job failed.")
                        job.status_message = "Job failed{}.".format(
                            msg_progress)
                        wps_package.retrieve_package_job_log(execution, job)
                        job.save_log(errors=execution.errors,
                                     logger=task_logger)
                    task_logger.debug(
                        "Mapping Job references with generated WPS locations.")
                    map_locations(job, settings)

            # transient status-read failure: count the retry and try again after a short delay
            except Exception as exc:
                num_retries += 1
                task_logger.debug("Exception raised: %s", repr(exc))
                job.status_message = "Could not read status XML document for {!s}. Trying again...".format(
                    job)
                job.save_log(errors=execution.errors, logger=task_logger)
                sleep(1)
            else:
                # job.status_message = "Update {}...".format(str(job))
                # job.save_log(logger=task_logger)
                num_retries = 0
                run_step += 1
            finally:
                # persist the job state on every poll iteration, success or failure
                job = store.update_job(job)

    except Exception as exc:
        LOGGER.exception("Failed running [%s]", job)
        job.status = map_status(STATUS_FAILED)
        job.status_message = "Failed to run {!s}.".format(job)
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_ERROR
        exception_class = "{}.{}".format(
            type(exc).__module__,
            type(exc).__name__)
        errors = "{0}: {1!s}".format(exception_class, exc)
        job.save_log(errors=errors, logger=task_logger)
    finally:
        job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END
        job.status_message = "Job {}.".format(job.status)
        job.save_log(logger=task_logger)

        # Send email if requested
        if job.notification_email is not None:
            job.progress = JOB_PROGRESS_NOTIFY
            try:
                notify_job_complete(job, job.notification_email, settings)
                message = "Notification email sent successfully."
                job.save_log(logger=task_logger, message=message)
            # notification failure is logged but never fails the job itself
            except Exception as exc:
                exception_class = "{}.{}".format(
                    type(exc).__module__,
                    type(exc).__name__)
                exception = "{0}: {1!s}".format(exception_class, exc)
                message = "Couldn't send notification email ({})".format(
                    exception)
                job.save_log(errors=message,
                             logger=task_logger,
                             message=message)

        job.progress = JOB_PROGRESS_DONE
        job.save_log(logger=task_logger, message="Job task complete.")
        job = store.update_job(job)

    return job.status