Example #1
    @classmethod
    def from_json(cls, value):
        """Reconstruct a Job from its JSON representation.

        :param value: the parsed JSON (dict, not string) representation
        """
        process = Process.from_json(value['process'])
        wps_request = WPSRequest()
        wps_request.json = json.loads(value['wps_request'])
        wps_response = ExecuteResponse(wps_request=wps_request,
                                       uuid=process.uuid,
                                       process=process)
        wps_response.store_status_file = True
        new_job = Job(process=process,
                      wps_request=wps_request,
                      wps_response=wps_response)
        return new_job
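For reference, a minimal sketch of the dict this method expects, based only on the code above: `value['process']` is an already-parsed dict handed to `Process.from_json`, while `value['wps_request']` must still be a JSON *string*, since `from_json` calls `json.loads` on it. The `stored_*` names below are hypothetical.

value = {
    "process": json.loads(stored_process_json),   # parsed dict
    "wps_request": stored_request_json_string,    # still a JSON string
}
job = Job.from_json(value)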
Example #2
    def test_insert_request(self):

        requests = self.session.query(RequestInstance)
        assert requests.count() == 0

        obj = {
            'operation': 'execute',
            'version': '1.0.0',
            'language': 'eng',
            'identifier': 'multiple-outputs',
            'store_execute': True,
            'status': True,
            'lineage': True,
            'inputs': {
                'count': [{
                    'identifier': 'count',
                    'type': 'literal',
                    'data': 3
                }]
            },
            'outputs': {},
            'raw': False
        }

        request = WPSRequest()
        request.json = obj

        from .processes.metalinkprocess import MultipleOutputs
        process = MultipleOutputs()
        process.uuid = uuid.uuid4()
        store_request(process.uuid, request, process)

        requests = self.session.query(RequestInstance)
        assert requests.count() == 1
        stored_request = pop_first_stored()

        job = Job.from_json({
            "process": json.loads(stored_request.process),
            "wps_request": json.loads(stored_request.request)
        })

        assert job.process.identifier == "multiple-outputs"
        assert str(job.process.uuid) == str(process.uuid)

        requests = self.session.query(RequestInstance)
        assert requests.count() == 0
Example #3
    def setUp(self):

        self.request = WPSRequest()
        self.tempfile = tempfile.mktemp()

        with open(self.tempfile, 'w') as x:
            x.write("ahoj")
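Note that `tempfile.mktemp()` has long been deprecated because the returned name can be claimed by another process before the file is opened. A sketch of the same fixture using `tempfile.mkstemp()` instead (requires `import os`), if the rest of the suite permits it:

    def setUp(self):
        self.request = WPSRequest()
        # mkstemp creates and opens the file atomically, avoiding the mktemp() race
        fd, self.tempfile = tempfile.mkstemp()
        with os.fdopen(fd, 'w') as f:
            f.write("ahoj")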
Example #4
 def execute_job(self, process_id, wps_inputs, wps_outputs, mode, job_uuid):
     """
     Real execution of the process by active Celery Worker.
     """
     execution = WPSExecution(version="2.0", url="localhost")
     xml_request = execution.buildRequest(process_id,
                                          wps_inputs,
                                          wps_outputs,
                                          mode=mode,
                                          lineage=True)
     wps_request = WPSRequest()
     wps_request.identifier = process_id
     wps_request.set_version("2.0.0")
     request_parser = wps_request._post_request_parser(
         wps_request.WPS.Execute().tag)  # noqa: W0212
     request_parser(xml_request)
     # NOTE:
     #  Setting 'status = false' disables async execution of 'pywps.app.Process.Process',
     #  which is needed because this job already runs asynchronously inside a Celery
     #  worker (a daemon process cannot have child processes).
     #  Because of how the PyWPS code is written, we must then re-enable creation of
     #  the status file.
     wps_request.status = "false"
     wps_response = super(WorkerService,
                          self).execute(process_id, wps_request, job_uuid)
     wps_response.store_status_file = True
     # update execution status with actual status file and apply required references
     execution = check_wps_status(
         location=wps_response.process.status_location,
         settings=self.settings)
     execution.request = xml_request
     return execution
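A hedged sketch of how a caller might poll the returned execution until it finishes, assuming `check_wps_status` re-reads the status file on each call (as the keyword arguments above suggest) and returns an `owslib.wps.WPSExecution`, whose `isComplete()` method is standard owslib API:

import time

def wait_for_completion(wps_response, settings, interval=2.0, timeout=300.0):
    # poll the PyWPS status document until the job completes or times out
    deadline = time.time() + timeout
    while time.time() < deadline:
        execution = check_wps_status(
            location=wps_response.process.status_location,
            settings=settings)
        if execution.isComplete():
            return execution
        time.sleep(interval)
    raise TimeoutError("WPS job did not complete within the allotted time")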
Example #5
    def _handler(self, request: WPSRequest, response: ExecuteResponse):
        self.write_log("Processing started", response, 5)

        variable = self.get_input_or_none(request.inputs, "variable")
        rcp = self.get_input_or_none(request.inputs, "rcp")
        lat0 = self.get_input_or_none(request.inputs, "lat0")
        lon0 = self.get_input_or_none(request.inputs, "lon0")
        output_format = request.inputs["output_format"][0].data

        output_filename = f"BCCAQv2_subset_{lat0}_{lon0}"

        self.write_log("Fetching BCCAQv2 datasets", response, 6)
        request.inputs = get_bccaqv2_inputs(request.inputs, variable, rcp)

        self.write_log("Running subset", response, 7)

        threads = int(configuration.get_config_value("finch",
                                                     "subset_threads"))

        metalink = self.subset(
            request.inputs,
            response,
            start_percentage=7,
            end_percentage=90,
            threads=threads,
        )

        if not metalink.files:
            message = "No data was produced when subsetting using the provided bounds."
            raise ProcessError(message)

        self.write_log("Subset done, creating zip file", response)

        output_files = [mf.file for mf in metalink.files]

        if output_format == "csv":
            csv_files, metadata_folder = netcdf_to_csv(
                output_files,
                output_folder=Path(self.workdir),
                filename_prefix=output_filename,
            )
            output_files = csv_files + [metadata_folder]

        output_zip = Path(self.workdir) / (output_filename + ".zip")

        def log(message_, percentage_):
            self.write_log(message_, response, percentage_)

        zip_files(output_zip,
                  output_files,
                  log_function=log,
                  start_percentage=90)
        response.outputs["output"].file = output_zip

        self.write_log("Processing finished successfully", response, 99)
        return response
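`get_input_or_none` is used throughout the handler but not defined here; a minimal sketch consistent with how it is called (an assumption, not the project's actual helper):

def get_input_or_none(inputs, identifier):
    # return the first matching input's data, or None when the input is absent
    try:
        return inputs[identifier][0].data
    except (KeyError, IndexError):
        return None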
Example #6
 def setUp(self):
     self.uuid = uuid.uuid1()
     self.dummy_process = InOut()
     self.dummy_process._set_uuid(self.uuid)
     self.dummy_process.set_workdir('/tmp')
     self.wps_request = WPSRequest()
     self.wps_response = ExecuteResponse(self.wps_request, self.uuid,
                                         process=self.dummy_process)
     self.job = Job(
         process=self.dummy_process,
         wps_request=self.wps_request,
         wps_response=self.wps_response)
Example #7
def test_parse_wps_input():
    obj = {
        "operation": "execute",
        "version": "1.0.0",
        "language": "eng",
        "identifier": "subset",
        "identifiers": "subset",  # TODO: why identifierS?
        "store_execute": True,
        "status": True,
        "lineage": True,
        "inputs": {
            "time": [{
                "identifier": "time",
                "type": "literal",
                "data_type": "string",
                "allowed_values": [{
                    "type": "anyvalue"
                }],
                "data": "1970/1980",
            }],
            "time_components": [{
                "identifier": "time",
                "type": "literal",
                "data_type": "string",
                "allowed_values": [{
                    "type": "anyvalue"
                }],
                "data": "year:1970,1980|month=01,02,03",
            }],
        },
        "outputs": {},
        "raw": False,
    }

    request = WPSRequest()
    request.json = obj

    assert parse_wps_input(request.inputs, "time", default=None) == "1970/1980"
    assert (parse_wps_input(request.inputs, "time_components",
                            default=None) == "year:1970,1980|month=01,02,03")
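`parse_wps_input` is exercised but not defined here; a minimal sketch matching the call sites above (hypothetical, with the same first-value semantics as the helper sketched in Example #5):

def parse_wps_input(inputs, identifier, default=None):
    # return the first matching input's data, falling back to the default
    values = inputs.get(identifier)
    if not values:
        return default
    return values[0].data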
Example #8
    def setUp(self):
        def handler(request, response):
            response.outputs['output'].data = '42'
            return response

        self.uuid = 1234
        self.dummy_process = Process(
            handler=handler,
            identifier='dummy',
            title='Dummy Process',
            outputs=[LiteralOutput('output', 'Output', data_type='string')])
        self.wps_request = WPSRequest()
        self.wps_response = ExecuteResponse(self.wps_request, self.uuid,
                process=self.dummy_process)
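With this fixture in place, a test method can drive the handler directly; a sketch (PyWPS stores the callable on `Process.handler`):

    def test_handler(self):
        # invoke the dummy handler directly and check its literal output
        response = self.dummy_process.handler(self.wps_request, self.wps_response)
        self.assertEqual(response.outputs['output'].data, '42')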
Example #9
    def test_json_in(self):

        obj = {
            'operation': 'getcapabilities',
            'version': '1.0.0',
            'language': 'eng',
            'identifier': 'ahoj',
            'identifiers': 'ahoj',  # TODO: why identifierS?
            'store_execute': True,
            'status': True,
            'lineage': True,
            'inputs': {
                'myin': [{
                    'identifier': 'myin',
                    'type': 'complex',
                    'supported_formats': [{
                        'mime_type': 'tralala'
                    }],
                    'file': self.tempfile,
                    'data_format': {
                        'mime_type': 'tralala'
                    }
                }],
                'myliteral': [{
                    'identifier': 'myliteral',
                    'type': 'literal',
                    'data_type': 'integer',
                    'allowed_values': [{
                        'type': 'anyvalue'
                    }],
                    'data': 1
                }]
            },
            'outputs': {},
            'raw': False
        }

        self.request = WPSRequest()
        self.request.json = obj

        self.assertEqual(self.request.inputs['myliteral'][0].data, 1,
                         'Literal data set')
        self.assertEqual(self.request.inputs['myin'][0].data, 'ahoj',
                         'Data are in the file')
        self.assertListEqual(
            self.request.inputs['myliteral'][0].allowed_values, [AnyValue()],
            'Any value not set')
        self.assertTrue(self.request.inputs['myliteral'][0].any_value,
                        'Any value set')
Example #10
def test_wps3_process_step_io_data_or_href():
    """
    Validates that 'data' literal values and 'href' file references are both handled as input for workflow steps
    corresponding to a WPS-3 process.
    """
    test_process = "test-wps3-process-step-io-data-or-href"
    test_reached_parse_inputs = False  # toggled by the last operation before the code under test
    test_cwl_inputs = {
        "single-data-value": 1,
        "multi-data-values": [2, 3],
        "single-href-file": {"location": "https://random-place"},
        "multi-href-files": [
            {"location": "https://random-place"},
            {"location": "https://another-place"},
        ],
    }
    expected_wps_inputs = [
        {
            "id": "single-data-value",
            "data": 1
        },
        {
            "id": "multi-data-values",
            "data": 2
        },
        {
            "id": "multi-data-values",
            "data": 3
        },
        {
            "id": "single-href-file",
            "href": "https://random-place"
        },
        {
            "id": "multi-href-files",
            "href": "https://random-place"
        },
        {
            "id": "multi-href-files",
            "href": "https://another-place"
        },
    ]

    class TestDoneEarlyExit(Exception):
        """Dummy exception to raise to skip further processing steps after the portion to evaluate was reached."""

    def mock_wps_request(method, url, *_, **kwargs):
        nonlocal test_reached_parse_inputs

        method = method.upper()
        if url.endswith("/visibility"):
            resp = Response()
            resp.status_code = 200
            resp._content = json.dumps({"value": VISIBILITY_PUBLIC},
                                       ensure_ascii=False).encode()
            resp.headers = {"Content-Type": CONTENT_TYPE_APP_JSON}  # body is JSON
            resp.encoding = None
            if method == "PUT":
                test_reached_parse_inputs = True  # last operation before parsing I/O is setting visibility
            return resp
        if method == "POST" and url.endswith(test_process + "/jobs"):
            # actual evaluation of intended handling of CWL inputs conversion to WPS-3 execute request
            assert kwargs.get("json", {}).get("inputs") == expected_wps_inputs
            raise TestDoneEarlyExit(
                "Expected exception raised to skip executed job status monitoring"
            )
        raise AssertionError("unhandled mocked 'make_request' call")

    def mock_update_status(*_, **__):
        return None

    mock_data_sources = {
        "localhost": {
            "netloc": "localhost",
            "ades": "https://localhost:4001",
            "default": True
        }
    }
    with mock.patch(
            "weaver.processes.wps_process_base.WpsProcessInterface.make_request",
            side_effect=mock_wps_request):
        with mock.patch("weaver.processes.sources.fetch_data_sources",
                        return_value=mock_data_sources):
            wps_params = {
                "service": "wps",
                "request": "execute",
                "identifier": test_process,
                "version": "1.0.0"
            }
            req = Request(method="GET", params=wps_params)
            setattr(req, "args", wps_params)
            req = WPSRequest(req)
            wps = Wps3Process({}, {}, test_process, req, mock_update_status)
            try:
                wps.execute(test_cwl_inputs, "", {})
            except TestDoneEarlyExit:
                pass  # successful test / expected handling
            except Exception as exc:  # noqa
                if not test_reached_parse_inputs:
                    msg = "Prior error was raised [{}], could not evaluate intended handling of inputs".format(exc)
                    pytest.fail(msg)
                msg = "Other error was raised [{}], inputs were not correctly handled somewhere".format(exc)
                pytest.fail(msg)
Example #11
    def _handler(self, request: WPSRequest, response: ExecuteResponse):

        convert_to_csv = request.inputs["output_format"][0].data == "csv"
        if not convert_to_csv:
            del self.status_percentage_steps["convert_to_csv"]

        write_log(self, "Processing started", process_step="start")

        # Temporary backward-compatibility adjustment.
        # Remove me when lon0 and lat0 are removed
        lon, lat, lon0, lat0 = [
            single_input_or_none(request.inputs, var)
            for var in "lon lat lon0 lat0".split()
        ]
        if not ((lon and lat) or (lon0 and lat0)):
            raise ProcessError(
                "Provide both lat and lon or both lon0 and lat0.")
        request.inputs.setdefault("lon", request.inputs.get("lon0"))
        request.inputs.setdefault("lat", request.inputs.get("lat0"))
        # End of 'remove me'

        output_filename = make_output_filename(self, request.inputs)

        write_log(self, "Fetching datasets")

        variable = request.inputs["variable"][0].data
        variables = None if variable is None else [variable]
        rcp = single_input_or_none(request.inputs, "rcp")

        dataset_name = single_input_or_none(request.inputs, "dataset_name")
        request.inputs["resource"] = get_datasets(dataset_name,
                                                  workdir=self.workdir,
                                                  variables=variables,
                                                  rcp=rcp)

        write_log(self, "Running subset", process_step="subset")

        output_files = finch_subset_gridpoint(
            self,
            netcdf_inputs=request.inputs["resource"],
            request_inputs=request.inputs,
        )

        if not output_files:
            message = "No data was produced when subsetting using the provided bounds."
            raise ProcessError(message)

        if convert_to_csv:
            write_log(self,
                      "Converting outputs to csv",
                      process_step="convert_to_csv")

            csv_files, metadata_folder = netcdf_file_list_to_csv(
                output_files,
                output_folder=Path(self.workdir),
                filename_prefix=output_filename,
                csv_precision=single_input_or_none(request.inputs,
                                                   "csv_precision"))
            output_files = csv_files + [metadata_folder]

        write_log(self, "Zipping outputs", process_step="zip_outputs")

        output_zip = Path(self.workdir) / (output_filename + ".zip")

        def _log(message, percentage):
            write_log(self, message, subtask_percentage=percentage)

        zip_files(output_zip, output_files, log_function=_log)

        response.outputs["output"].file = output_zip

        write_log(self,
                  "Processing finished successfully",
                  process_step="done")
        return response
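Deleting the 'convert_to_csv' key suggests that `status_percentage_steps` maps step names to progress percentages consumed by `write_log(..., process_step=...)`. A hedged sketch of its shape (the step names come from the handler above; the numbers are assumptions):

# hypothetical mapping set in the process constructor; write_log presumably
# resolves process_step names to these percentages when reporting progress
self.status_percentage_steps = {
    "start": 5,
    "subset": 15,
    "convert_to_csv": 75,
    "zip_outputs": 90,
    "done": 99,
}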
Example #12
    def _handler(self, request: WPSRequest, response: ExecuteResponse):
        self.write_log("Processing started", response, 5)

        lat = request.inputs["lat"][0].data
        lon = request.inputs["lon"][0].data
        output_format = request.inputs["output_format"][0].data
        output_filename = f"BCCAQv2_subset_heat_wave_frequency_{lat}_{lon}"

        self.write_log("Fetching BCCAQv2 datasets", response, 6)
        tasmin_inputs = get_bccaqv2_inputs(request.inputs, "tasmin")["resource"]
        tasmax_inputs = get_bccaqv2_inputs(request.inputs, "tasmax")["resource"]

        request.inputs["resource"] = tasmin_inputs + tasmax_inputs

        self.write_log("Running subset", response, 7)

        threads = int(configuration.get_config_value("finch", "subset_threads"))

        metalink = self.subset(
            request.inputs,
            response,
            start_percentage=7,
            end_percentage=50,
            threads=threads,
        )

        if not metalink.files:
            message = "No data was produced when subsetting using the provided bounds."
            raise ProcessError(message)

        self.write_log("Subset done, calculating indices", response)

        all_files = [Path(f.file) for f in metalink.files]

        start_percentage = 50
        end_percentage = 95

        pairs = list(self._make_tasmin_tasmax_pairs(all_files))
        n_pairs = len(pairs)

        output_files = []

        for n, (tasmin, tasmax) in enumerate(pairs):
            percentage = start_percentage + int(
                n / n_pairs * (end_percentage - start_percentage)
            )
            self.write_log(
                f"Processing file {n + 1} of {n_pairs}", response, percentage
            )

            compute_inputs = [i.identifier for i in self.indices_process.inputs]
            inputs = {k: v for k, v in request.inputs.items() if k in compute_inputs}

            inputs["tasmin"] = deque([make_nc_input("tasmin")], maxlen=1)
            inputs["tasmin"][0].file = str(tasmin)
            inputs["tasmax"] = deque([make_nc_input("tasmax")], maxlen=1)
            inputs["tasmax"][0].file = str(tasmax)

            out = self.compute_indices(self.indices_process.xci, inputs)
            out_fn = Path(self.workdir) / tasmin.name.replace(
                "tasmin", "heat_wave_frequency"
            )
            out.to_netcdf(out_fn)
            output_files.append(out_fn)

        if output_format == "csv":
            csv_files, metadata_folder = netcdf_to_csv(
                output_files,
                output_folder=Path(self.workdir),
                filename_prefix=output_filename,
            )
            output_files = csv_files + [metadata_folder]

        output_zip = Path(self.workdir) / (output_filename + ".zip")

        def log(message_, percentage_):
            self.write_log(message_, response, percentage_)

        zip_files(output_zip, output_files, log_function=log, start_percentage=90)
        response.outputs["output"].file = output_zip

        self.write_log("Processing finished successfully", response, 99)
        return response
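`_make_tasmin_tasmax_pairs` is not shown; a plausible sketch that pairs files whose names differ only by the variable prefix (an assumption based on the `tasmin.name.replace(...)` call above):

    def _make_tasmin_tasmax_pairs(self, files):
        # match each tasmin file to the tasmax file with the corresponding name
        tasmax_by_name = {f.name: f for f in files if "tasmax" in f.name}
        for tasmin in sorted(f for f in files if "tasmin" in f.name):
            tasmax = tasmax_by_name.get(tasmin.name.replace("tasmin", "tasmax"))
            if tasmax is not None:
                yield tasmin, tasmax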
Example #13
    def test_json_inout_datetime(self):
        obj = {
            'operation': 'getcapabilities',
            'version': '1.0.0',
            'language': 'eng',
            'identifier': 'moinmoin',
            'identifiers': 'moinmoin',  # TODO: why identifierS?
            'store_execute': True,
            'status': True,
            'lineage': True,
            'inputs': {
                'datetime': [{
                    'identifier': 'datetime',
                    'type': 'literal',
                    'data_type': 'dateTime',
                    'data': '2017-04-20T12:00:00',
                    'allowed_values': [{
                        'type': 'anyvalue'
                    }],
                }],
                'date': [{
                    'identifier': 'date',
                    'type': 'literal',
                    'data_type': 'date',
                    'data': '2017-04-20',
                    'allowed_values': [{
                        'type': 'anyvalue'
                    }],
                }],
                'time': [{
                    'identifier': 'time',
                    'type': 'literal',
                    'data_type': 'time',
                    'data': '09:00:00',
                    'allowed_values': [{
                        'type': 'anyvalue'
                    }],
                }],
            },
            'outputs': {},
            'raw': False
        }

        self.request = WPSRequest()
        self.request.json = obj

        self.assertEqual(self.request.inputs['datetime'][0].data,
                         datetime.datetime(2017, 4, 20, 12), 'Datetime set')
        self.assertEqual(self.request.inputs['date'][0].data,
                         datetime.date(2017, 4, 20), 'Date set')
        self.assertEqual(self.request.inputs['time'][0].data,
                         datetime.time(9, 0, 0), 'Time set')

        # dump to json and reload
        dump = self.request.json
        self.request.json = json.loads(dump)

        self.assertEqual(self.request.inputs['datetime'][0].data,
                         datetime.datetime(2017, 4, 20, 12), 'Datetime set')
        self.assertEqual(self.request.inputs['date'][0].data,
                         datetime.date(2017, 4, 20), 'Date set')
        self.assertEqual(self.request.inputs['time'][0].data,
                         datetime.time(9, 0, 0), 'Time set')
Example #14
def test_stdout_stderr_logging_for_commandline_tool_failure():
    """
    Execute a process and assert that stderr is correctly logged to log file.
    """
    process = Process({
        "title": "test-stdout-stderr",
        "id": "test-stdout-stderr",
        "package": {
            "cwlVersion": "v1.0",
            "class": "CommandLineTool",
            "baseCommand": "not_existing_command",
            "inputs": {
                "message": {
                    "type": "string",
                    "inputBinding": {
                        "position": 1
                    }
                }
            },
            "outputs": {}
        }
    })

    payload = process
    package = process["package"]
    title = process["title"]
    identifier = process["id"]

    # WPSPackage._handle()
    log_file = tempfile.NamedTemporaryFile()
    status_location = log_file.name
    workdir = tempfile.TemporaryDirectory()

    class TestWpsPackage(WpsPackage):
        @property
        def status_location(self):
            return status_location

    wps_package_instance = TestWpsPackage(identifier=identifier,
                                          title=title,
                                          payload=payload,
                                          package=package)
    wps_package_instance.set_workdir(workdir.name)

    # WPSRequest mock
    wps_request = WPSRequest()
    wps_request.json = {
        "identifier": "test-stdout-stderr",
        "operation": "execute",
        "version": "1.0.0",
        "language": "null",
        "identifiers": "null",
        "store_execute": "true",
        "status": "true",
        "lineage": "true",
        "raw": "false",
        "inputs": {
            "message": [{
                "identifier": "message",
                "title": "A dummy message",
                "type": "literal",
                "data_type": "string",
                "data": "Dummy message",
                "allowed_values": [],
            }]
        },
        "outputs": {}
    }

    # ExecuteResponse mock
    wps_response = type("", (object, ), {
        "_update_status": lambda *_, **__: 1
    })()

    from weaver.exceptions import PackageExecutionError

    try:
        wps_package_instance._handler(wps_request, wps_response)
    except PackageExecutionError as exception:
        assert "Completed permanentFail" in exception.args[0]
    else:
        fail(
            "\"wps_package._handler()\" was expected to throw \"PackageExecutionError\" exception"
        )
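The anonymous `type(...)` instance above is a bare-bones stand-in for `ExecuteResponse` that only needs `_update_status`. An equivalent sketch using the standard library's `unittest.mock`, if a heavier test dependency is acceptable:

from unittest import mock

# any attribute access (including _update_status) returns a callable Mock
wps_response = mock.Mock()
wps_response._update_status.return_value = 1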
Example #15
def test_stdout_stderr_logging_for_commandline_tool_success():
    """
    Execute a process and assert that stdout is correctly logged to log file.
    """
    process = Process({
        "title": "test-stdout-stderr",
        "id": "test-stdout-stderr",
        "package": {
            "cwlVersion": "v1.0",
            "class": "CommandLineTool",
            "baseCommand": "echo",
            "inputs": {
                "message": {
                    "type": "string",
                    "inputBinding": {
                        "position": 1
                    }
                }
            },
            "outputs": {}
        }
    })

    payload = process
    package = process["package"]
    title = process["title"]
    identifier = process["id"]

    # WPSPackage._handle()
    log_file = tempfile.NamedTemporaryFile()
    status_location = log_file.name
    workdir = tempfile.TemporaryDirectory()

    class TestWpsPackage(WpsPackage):
        @property
        def status_location(self):
            return status_location

    wps_package_instance = TestWpsPackage(identifier=identifier,
                                          title=title,
                                          payload=payload,
                                          package=package)
    wps_package_instance.set_workdir(workdir.name)

    # WPSRequest mock
    wps_request = WPSRequest()
    wps_request.json = {
        "identifier": "test-stdout-stderr",
        "operation": "execute",
        "version": "1.0.0",
        "language": "null",
        "identifiers": "null",
        "store_execute": "true",
        "status": "true",
        "lineage": "true",
        "raw": "false",
        "inputs": {
            "message": [{
                "identifier": "message",
                "title": "A dummy message",
                "type": "literal",
                "data_type": "string",
                "data": "Dummy message",
                "allowed_values": [],
            }]
        },
        "outputs": {}
    }

    # ExecuteResponse mock
    wps_response = type("", (object, ), {
        "_update_status": lambda *_, **__: 1
    })()

    wps_package_instance._handler(wps_request, wps_response)

    # log assertions
    with open(status_location + ".log", "r") as file:
        log_data = file.read()
        assert "Dummy message" in log_data
Example #16
    def _handler(self, request: WPSRequest, response: ExecuteResponse):

        convert_to_csv = request.inputs["output_format"][0].data == "csv"
        if not convert_to_csv:
            del self.status_percentage_steps["convert_to_csv"]

        write_log(self, "Processing started", process_step="start")

        output_filename = make_output_filename(self, request.inputs)

        write_log(self, "Fetching datasets")

        variable = request.inputs["variable"][0].data
        variables = None if variable is None else [variable]
        rcp = single_input_or_none(request.inputs, "rcp")

        dataset_name = single_input_or_none(request.inputs, "dataset_name")
        request.inputs["resource"] = get_datasets(dataset_name,
                                                  workdir=self.workdir,
                                                  variables=variables,
                                                  rcp=rcp)

        write_log(self, "Running subset", process_step="subset")

        output_files = finch_subset_bbox(
            self,
            netcdf_inputs=request.inputs["resource"],
            request_inputs=request.inputs,
        )

        if not output_files:
            message = "No data was produced when subsetting using the provided bounds."
            raise ProcessError(message)

        if convert_to_csv:
            write_log(self,
                      "Converting outputs to csv",
                      process_step="convert_to_csv")

            csv_files, metadata_folder = netcdf_file_list_to_csv(
                output_files,
                output_folder=Path(self.workdir),
                filename_prefix=output_filename,
            )
            output_files = csv_files + [metadata_folder]

        write_log(self, "Zipping outputs", process_step="zip_outputs")

        output_zip = Path(self.workdir) / (output_filename + ".zip")

        def _log(message, percentage):
            write_log(self, message, subtask_percentage=percentage)

        zip_files(output_zip, output_files, log_function=_log)

        response.outputs["output"].file = output_zip

        write_log(self,
                  "Processing finished successfully",
                  process_step="done")
        return response