def from_json(cls, value):
    """Initialize a Job from its JSON (not string) representation.

    :param value: the json (not string) representation
    """
    process = Process.from_json(value['process'])
    wps_request = WPSRequest()
    wps_request.json = json.loads(value['wps_request'])
    wps_response = ExecuteResponse(wps_request=wps_request,
                                   uuid=process.uuid,
                                   process=process)
    wps_response.store_status_file = True
    new_job = Job(process=Process.from_json(value['process']),
                  wps_request=wps_request,
                  wps_response=wps_response)
    return new_job
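# A hypothetical inverse of Job.from_json() above, for illustration only.
# The loader implies an asymmetric layout: 'process' holds an already-decoded
# dict (Process.from_json consumes it directly), while 'wps_request' holds a
# JSON string (from_json calls json.loads on it, and test_json_inout_datetime
# below confirms that WPSRequest.json dumps to a string).
def job_to_json(job):
    return {
        'process': job.process.json,          # assumed: a plain dict
        'wps_request': job.wps_request.json,  # assumed: a JSON string
    }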
def test_insert_request(self):
    requests = self.session.query(RequestInstance)
    assert requests.count() == 0

    obj = {
        'operation': 'execute',
        'version': '1.0.0',
        'language': 'eng',
        'identifier': 'multiple-outputs',
        'store_execute': True,
        'status': True,
        'lineage': True,
        'inputs': {
            'count': [{
                'identifier': 'count',
                'type': 'literal',
                'data': 3
            }]
        },
        'outputs': {},
        'raw': False
    }
    request = WPSRequest()
    request.json = obj

    from .processes.metalinkprocess import MultipleOutputs
    process = MultipleOutputs()
    process.uuid = uuid.uuid4()

    store_request(process.uuid, request, process)
    requests = self.session.query(RequestInstance)
    assert requests.count() == 1

    stored_request = pop_first_stored()
    job = Job.from_json({
        "process": json.loads(stored_request.process),
        "wps_request": json.loads(stored_request.request)
    })
    assert job.process.identifier == "multiple-outputs"
    assert str(job.process.uuid) == str(process.uuid)

    requests = self.session.query(RequestInstance)
    assert requests.count() == 0
def setUp(self):
    self.request = WPSRequest()
    self.tempfile = tempfile.mktemp()
    with open(self.tempfile, 'w') as f:
        f.write("ahoj")
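# The setUp above keeps tempfile.mktemp() for fidelity to the original test,
# but mktemp() is deprecated and race-prone: another process can claim the
# returned path before the file is opened. A sketch of the safer stdlib
# equivalent, which creates the file atomically:
import tempfile

with tempfile.NamedTemporaryFile('w', delete=False) as f:
    f.write("ahoj")
    path = f.name  # use `path` where self.tempfile was used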
def execute_job(self, process_id, wps_inputs, wps_outputs, mode, job_uuid):
    """
    Real execution of the process by the active Celery Worker.
    """
    execution = WPSExecution(version="2.0", url="localhost")
    xml_request = execution.buildRequest(process_id, wps_inputs, wps_outputs,
                                         mode=mode, lineage=True)
    wps_request = WPSRequest()
    wps_request.identifier = process_id
    wps_request.set_version("2.0.0")
    request_parser = wps_request._post_request_parser(wps_request.WPS.Execute().tag)  # noqa: W0212
    request_parser(xml_request)
    # NOTE:
    #   Setting 'status = false' will disable async execution of 'pywps.app.Process.Process',
    #   but this is needed since this job already runs asynchronously within the Celery worker
    #   (a daemon process can't have child processes).
    #   Because of how the code in PyWPS is made, we have to re-enable creation of the status file.
    wps_request.status = "false"
    wps_response = super(WorkerService, self).execute(process_id, wps_request, job_uuid)
    wps_response.store_status_file = True
    # update execution status with actual status file and apply required references
    execution = check_wps_status(location=wps_response.process.status_location,
                                 settings=self.settings)
    execution.request = xml_request
    return execution
def _handler(self, request: WPSRequest, response: ExecuteResponse):
    self.write_log("Processing started", response, 5)

    variable = self.get_input_or_none(request.inputs, "variable")
    rcp = self.get_input_or_none(request.inputs, "rcp")
    lat0 = self.get_input_or_none(request.inputs, "lat0")
    lon0 = self.get_input_or_none(request.inputs, "lon0")
    output_format = request.inputs["output_format"][0].data
    output_filename = f"BCCAQv2_subset_{lat0}_{lon0}"

    self.write_log("Fetching BCCAQv2 datasets", response, 6)
    request.inputs = get_bccaqv2_inputs(request.inputs, variable, rcp)

    self.write_log("Running subset", response, 7)
    threads = int(configuration.get_config_value("finch", "subset_threads"))
    metalink = self.subset(
        request.inputs,
        response,
        start_percentage=7,
        end_percentage=90,
        threads=threads,
    )
    if not metalink.files:
        message = "No data was produced when subsetting using the provided bounds."
        raise ProcessError(message)

    self.write_log("Subset done, creating zip file", response)
    output_files = [mf.file for mf in metalink.files]

    if output_format == "csv":
        csv_files, metadata_folder = netcdf_to_csv(
            output_files,
            output_folder=Path(self.workdir),
            filename_prefix=output_filename,
        )
        output_files = csv_files + [metadata_folder]

    output_zip = Path(self.workdir) / (output_filename + ".zip")

    def log(message_, percentage_):
        self.write_log(message_, response, percentage_)

    zip_files(output_zip, output_files, log_function=log, start_percentage=90)
    response.outputs["output"].file = output_zip
    self.write_log("Processing finished successfully", response, 99)
    return response
def setUp(self):
    self.uuid = uuid.uuid1()
    self.dummy_process = InOut()
    self.dummy_process._set_uuid(self.uuid)
    self.dummy_process.set_workdir('/tmp')
    self.wps_request = WPSRequest()
    self.wps_response = ExecuteResponse(self.wps_request, self.uuid,
                                        process=self.dummy_process)
    self.job = Job(
        process=self.dummy_process,
        wps_request=self.wps_request,
        wps_response=self.wps_response)
def test_parse_wps_input():
    obj = {
        "operation": "execute",
        "version": "1.0.0",
        "language": "eng",
        "identifier": "subset",
        "identifiers": "subset",  # TODO: why identifierS?
        "store_execute": True,
        "status": True,
        "lineage": True,
        "inputs": {
            "time": [{
                "identifier": "time",
                "type": "literal",
                "data_type": "string",
                "allowed_values": [{"type": "anyvalue"}],
                "data": "1970/1980",
            }],
            "time_components": [{
                "identifier": "time",
                "type": "literal",
                "data_type": "string",
                "allowed_values": [{"type": "anyvalue"}],
                "data": "year:1970,1980|month=01,02,03",
            }],
        },
        "outputs": {},
        "raw": False,
    }
    request = WPSRequest()
    request.json = obj

    assert parse_wps_input(request.inputs, "time", default=None) == "1970/1980"
    assert (parse_wps_input(request.inputs, "time_components", default=None)
            == "year:1970,1980|month=01,02,03")
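# The 'time_components' literal above packs several components into one string.
# A hypothetical parser for that mini-syntax (the helper name, and accepting
# both ':' and '=' as separators like the sample data does, are assumptions,
# not part of the tested parse_wps_input API):
def parse_time_components(value):
    components = {}
    for part in value.split("|"):
        key, _, items = part.replace("=", ":").partition(":")
        components[key] = items.split(",")
    return components

assert parse_time_components("year:1970,1980|month=01,02,03") == {
    "year": ["1970", "1980"], "month": ["01", "02", "03"]}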
def setUp(self):
    def handler(request, response):
        response.outputs['output'].data = '42'
        return response

    self.uuid = 1234
    self.dummy_process = Process(
        handler=handler,
        identifier='dummy',
        title='Dummy Process',
        outputs=[LiteralOutput('output', 'Output', data_type='string')])
    self.wps_request = WPSRequest()
    self.wps_response = ExecuteResponse(self.wps_request, self.uuid,
                                        process=self.dummy_process)
def test_json_in(self):
    obj = {
        'operation': 'getcapabilities',
        'version': '1.0.0',
        'language': 'eng',
        'identifier': 'ahoj',
        'identifiers': 'ahoj',  # TODO: why identifierS?
        'store_execute': True,
        'status': True,
        'lineage': True,
        'inputs': {
            'myin': [{
                'identifier': 'myin',
                'type': 'complex',
                'supported_formats': [{'mime_type': 'tralala'}],
                'file': self.tempfile,
                'data_format': {'mime_type': 'tralala'}
            }],
            'myliteral': [{
                'identifier': 'myliteral',
                'type': 'literal',
                'data_type': 'integer',
                'allowed_values': [{'type': 'anyvalue'}],
                'data': 1
            }]
        },
        'outputs': {},
        'raw': False
    }
    self.request = WPSRequest()
    self.request.json = obj

    self.assertEqual(self.request.inputs['myliteral'][0].data, 1,
                     'Data are in the file')
    self.assertEqual(self.request.inputs['myin'][0].data, 'ahoj',
                     'Data are in the file')
    self.assertListEqual(self.request.inputs['myliteral'][0].allowed_values,
                         [AnyValue()], 'Any value not set')
    self.assertTrue(self.request.inputs['myliteral'][0].any_value,
                    'Any value set')
def test_wps3_process_step_io_data_or_href():
    """
    Validates that 'data' literal values and 'href' file references are both handled
    as input for workflow steps corresponding to a WPS-3 process.
    """
    test_process = "test-wps3-process-step-io-data-or-href"
    test_reached_parse_inputs = False  # toggled at the operation just before what is being tested here
    test_cwl_inputs = {
        "single-data-value": 1,
        "multi-data-values": [2, 3],
        "single-href-file": {"location": "https://random-place"},
        "multi-href-files": [
            {"location": "https://random-place"},
            {"location": "https://another-place"},
        ],
    }
    expected_wps_inputs = [
        {"id": "single-data-value", "data": 1},
        {"id": "multi-data-values", "data": 2},
        {"id": "multi-data-values", "data": 3},
        {"id": "single-href-file", "href": "https://random-place"},
        {"id": "multi-href-files", "href": "https://random-place"},
        {"id": "multi-href-files", "href": "https://another-place"},
    ]

    class TestDoneEarlyExit(Exception):
        """Dummy exception to raise to skip further processing steps after the portion to evaluate was reached."""

    def mock_wps_request(method, url, *_, **kwargs):
        nonlocal test_reached_parse_inputs
        method = method.upper()
        if url.endswith("/visibility"):
            resp = Response()
            resp.status_code = 200
            resp._content = json.dumps({"value": VISIBILITY_PUBLIC},
                                       ensure_ascii=False).encode()
            resp.headers = {"Content-Type": CONTENT_TYPE_APP_NETCDF}
            resp.encoding = None
            if method == "PUT":
                test_reached_parse_inputs = True  # last operation before parsing I/O is setting visibility
            return resp
        if method == "POST" and url.endswith(test_process + "/jobs"):
            # actual evaluation of intended handling of CWL inputs conversion to WPS-3 execute request
            assert kwargs.get("json", {}).get("inputs") == expected_wps_inputs
            raise TestDoneEarlyExit("Expected exception raised to skip executed job status monitoring")
        raise AssertionError("unhandled mocked 'make_request' call")

    def mock_update_status(*_, **__):
        return None

    mock_data_sources = {
        "localhost": {
            "netloc": "localhost",
            "ades": "https://localhost:4001",
            "default": True,
        }
    }
    with mock.patch("weaver.processes.wps_process_base.WpsProcessInterface.make_request",
                    side_effect=mock_wps_request):
        with mock.patch("weaver.processes.sources.fetch_data_sources",
                        return_value=mock_data_sources):
            wps_params = {
                "service": "wps",
                "request": "execute",
                "identifier": test_process,
                "version": "1.0.0",
            }
            req = Request(method="GET", params=wps_params)
            setattr(req, "args", wps_params)
            req = WPSRequest(req)
            wps = Wps3Process({}, {}, test_process, req, mock_update_status)
            try:
                wps.execute(test_cwl_inputs, "", {})
            except TestDoneEarlyExit:
                pass  # successful test / expected handling
            except Exception as exc:  # noqa
                if not test_reached_parse_inputs:
                    msg = "Prior error was raised [{}], could not evaluate intended handling of inputs".format(exc)
                    pytest.fail(msg)
                msg = "Other error was raised [{}], inputs were not correctly handled somewhere".format(exc)
                pytest.fail(msg)
def _handler(self, request: WPSRequest, response: ExecuteResponse):
    convert_to_csv = request.inputs["output_format"][0].data == "csv"
    if not convert_to_csv:
        del self.status_percentage_steps["convert_to_csv"]
    write_log(self, "Processing started", process_step="start")

    # Temporary backward-compatibility adjustment.
    # Remove me when lon0 and lat0 are removed.
    lon, lat, lon0, lat0 = [
        single_input_or_none(request.inputs, var)
        for var in "lon lat lon0 lat0".split()
    ]
    if not (lon and lat or lon0 and lat0):
        raise ProcessError("Provide both lat and lon or both lon0 and lat0.")
    request.inputs.setdefault("lon", request.inputs.get("lon0"))
    request.inputs.setdefault("lat", request.inputs.get("lat0"))
    # End of 'remove me'

    output_filename = make_output_filename(self, request.inputs)

    write_log(self, "Fetching datasets")
    variable = request.inputs["variable"][0].data
    variables = None if variable is None else [variable]
    rcp = single_input_or_none(request.inputs, "rcp")
    dataset_name = single_input_or_none(request.inputs, "dataset_name")
    request.inputs["resource"] = get_datasets(dataset_name,
                                              workdir=self.workdir,
                                              variables=variables,
                                              rcp=rcp)

    write_log(self, "Running subset", process_step="subset")
    output_files = finch_subset_gridpoint(
        self,
        netcdf_inputs=request.inputs["resource"],
        request_inputs=request.inputs,
    )
    if not output_files:
        message = "No data was produced when subsetting using the provided bounds."
        raise ProcessError(message)

    if convert_to_csv:
        write_log(self, "Converting outputs to csv", process_step="convert_to_csv")
        csv_files, metadata_folder = netcdf_file_list_to_csv(
            output_files,
            output_folder=Path(self.workdir),
            filename_prefix=output_filename,
            csv_precision=single_input_or_none(request.inputs, "csv_precision"))
        output_files = csv_files + [metadata_folder]

    write_log(self, "Zipping outputs", process_step="zip_outputs")
    output_zip = Path(self.workdir) / (output_filename + ".zip")

    def _log(message, percentage):
        write_log(self, message, subtask_percentage=percentage)

    zip_files(output_zip, output_files, log_function=_log)
    response.outputs["output"].file = output_zip
    write_log(self, "Processing finished successfully", process_step="done")
    return response
def _handler(self, request: WPSRequest, response: ExecuteResponse):
    self.write_log("Processing started", response, 5)

    lat = request.inputs["lat"][0].data
    lon = request.inputs["lon"][0].data
    output_format = request.inputs["output_format"][0].data
    output_filename = f"BCCAQv2_subset_heat_wave_frequency_{lat}_{lon}"

    self.write_log("Fetching BCCAQv2 datasets", response, 6)
    tasmin_inputs = get_bccaqv2_inputs(request.inputs, "tasmin")["resource"]
    tasmax_inputs = get_bccaqv2_inputs(request.inputs, "tasmax")["resource"]
    request.inputs["resource"] = tasmin_inputs + tasmax_inputs

    self.write_log("Running subset", response, 7)
    threads = int(configuration.get_config_value("finch", "subset_threads"))
    metalink = self.subset(
        request.inputs,
        response,
        start_percentage=7,
        end_percentage=50,
        threads=threads,
    )
    if not metalink.files:
        message = "No data was produced when subsetting using the provided bounds."
        raise ProcessError(message)

    self.write_log("Subset done, calculating indices", response)
    all_files = [Path(f.file) for f in metalink.files]

    start_percentage = 50
    end_percentage = 95
    pairs = list(self._make_tasmin_tasmax_pairs(all_files))
    n_pairs = len(pairs)
    output_files = []
    for n, (tasmin, tasmax) in enumerate(pairs):
        percentage = start_percentage + int(n / n_pairs * (end_percentage - start_percentage))
        self.write_log(f"Processing file {n + 1} of {n_pairs}", response, percentage)

        compute_inputs = [i.identifier for i in self.indices_process.inputs]
        inputs = {k: v for k, v in request.inputs.items() if k in compute_inputs}
        inputs["tasmin"] = deque([make_nc_input("tasmin")], maxlen=1)
        inputs["tasmin"][0].file = str(tasmin)
        inputs["tasmax"] = deque([make_nc_input("tasmax")], maxlen=1)
        inputs["tasmax"][0].file = str(tasmax)

        out = self.compute_indices(self.indices_process.xci, inputs)
        out_fn = Path(self.workdir) / tasmin.name.replace("tasmin", "heat_wave_frequency")
        out.to_netcdf(out_fn)
        output_files.append(out_fn)

    if output_format == "csv":
        csv_files, metadata_folder = netcdf_to_csv(
            output_files,
            output_folder=Path(self.workdir),
            filename_prefix=output_filename,
        )
        output_files = csv_files + [metadata_folder]

    output_zip = Path(self.workdir) / (output_filename + ".zip")

    def log(message_, percentage_):
        self.write_log(message_, response, percentage_)

    zip_files(output_zip, output_files, log_function=log, start_percentage=90)
    response.outputs["output"].file = output_zip
    self.write_log("Processing finished successfully", response, 99)
    return response
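# _make_tasmin_tasmax_pairs() is referenced above but not shown. A plausible
# sketch (an assumption, not the original implementation): pair each tasmin
# file with the tasmax file whose name differs only by the variable prefix.
from pathlib import Path


def make_tasmin_tasmax_pairs(files):
    tasmax_by_name = {f.name: f for f in files if "tasmax" in f.name}
    for tasmin in sorted(f for f in files if "tasmin" in f.name):
        tasmax = tasmax_by_name.get(tasmin.name.replace("tasmin", "tasmax"))
        if tasmax is not None:
            yield tasmin, tasmax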
def test_json_inout_datetime(self):
    obj = {
        'operation': 'getcapabilities',
        'version': '1.0.0',
        'language': 'eng',
        'identifier': 'moinmoin',
        'identifiers': 'moinmoin',  # TODO: why identifierS?
        'store_execute': True,
        'status': True,
        'lineage': True,
        'inputs': {
            'datetime': [{
                'identifier': 'datetime',
                'type': 'literal',
                'data_type': 'dateTime',
                'data': '2017-04-20T12:00:00',
                'allowed_values': [{'type': 'anyvalue'}],
            }],
            'date': [{
                'identifier': 'date',
                'type': 'literal',
                'data_type': 'date',
                'data': '2017-04-20',
                'allowed_values': [{'type': 'anyvalue'}],
            }],
            'time': [{
                'identifier': 'time',
                'type': 'literal',
                'data_type': 'time',
                'data': '09:00:00',
                'allowed_values': [{'type': 'anyvalue'}],
            }],
        },
        'outputs': {},
        'raw': False
    }
    self.request = WPSRequest()
    self.request.json = obj

    self.assertEqual(self.request.inputs['datetime'][0].data,
                     datetime.datetime(2017, 4, 20, 12), 'Datetime set')
    self.assertEqual(self.request.inputs['date'][0].data,
                     datetime.date(2017, 4, 20), 'Date set')
    self.assertEqual(self.request.inputs['time'][0].data,
                     datetime.time(9, 0, 0), 'Time set')

    # dump to json and reload
    dump = self.request.json
    self.request.json = json.loads(dump)

    self.assertEqual(self.request.inputs['datetime'][0].data,
                     datetime.datetime(2017, 4, 20, 12), 'Datetime set')
    self.assertEqual(self.request.inputs['date'][0].data,
                     datetime.date(2017, 4, 20), 'Date set')
    self.assertEqual(self.request.inputs['time'][0].data,
                     datetime.time(9, 0, 0), 'Time set')
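# The conversions asserted above can be reproduced with the standard library
# alone; PyWPS applies an equivalent parse when a literal's data_type is
# 'dateTime', 'date' or 'time':
import datetime

assert datetime.datetime.fromisoformat("2017-04-20T12:00:00") == datetime.datetime(2017, 4, 20, 12)
assert datetime.date.fromisoformat("2017-04-20") == datetime.date(2017, 4, 20)
assert datetime.time.fromisoformat("09:00:00") == datetime.time(9, 0, 0)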
def test_stdout_stderr_logging_for_commandline_tool_failure():
    """
    Execute a process and assert that stderr is correctly logged to the log file.
    """
    process = Process({
        "title": "test-stdout-stderr",
        "id": "test-stdout-stderr",
        "package": {
            "cwlVersion": "v1.0",
            "class": "CommandLineTool",
            "baseCommand": "not_existing_command",
            "inputs": {
                "message": {
                    "type": "string",
                    "inputBinding": {"position": 1}
                }
            },
            "outputs": {}
        }
    })
    payload = process
    package = process["package"]
    title = process["title"]
    identifier = process["id"]

    # WpsPackage._handler()
    log_file = tempfile.NamedTemporaryFile()
    status_location = log_file.name
    workdir = tempfile.TemporaryDirectory()

    class TestWpsPackage(WpsPackage):
        @property
        def status_location(self):
            return status_location

    wps_package_instance = TestWpsPackage(identifier=identifier, title=title,
                                          payload=payload, package=package)
    wps_package_instance.set_workdir(workdir.name)

    # WPSRequest mock
    wps_request = WPSRequest()
    wps_request.json = {
        "identifier": "test-stdout-stderr",
        "operation": "execute",
        "version": "1.0.0",
        "language": "null",
        "identifiers": "null",
        "store_execute": "true",
        "status": "true",
        "lineage": "true",
        "raw": "false",
        "inputs": {
            "message": [{
                "identifier": "message",
                "title": "A dummy message",
                "type": "literal",
                "data_type": "string",
                "data": "Dummy message",
                "allowed_values": [],
            }]
        },
        "outputs": {}
    }

    # ExecuteResponse mock
    wps_response = type("", (object,), {"_update_status": lambda *_, **__: 1})()

    from weaver.exceptions import PackageExecutionError
    try:
        wps_package_instance._handler(wps_request, wps_response)
    except PackageExecutionError as exception:
        assert "Completed permanentFail" in exception.args[0]
    else:
        fail("\"wps_package._handler()\" was expected to throw \"PackageExecutionError\" exception")
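# The one-line ExecuteResponse mock above uses the three-argument type()
# builtin to build a throwaway class inline; the lambda stored as a class
# attribute becomes a method, so its first positional argument absorbs `self`.
# A more explicit equivalent (used the same way in the next test):
class FakeExecuteResponse:
    def _update_status(self, *_, **__):
        return 1

wps_response = FakeExecuteResponse()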
def test_stdout_stderr_logging_for_commandline_tool_success():
    """
    Execute a process and assert that stdout is correctly logged to the log file.
    """
    process = Process({
        "title": "test-stdout-stderr",
        "id": "test-stdout-stderr",
        "package": {
            "cwlVersion": "v1.0",
            "class": "CommandLineTool",
            "baseCommand": "echo",
            "inputs": {
                "message": {
                    "type": "string",
                    "inputBinding": {"position": 1}
                }
            },
            "outputs": {}
        }
    })
    payload = process
    package = process["package"]
    title = process["title"]
    identifier = process["id"]

    # WpsPackage._handler()
    log_file = tempfile.NamedTemporaryFile()
    status_location = log_file.name
    workdir = tempfile.TemporaryDirectory()

    class TestWpsPackage(WpsPackage):
        @property
        def status_location(self):
            return status_location

    wps_package_instance = TestWpsPackage(identifier=identifier, title=title,
                                          payload=payload, package=package)
    wps_package_instance.set_workdir(workdir.name)

    # WPSRequest mock
    wps_request = WPSRequest()
    wps_request.json = {
        "identifier": "test-stdout-stderr",
        "operation": "execute",
        "version": "1.0.0",
        "language": "null",
        "identifiers": "null",
        "store_execute": "true",
        "status": "true",
        "lineage": "true",
        "raw": "false",
        "inputs": {
            "message": [{
                "identifier": "message",
                "title": "A dummy message",
                "type": "literal",
                "data_type": "string",
                "data": "Dummy message",
                "allowed_values": [],
            }]
        },
        "outputs": {}
    }

    # ExecuteResponse mock
    wps_response = type("", (object,), {"_update_status": lambda *_, **__: 1})()

    wps_package_instance._handler(wps_request, wps_response)

    # log assertions
    with open(status_location + ".log", "r") as file:
        log_data = file.read()
    assert "Dummy message" in log_data
def _handler(self, request: WPSRequest, response: ExecuteResponse):
    convert_to_csv = request.inputs["output_format"][0].data == "csv"
    if not convert_to_csv:
        del self.status_percentage_steps["convert_to_csv"]
    write_log(self, "Processing started", process_step="start")

    output_filename = make_output_filename(self, request.inputs)

    write_log(self, "Fetching datasets")
    variable = request.inputs["variable"][0].data
    variables = None if variable is None else [variable]
    rcp = single_input_or_none(request.inputs, "rcp")
    dataset_name = single_input_or_none(request.inputs, "dataset_name")
    request.inputs["resource"] = get_datasets(dataset_name,
                                              workdir=self.workdir,
                                              variables=variables,
                                              rcp=rcp)

    write_log(self, "Running subset", process_step="subset")
    output_files = finch_subset_bbox(
        self,
        netcdf_inputs=request.inputs["resource"],
        request_inputs=request.inputs,
    )
    if not output_files:
        message = "No data was produced when subsetting using the provided bounds."
        raise ProcessError(message)

    if convert_to_csv:
        write_log(self, "Converting outputs to csv", process_step="convert_to_csv")
        csv_files, metadata_folder = netcdf_file_list_to_csv(
            output_files,
            output_folder=Path(self.workdir),
            filename_prefix=output_filename,
        )
        output_files = csv_files + [metadata_folder]

    write_log(self, "Zipping outputs", process_step="zip_outputs")
    output_zip = Path(self.workdir) / (output_filename + ".zip")

    def _log(message, percentage):
        write_log(self, message, subtask_percentage=percentage)

    zip_files(output_zip, output_files, log_function=_log)
    response.outputs["output"].file = output_zip
    write_log(self, "Processing finished successfully", process_step="done")
    return response
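# zip_files() with its log_function callback is imported from the project's
# utilities in the handlers above. A minimal sketch of such a helper, assuming
# the callback takes (message, percentage) and progress runs from
# start_percentage up to 100 (directories in `files` would need recursive
# handling, omitted here for brevity):
import zipfile
from pathlib import Path


def zip_files(output_zip, files, log_function=None, start_percentage=0):
    files = [Path(f) for f in files]
    with zipfile.ZipFile(output_zip, "w", zipfile.ZIP_DEFLATED) as zf:
        for n, path in enumerate(files, start=1):
            zf.write(path, arcname=path.name)
            if log_function is not None:
                percentage = start_percentage + int(n / len(files) * (100 - start_percentage))
                log_function(f"Zipped {path.name}", percentage)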