def _handler(self, request, response):
    """Run the compliance checker on the requested dataset and return its report.

    The dataset is taken from the ``dataset_opendap`` input (its URL) when
    present, otherwise from the ``dataset`` input (its local file).  The
    report is written to ``check_report.<format>`` inside ``self.workdir``.

    Parameters:
        request: WPS request carrying the ``dataset_opendap``/``dataset``,
            ``format``, ``test`` and ``criteria`` inputs.
        response: WPS response whose ``output`` file is set to the report.

    Returns:
        The updated response.

    Raises:
        ProcessError: if no dataset is given, or if any requested test is
            not among the checkers loaded by ``CheckSuite``.
    """
    dataset = None
    if 'dataset_opendap' in request.inputs:
        dataset = request.inputs['dataset_opendap'][0].url
        LOGGER.debug("opendap dataset url: {}".format(dataset))
    elif 'dataset' in request.inputs:
        dataset = request.inputs['dataset'][0].file
        LOGGER.debug("opendap dataset file: {}".format(dataset))
    if not dataset:
        raise ProcessError("You need to provide a Dataset.")

    output_format = request.inputs['format'][0].data
    checker_names = [checker.data for checker in request.inputs['test']]

    check_suite = CheckSuite()
    check_suite.load_all_available_checkers()
    # Every requested checker is handed to run_checker below, so every one
    # of them (not just the first) has to be validated here.
    for checker_name in checker_names:
        if checker_name not in check_suite.checkers:
            raise ProcessError(
                "Test {} is not available.".format(checker_name))

    output_file = os.path.join(
        self.workdir, "check_report.{}".format(output_format))
    LOGGER.info("checking dataset {}".format(dataset))
    ComplianceChecker.run_checker(
        dataset,
        checker_names=checker_names,
        verbose=True,
        criteria=request.inputs['criteria'][0].data,
        output_filename=output_file,
        output_format=output_format)

    response.outputs['output'].file = output_file
    response.update_status("compliance checker finshed.", 100)
    return response
def convertToGeotiff(self, filename: str, destname: str):
    """Convert ``filename`` to a DEFLATE-compressed GeoTIFF named ``destname``.

    Runs ``gdal_translate`` with ``self.workdir`` as working directory and
    checks that the destination file exists afterwards.

    Parameters:
        filename: source raster, passed to ``gdal_translate`` as given.
        destname: destination file name, joined onto ``self.workdir`` for
            the existence check.

    Raises:
        ProcessError: with ``self.internalErrorMsg`` when the command exits
            with a non-zero status or the destination file is missing.  The
            command output is only written to the log.
    """
    args = [
        'gdal_translate', '-co', 'COMPRESS=DEFLATE', '-of', 'GTiff',
        filename, destname
    ]
    # communicate() drains the pipe until EOF and waits for the process, so
    # output is captured even when the command exits immediately; the
    # context manager closes the pipe afterwards.
    with subprocess.Popen(args,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT,
                          cwd=self.workdir) as gdalprocess:
        raw_output, _ = gdalprocess.communicate()
    # The output is only used for logging; never fail on non-ASCII bytes.
    gdaloutput = raw_output.decode('ascii', errors='replace')

    if gdalprocess.returncode != 0:
        LOGGER.error('converting to GeoTIFF failed: ' + gdaloutput)
        raise ProcessError(self.internalErrorMsg)

    abspath = os.path.join(self.workdir, destname)
    if not os.path.exists(abspath):
        LOGGER.error('output from gdal_translate is missing')
        raise ProcessError(self.internalErrorMsg)
def load_rdata_to_python(r_file, r_object_name):
    """
    Loads R objects from a .rda or .Rdata file into the embedded R
    environment, then exposes that object as a Python object.

    Parameters:
        r_file (str): path to an .rda or .rdata file
        r_object_name (str): name of an R object from the r_file

    Returns:
        Exposed R object as a python object

    Raises:
        ProcessError: if R reports a runtime error while loading the file
            or evaluating the object name.
    """
    try:
        # NOTE(review): r_file is interpolated into R source unescaped;
        # confirm callers only pass trusted paths.
        robjects.r(f"load(file='{r_file}')")
        return robjects.r(r_object_name)
    except RRuntimeError as e:
        err_name = re.findall(r"object \'(.*)\' not found", str(e))
        if not err_name:
            # The R error was not an "object ... not found" error (for
            # example the file could not be loaded).  Indexing err_name
            # here used to escape as an IndexError.
            raise ProcessError(
                msg=
                f"{type(e).__name__}: Could not load the requested object from the given rda file"
            ) from e
        if "_" in err_name[0]:
            raise ProcessError(
                msg=
                f"{type(e).__name__}: The variable name passed is not an object found in the given rda file"
            ) from e
        raise ProcessError(
            msg=
            f"{type(e).__name__}: There is no object named {err_name[0]} in this rda file"
        ) from e
def _guard_rail(input, box):
    """Refuse queries whose estimated data volume exceeds the configured limits.

    The estimate is the product of ``box.shape`` multiplied by the summed
    item sizes of the measurement dtypes returned by
    ``input.output_measurements(box.product_definitions)``.

    Raises:
        ProcessError: if the total estimate exceeds ``MAX_BYTES_IN_GB``, if
            ``box.box`` has no entries, or if the estimate per entry of
            ``box.box`` exceeds ``MAX_BYTES_PER_OBS_IN_GB``.
    """
    measurements = input.output_measurements(box.product_definitions)

    cell_count = 1
    for extent in box.shape:
        cell_count *= extent
    bytes_per_cell = sum(
        np.dtype(m.dtype).itemsize for m in measurements.values()
    )
    byte_count = cell_count * bytes_per_cell

    print("byte count for query: ", byte_count)
    total_limit = MAX_BYTES_IN_GB * GB
    if byte_count > total_limit:
        template = (
            "requested area requires {}GB data to load - "
            "maximum is {}GB"
        )
        raise ProcessError(
            template.format(int(byte_count / GB), MAX_BYTES_IN_GB)
        )

    grouped = box.box
    print("grouped shape", grouped.shape)
    assert len(grouped.shape) == 1

    observation_count = grouped.shape[0]
    if observation_count == 0:
        raise ProcessError("no data returned for query")

    bytes_per_obs = byte_count / observation_count
    if bytes_per_obs > MAX_BYTES_PER_OBS_IN_GB * GB:
        template = (
            "requested time slices each requires {}GB data to load - "
            "maximum is {}GB"
        )
        raise ProcessError(
            template.format(int(bytes_per_obs / GB), MAX_BYTES_PER_OBS_IN_GB)
        )
def validate_vectors(vectors):
    """Check that every entry of ``vectors`` evaluates to an R vector.

    Each entry is evaluated in the embedded R session and tested with R's
    ``is.vector``.

    Parameters:
        vectors: iterable of strings holding R expressions.

    Raises:
        ProcessError: if an entry cannot be parsed by R or does not
            evaluate to a vector.
    """
    for expression in vectors:
        try:
            evaluated = robjects.r(expression)
            is_vector = robjects.r["is.vector"](evaluated)[0]
            if not is_vector:
                raise ProcessError("Invalid type passed for vector")
        except RParsingError as e:
            raise ProcessError(
                msg=f"{type(e).__name__}: Invalid vector format, follow R vector syntax"
            )
def process(self, runner):
    """Produce ``self.output_uris`` from original files or by running ``runner``.

    When ``self.use_original_files`` is set, the original file URLs are
    packaged into a ``ResultSet``.  Otherwise the inputs are cleaned, the
    ``collection`` input is replaced by the search result's files (when a
    search result is available) and ``runner`` is called with the inputs.

    Parameters:
        runner: callable taking the inputs dict and returning file URIs.

    Raises:
        ProcessError: wrapping any exception raised by ``runner``.
    """
    if not self.use_original_files:
        # Generate the new subset of files.
        clean_inputs(self.inputs)

        # Prefer the files found by the search over the raw collection input.
        if self.search_result:
            self.inputs["collection"] = []
            for _, uris in self.search_result.files().items():
                self.inputs["collection"].append(FileMapper(uris))

        try:
            produced = runner(self.inputs)
        except Exception as e:
            raise ProcessError(f"{e}")

        self.output_uris = produced
        return

    # Original files requested: just package up their URLs.
    packaged = ResultSet()
    for ds_id, file_urls in self.original_file_urls.items():
        packaged.add(ds_id, file_urls)
    self.output_uris = packaged.file_uris
def prepare_csv_files(
    self, prec_file_content, tavg_file_content, tmax_file_content, tmin_file_content
):
    """Write the supplied CSV contents to ``TempFile`` objects.

    The precipitation content is always written.  It is paired either with
    the average-temperature content or, if that is empty, with both the
    maximum- and minimum-temperature contents.

    Returns:
        dict: ``{"prec_file", "tavg_file"}`` or
        ``{"prec_file", "tmin_file", "tmax_file"}``, each value an open
        file object rewound to position 0.

    Raises:
        ProcessError: if neither a tavg content nor both tmax and tmin
            contents are provided.
    """

    def write_csv(content):
        # Returned rewound so the caller can read the content back.
        file_ = TempFile(mode="w+", suffix=".csv")
        file_.write(content)
        file_.seek(0)
        return file_

    use_tavg = bool(tavg_file_content)
    use_tmax_tmin = bool(tmax_file_content and tmin_file_content)

    # Validate before creating any file so the error path does not leave an
    # open precipitation file behind.
    if not (use_tavg or use_tmax_tmin):
        raise ProcessError(
            "You must provide one of either a tavg file content or tmax and tmin file content"
        )

    prec_file = write_csv(prec_file_content)
    if use_tavg:
        tavg_file = write_csv(tavg_file_content)
        return {"prec_file": prec_file, "tavg_file": tavg_file}

    tmax_file = write_csv(tmax_file_content)
    tmin_file = write_csv(tmin_file_content)
    return {
        "prec_file": prec_file,
        "tmin_file": tmin_file,
        "tmax_file": tmax_file,
    }
def _handler(self, request, response):
    """Subset a variable over a domain and attach a preview plot.

    The ``variable`` and ``domain`` inputs are JSON documents; only the
    first variable and the first domain they describe are used.  A failing
    subset aborts the process, a failing preview plot does not.

    Raises:
        ProcessError: if the subsetting step raises any exception.
    """
    response.update_status('PyWPS Process started.', 0)

    # Decode the JSON inputs.
    variable_payload = json.loads(request.inputs['variable'][0].data)
    variable = Variables.from_json(variable_payload).variables[0]
    domain_payload = json.loads(request.inputs['domain'][0].data)
    domain = Domains.from_json(domain_payload).domains[0]

    # Subsetting: mandatory step.
    # TODO: Use chunks for parallel processing with dask.distributed
    try:
        subset_path = subset(
            variable.uri, variable.var_name, domain.dimensions, self.workdir)
        response.outputs['nc'].file = subset_path
        described_outputs = Outputs([Output(uri='http://test.nc')])
        response.outputs['output'].data = described_outputs.json
        response.update_status('subsetting done.', 70)
    except Exception:
        LOGGER.exception('subsetting failed')
        raise ProcessError("subsetting failed.")

    # Preview plot: best effort, a failure is reported in the output only.
    try:
        preview_path = simple_plot_preview(
            subset_path, variable.var_name, self.workdir)
        response.outputs['preview'].file = preview_path
        response.update_status('plot done.', 80)
    except Exception:
        LOGGER.exception('plot failed')
        response.outputs['preview'].data = 'plot failed'
        response.update_status('plot failed.', 80)

    response.update_status('PyWPS Process completed.', 100)
    return response
def copy_and_get_filepath(self, request):
    """
    Return the path of a local copy of the ``netcdf`` input.

    The metadata update modifies its file in place, so the input is copied
    first for two reasons:
        1. The original file is maintained without any corruption
        2. Writing back to an OPeNDAP file is nearly impossible

    An OPeNDAP input is first downloaded into ``self.workdir``; a local
    input must end in ``.nc``.  The copy is written next to the source as
    ``<name>_copy.nc``.

    Raises:
        ProcessError: if the input is neither an OPeNDAP URL nor a ``.nc``
            file.
    """
    path = request.inputs["netcdf"][0]
    if is_opendap_url(path.url):
        url = path.url
        filename = url.split("/")[-1]
        original = os.path.join(self.workdir, filename)
        # Close the remote dataset as soon as the local file is written.
        with xr.open_dataset(url) as input_dataset:
            input_dataset.to_netcdf(original, format="NETCDF4_CLASSIC")
    elif path.file.endswith(".nc"):
        original = path.file
    else:
        raise ProcessError(
            "You must provide a data source (opendap/netcdf). Inputs provided"
        )

    # The last path component of an OPeNDAP URL need not end in ".nc";
    # blindly dropping three characters would mangle such names.
    if original.endswith(".nc"):
        stem = original[:-3]
    else:
        stem = os.path.splitext(original)[0]
    copy_path = stem + "_copy.nc"
    shutil.copyfile(original, copy_path)
    return copy_path
def _handler(self, request, response):
    """Collect WPS usage and download statistics for an optional time range.

    The WPS usage collection is mandatory.  The downloads collection is
    best effort and falls back to ``EMPTY_CSV`` when it fails.

    Raises:
        ProcessError: wrapping any exception from the WPS usage collection.
    """
    response.update_status("Usage started.", 0)

    # No "time" input means unbounded collection.
    time_start = time_end = None
    if "time" in request.inputs:
        requested_time = request.inputs["time"][0].data
        time_start, time_end = time_parameter.TimeParameter(
            requested_time).get_bounds()

    # WPS usage: a failure aborts the process.
    try:
        wps_usage = WPSUsage()
        response.outputs["wpsusage"].file = wps_usage.collect(
            time_start=time_start, time_end=time_end, outdir=self.workdir)
        response.update_status("WPSUsage completed.", 50)
    except Exception as e:
        raise ProcessError(f"{e}")

    # Downloads: a failure is logged and replaced by an empty CSV.
    try:
        downloads = Downloads()
        downloads_csv = downloads.collect(
            time_start=time_start, time_end=time_end, outdir=self.workdir)
        response.outputs["downloads"].file = downloads_csv
    except Exception:
        LOGGER.exception("downloads collection failed")
        response.outputs["downloads"].data = EMPTY_CSV
    finally:
        response.update_status("Downloads usage completed.", 90)

    return response
def _handler(self, request, response):
    """Run the submitted workflow and publish its results and provenance.

    The ``workflow`` input is handed to a ``WorkflowRunner`` writing into
    ``self.workdir``.  The resulting file URIs are exposed as a metalink
    document; the provenance is written as JSON and as PNG.

    Raises:
        ProcessError: wrapping any exception raised while running the
            workflow.
    """
    try:
        wfdata = request.inputs["workflow"][0].data
        LOGGER.debug(f"type wfdata={type(wfdata)}, wfdata={wfdata}")
        wf = workflow.WorkflowRunner(output_dir=self.workdir)
        file_uris = wf.run(wfdata)
    except Exception as e:
        raise ProcessError(f"{e}")

    # Metalink document with collection of netcdf files
    metalink = build_metalink(
        "workflow-result",
        "Workflow result as NetCDF files.",
        self.workdir,
        file_uris,
    )

    outputs = response.outputs
    outputs["output"].data = metalink.xml
    outputs["prov"].file = wf.provenance.write_json()
    outputs["prov_plot"].file = wf.provenance.write_png()
    return response
def _handler(request, response):
    """Placeholder handler that only enforces the ``pre_checked`` option.

    Raises:
        ProcessError: if ``pre_checked`` is set and not all referenced data
            is reported as characterised by ``daops``.
    """
    data_refs = [dref.data for dref in request.inputs['data_ref']]

    must_be_pre_checked = request.inputs['pre_checked'][0].data
    if must_be_pre_checked:
        if not daops.is_characterised(data_refs, require_all=True):
            raise ProcessError('Data has not been pre-checked')

    response.outputs['output'].data = 'not working yet'
    return response
def edit_config_file(self, config_file, uhs_files, station_file, domain):
    """Point the station file and the configuration at the given inputs.

    The second line of ``station_file`` is replaced by ``uhs_files``.  The
    configuration read from ``config_file`` is then copied into a
    case-preserving ``ConfigParser``, its ``UHS_FILES`` and ``DOMAIN``
    sections are updated, and the result is written next to the original
    with an ``_edited.cfg`` suffix.

    Returns:
        str: path of the edited configuration file.

    Raises:
        ProcessError: if the ``UHS_FILES`` or ``DOMAIN`` section is missing
            from the configuration.
    """
    with open(station_file, "r") as f:
        data = f.readlines()

    # Keep the terminator of the replaced line; without it the new path
    # would run into the following line of the station file.
    uhs_line = uhs_files
    if data[1].endswith("\n") and not uhs_line.endswith("\n"):
        uhs_line += "\n"
    data[1] = uhs_line

    with open(station_file, "w") as f:
        f.writelines(data)

    parser = configparser.ConfigParser()
    parser.optionxform = str  # keep option names case-sensitive

    unprocessed = config_file
    config_dict = read_config(unprocessed)
    for section in config_dict.keys():
        parser[section] = {
            k: str(config_dict[section][k])
            for k in config_dict[section].keys()
        }

    try:
        parser["UHS_FILES"]["ROUT_DIR"] = "/".join(
            uhs_files.split("/")[:-1])
        parser["UHS_FILES"]["STATION_FILE"] = station_file
        parser["DOMAIN"]["FILE_NAME"] = domain
    except KeyError as e:
        raise ProcessError(
            f"{type(e).__name__}: Invalid header or config key in config file"
        ) from e

    processed = ".".join(unprocessed.split(".")[:-1]) + "_edited.cfg"
    with open(processed, "w") as cfg:
        parser.write(cfg)
    return processed
def get_filepaths(nc_input):
    """Collect list of netcdf file paths

    Every entry of nc_input contributes its URL when that URL is an OpenDAP
    url, or its local file when the file name ends in ".nc". Any other
    entry is rejected.

    Parameters:
        nc_input (pywps.ComplexInput): Object containing local or OpenDAP
            file paths

    Returns:
        list: List of filepaths

    Raises:
        ProcessError: if an entry is neither an OpenDAP url nor a ".nc"
            file
    """
    collected = []
    for entry in nc_input:
        if is_opendap_url(entry.url):
            location = entry.url
        elif entry.file.endswith(".nc"):
            location = entry.file
        else:
            raise ProcessError(
                "You must provide a data source (opendap/netcdf). "
                f"Inputs provided: {nc_input}"
            )
        collected.append(location)
    return collected
def custom_process_error(err):
    """Re-raise ``err`` as a ProcessError that keeps part of its message.

    ProcessError from pywps only allows a limited list of valid chars in
    custom msgs or it reverts to its default msg. By matching the end of a
    msg only and removing the '()' brackets and ' quote we can show some of
    the original error message to the user.

    Parameters:
        err (Exception): the original error

    Raises:
        ProcessError: always; the message is the type name of ``err``,
            followed by the cleaned message when there is one.
    """
    err_match = re.findall(r"[^:\n].*$", str(err))
    if not err_match:
        # Empty message (or only colons/newlines): nothing to quote, and
        # indexing err_match would raise an unrelated IndexError.
        raise ProcessError(f"{type(err).__name__}") from err

    # Strip brackets and quotes in a single pass.
    err_msg = err_match[0].translate(str.maketrans("", "", "()'"))
    raise ProcessError(f"{type(err).__name__}: {err_msg}") from err
def wrap_director(collection, inputs, runner):
    """Build a Director for the request and let it process ``runner``.

    The director decides whether the request should be rejected, served
    from original files or handled by applying the WPS process.

    Returns:
        Director: the director after processing.

    Raises:
        ProcessError: wrapping any exception raised while creating or
            running the director.
    """
    try:
        director = Director(collection, inputs)
        director.process(runner)
    except Exception as e:
        raise ProcessError(f"{e}")
    return director
def _handler(request, response):
    """Always fail, either with a user-facing or with a generic error.

    Raises:
        ProcessError: carrying the ``message`` input when ``nice`` is True.
        Exception: with a fixed text otherwise.
    """
    response.update_status('PyWPS Process started.', 0)
    LOGGER.info("wps_error started ...")

    wants_nice_error = request.inputs['nice'][0].data
    if wants_nice_error is not True:
        raise Exception("Sorry, we have no explanation for this error.")
    raise ProcessError(request.inputs['message'][0].data)
def get_filepath(self, request):
    """Return the location of the first ``netcdf`` input.

    Returns:
        The input's URL when it is an OPeNDAP url, otherwise its local
        file when the file name ends in ``.nc``.

    Raises:
        ProcessError: if the input is neither of the two.
    """
    source = request.inputs["netcdf"][0]
    if is_opendap_url(source.url):
        return source.url
    if source.file.endswith(".nc"):
        return source.file
    raise ProcessError(
        "You must provide a data source (opendap/netcdf).")
def _handler(self, request, response):
    """Rebuild the configuration, run the conversion and return its output.

    Progress is reported through ``log_handler`` at each step.

    Raises:
        ProcessError: wrapping any exception raised by ``convert``.
    """
    loglevel, uhs_files, station_file, domain, config_file = collect_args_wrapper(
        request, self.workdir
    )

    def report(message, step):
        # All progress messages share the same logging context.
        log_handler(
            self,
            response,
            message,
            logger,
            log_level=loglevel,
            process_step=step,
        )

    report("Starting Process", "start")

    report("Rebuilding configuration", "config_rebuild")
    config_file = self.edit_config_file(
        config_file, uhs_files, station_file, domain
    )

    report("Run Parameter Conversion", "process")
    try:
        convert(config_file)
    except Exception as e:
        raise ProcessError(f"{type(e).__name__}: {e}")

    report("Building final output", "build_output")
    final_config = read_config(config_file)
    response.outputs["output"].file = get_outfile(final_config, "params")

    report("Process Complete", "complete")
    return response
def createWavejets(self, response: WPSResponse):
    """Create wave-height contours and a raw GeoTIFF from the sshmax output.

    Runs ``gdal_contour`` in ``self.workdir`` on ``self.ewOutputSshmax``
    with the levels in ``self.intervalsWavejets``, writing GeoJSON to
    ``self.geojsonSshmax``.  The GeoJSON is attached as the ``waveheights``
    output; the sshmax file converted by ``convertToGeotiff`` is attached
    as ``waveheightsRaw``.

    Raises:
        ProcessError: with ``self.internalErrorMsg`` when ``gdal_contour``
            exits with a non-zero status or its output file is missing.
            Errors from ``convertToGeotiff`` propagate unchanged.
    """
    # gdal_contour -f geojson -p -amin wavemin -amax wavemax
    #   -fl 0.3 0.5 ... eWave.2D.sshmax waveheights.geojson
    args = [
        'gdal_contour', '-f', 'geojson', '-p', '-amin', 'wavemin', '-amax',
        'wavemax', '-fl'
    ]
    args.extend(str(level) for level in self.intervalsWavejets)
    args.append(self.ewOutputSshmax)
    args.append(self.geojsonSshmax)

    # communicate() drains the pipe until EOF and waits for the process, so
    # output is captured even when the command exits immediately; the
    # context manager closes the pipe afterwards.
    with subprocess.Popen(args,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT,
                          cwd=self.workdir) as gdalprocess:
        raw_output, _ = gdalprocess.communicate()
    # The output is only used for logging; never fail on non-ASCII bytes.
    gdaloutput = raw_output.decode('ascii', errors='replace')

    if gdalprocess.returncode != 0:
        LOGGER.error('creating contours of max wave heights failed: ' +
                     gdaloutput)
        raise ProcessError(self.internalErrorMsg)

    abspath = os.path.join(self.workdir, self.geojsonSshmax)
    if not os.path.exists(abspath):
        LOGGER.error('output from gdal_contour is missing (wave heights)')
        raise ProcessError(self.internalErrorMsg)

    response.outputs['waveheights'].data_format = FORMATS.GEOJSON
    response.outputs['waveheights'].file = abspath

    self.convertToGeotiff(self.ewOutputSshmax, self.geotiffSshmax)
    gtpath = os.path.join(self.workdir, self.geotiffSshmax)
    response.outputs['waveheightsRaw'].data_format = FORMATS.GEOTIFF
    response.outputs['waveheightsRaw'].file = gtpath
def _handler(self, request: WPSRequest, response: ExecuteResponse):
    """Subset the BCCAQv2 datasets and return the results as a zip archive.

    The request inputs are replaced by the BCCAQv2 inputs for the requested
    ``variable`` and ``rcp`` before subsetting.  With ``output_format``
    equal to ``"csv"`` the subset files are converted to CSV (plus a
    metadata folder) before being zipped.

    Returns:
        The response, with the archive attached as ``output``.

    Raises:
        ProcessError: if the subset produced no files.
    """
    self.write_log("Processing started", response, 5)

    inputs = request.inputs
    variable = self.get_input_or_none(inputs, "variable")
    rcp = self.get_input_or_none(inputs, "rcp")
    lat0 = self.get_input_or_none(inputs, "lat0")
    lon0 = self.get_input_or_none(inputs, "lon0")
    output_format = inputs["output_format"][0].data
    output_filename = f"BCCAQv2_subset_{lat0}_{lon0}"

    self.write_log("Fetching BCCAQv2 datasets", response, 6)
    request.inputs = get_bccaqv2_inputs(request.inputs, variable, rcp)

    self.write_log("Running subset", response, 7)
    threads = int(configuration.get_config_value("finch", "subset_threads"))
    metalink = self.subset(
        request.inputs,
        response,
        start_percentage=7,
        end_percentage=90,
        threads=threads,
    )
    if not metalink.files:
        raise ProcessError(
            "No data was produced when subsetting using the provided bounds."
        )

    self.write_log("Subset done, creating zip file", response)

    workdir = Path(self.workdir)
    output_files = [mf.file for mf in metalink.files]
    if output_format == "csv":
        csv_files, metadata_folder = netcdf_to_csv(
            output_files,
            output_folder=workdir,
            filename_prefix=output_filename,
        )
        output_files = csv_files + [metadata_folder]

    output_zip = workdir / (output_filename + ".zip")

    def log(message_, percentage_):
        # Forward zip progress to the process log.
        self.write_log(message_, response, percentage_)

    zip_files(output_zip, output_files, log_function=log, start_percentage=90)
    response.outputs["output"].file = output_zip

    self.write_log("Processing finished successfully", response, 99)
    return response
def r_valid_name(robj_name):
    """Reject names that are not syntactically valid R names.

    The R function 'make.names' changes a name if it is not syntactically
    correct and leaves it untouched if it is, so a changed name is invalid.

    Parameters:
        robj_name (str): The name of the robject to verify

    Raises:
        ProcessError: if 'make.names' alters the name
    """
    normalized = get_package("base").make_names(robj_name)[0]
    if normalized == robj_name:
        return
    raise ProcessError(msg="Your vector name is not a valid R name")
def _handler(self, request, response):
    """Split every merged climatology input and return a metalink of the parts.

    Progress is reported through ``log_handler`` at each step.

    Raises:
        ProcessError: if an input path cannot be opened as a ``CFDataset``
            (or if ``get_filepaths`` rejects an input).
    """
    loglevel = request.inputs["loglevel"][0].data

    def report(message, step):
        # All progress messages share the same logging context.
        log_handler(
            self,
            response,
            message,
            logger,
            log_level=loglevel,
            process_step=step,
        )

    report("Starting Process", "start")
    filepaths = get_filepaths(request.inputs["netcdf"])

    report(f"Spliting climo files: {filepaths}", "process")
    output_filepaths = []
    for path in filepaths:
        try:
            dataset = CFDataset(path)
        except Exception:
            raise ProcessError(
                "The input for netcdf file paths could not be converted to a netcdf dataset"
            )
        else:
            split_paths = split_merged_climos(dataset, self.workdir)
            output_filepaths.extend(split_paths)

    report("Building final output", "build_output")
    response.outputs["output"].data = build_meta_link(
        varname="split_climo",
        desc="Split climatologies",
        outfiles=output_filepaths,
        outdir=self.workdir,
    )

    report("Process Complete", "complete")
    return response
def _handler_wrapper(self, request, response):
    """Call the wrapped handler and turn any failure into a ProcessError.

    Returns:
        Whatever ``self.wrapped_handler`` returns.

    Raises:
        ProcessError: carrying the repr of any exception raised by the
            wrapped handler; the exception is logged first.
    """
    self.sentry_configure_scope(request)

    # The process has been deepcopied, so it's ok to assign it a single
    # response. The status document can then be updated from the process
    # instance itself.
    self.response = response

    try:
        outcome = self.wrapped_handler(request, response)
    except Exception as err:
        LOGGER.exception('FinchProcess handler wrapper failed with:')
        raise ProcessError(f"Finch failed with {err!r}")
    return outcome
def _resolve(self):
    """
    Resolve how the WPS will handle this request.

    Runs the catalog search, then steps through the following:
        - Are all datasets in the inventory?
          If NO: raise InvalidCollection
        - Does the user want to access original files only?
          If YES: return (and use original files)
        - Does the user require data to be pre-checked AND has the
          collection been pre-checked?
          If NO: raise ProcessError
        - Does the user want to apply fixes AND fixes are required for
          this collection?
          If YES: return (and use WPS)
        - Are "dims" or "freq" among the inputs (average operator)?
          If YES: return (and use WPS)
        - Does the requested temporal subset align with files in all
          datasets in this collection?
          If YES: use original files
          If NO: use WPS

    Raises:
        InvalidCollection: the search did not return one result per
            requested dataset
        ProcessError: "pre_checked" was requested but the collection is
            not fully characterised
    """
    requested_time = self.inputs.get("time")
    requested_components = self.inputs.get("time_components")
    self.search_result = self.catalog.search(
        collection=self.coll,
        time=requested_time,
        time_components=requested_components,
    )

    # Every requested dataset id must be present in the inventory.
    found_count = len(self.search_result)
    if found_count != len(self.coll):
        raise InvalidCollection()

    # Original files requested: go straight there.
    if self.inputs.get("original_files"):
        self.original_file_urls = self.search_result.download_urls()
        self.use_original_files = True
        return

    # "pre_checked" requires the data to have been characterised by dachar.
    if self.inputs.get("pre_checked"):
        if not is_characterised(self.coll, require_all=True):
            raise ProcessError("Data has not been pre-checked")

    # Fixes wanted and required: the subset has to be generated.
    needs_fixing = self.inputs.get("apply_fixes") and self.requires_fixes()
    if needs_fixing:
        return

    # TODO: quick fix for average. Don't use original files for average operator
    is_average = "dims" in self.inputs or "freq" in self.inputs
    if is_average:
        return

    # Finally, check whether the subset requirements align with whole
    # datasets. The call itself sets self.original_file_urls AND
    # self.use_original_files, so its return value is not needed here.
    self.request_aligns_with_files()
def generate_dates(self, request, filename, obj_name, date_fields, date_format,
                   cal):
    """Build a PCICt date vector from columns of an R object.

    The object ``obj_name`` is fetched from ``filename`` with ``get_robj``,
    assigned under the same name in the embedded R session, and the
    selected ``date_fields`` columns are pasted together and converted
    with ``as.PCICt``.

    Returns:
        The result of the ``as.PCICt`` call.

    Raises:
        ProcessError: if R raises a runtime error during the conversion.
    """
    frame = get_robj(filename, obj_name)
    robjects.r.assign(obj_name, frame)

    conversion = (
        f"as.PCICt(do.call(paste, {obj_name}[,{date_fields}]), format='{date_format}', cal='{cal}')"
    )
    try:
        dates = robjects.r(conversion)
    except RRuntimeError as e:
        raise ProcessError(
            msg=f"{type(e).__name__}: Error generating dates")
    return dates
def _parse_geom(request_json):
    """Build a Geometry from the single feature of a GeoJSON-like mapping.

    The CRS is taken from a ``crs`` member of the collection, else from a
    ``crs`` member of the feature, else it defaults to CRS84.

    Parameters:
        request_json: mapping with a ``features`` list.

    Returns:
        Geometry: the feature's geometry in the resolved CRS.

    Raises:
        ProcessError: if there is not exactly one feature.
    """
    features = request_json["features"]
    if len(features) < 1:
        # can't drill if there is no geometry
        raise ProcessError("no features specified")

    if len(features) > 1:
        # do we need multipolygon support here?
        raise ProcessError("multiple features specified")

    feature = features[0]

    # These are mappings, so the member has to be looked up by key;
    # hasattr() never sees dictionary keys and always fell through to the
    # default.  An explicit null "crs" is treated like a missing one.
    crs_member = request_json.get("crs") or feature.get("crs")
    if crs_member:
        crs = CRS(crs_member["properties"]["name"])
    else:
        # http://geojson.org/geojson-spec.html#coordinate-reference-system-objects
        crs = CRS("urn:ogc:def:crs:OGC:1.3:CRS84")

    return Geometry(feature["geometry"], crs)
def _handler(self, request, response):
    """Configure the model from the request, run it and publish its outputs.

    Optional inputs ``conf``, ``rvc`` and ``random_numbers`` are applied to
    the model before the run; ``rvc`` and ``random_numbers`` are removed
    from ``request.inputs`` in the process.

    Returns:
        The response, with every declared output filled from the model's
        outputs (or set to an empty string when the model has none).

    Raises:
        ProcessError: carrying the full traceback text if the model run
            raises any exception.
    """
    response.update_status(f"PyWPS process {self.identifier} started.", 0)
    model = self.model(request)

    # Model configuration (zipped RV files in `conf` input)
    if "conf" in request.inputs:
        model.configure(self.get_config(request).values())

    # Initial conditions (`rvc` input)
    if "rvc" in request.inputs:
        model.resume(request.inputs.pop("rvc")[0].file)

    if "random_numbers" in request.inputs:
        model.config.set_rv_file(
            request.inputs.pop("random_numbers")[0].file)

    # Input data files
    ts = self.meteo(request)

    # Model options
    kwds = self.options(request)

    # Launch model with input files
    try:
        self.run(model, ts, kwds)
    except Exception as exc:
        LOGGER.exception(exc)
        err_msg = traceback.format_exc()
        # By default the error message is limited to 300 chars and strips
        # many special characters; lift both limits so the whole traceback
        # reaches the user.
        raise ProcessError(err_msg,
                           max_length=len(err_msg),
                           allowed_chars=string.printable) from exc

    # Store output files name. If an output counts multiple files, they'll be zipped.
    for key in response.outputs.keys():
        val = model.outputs.get(key)
        if val is not None:
            if isinstance(response.outputs[key], LiteralOutput):
                response.outputs[key].data = str(val)
            else:
                response.outputs[key].file = str(val)
                # NOTE(review): `.suffix` is read, so val is presumably a
                # pathlib.Path here — confirm against model.outputs.
                if val.suffix == ".zip":
                    response.outputs[key].data_format = Format(
                        "application/zip",
                        extension=".zip",
                        encoding="base64")
        else:
            # The model produced nothing for this declared output.
            response.outputs[key].data = ""

    return response
def unpack_data_file(self, data_file, data_vector):
    """Read a vector from an Rdata file, falling back to reading an RDS file.

    Parameters:
        data_file: path of the data file.
        data_vector: name of the object to load when ``data_file`` is an
            Rdata file.

    Returns:
        The object loaded by ``load_rdata_to_python``, or the unlisted
        content of the file read with ``readRDS``.

    Raises:
        ProcessError: if both attempts fail.
    """
    # First attempt: Rdata file holding an object called data_vector.
    try:
        return load_rdata_to_python(data_file, data_vector)
    except (RRuntimeError, ProcessError, IndexError):
        # Deliberately ignored: the file may still be a valid RDS file.
        pass

    # Second attempt: RDS file.
    rds_expression = f"unlist(readRDS('{data_file}'))"
    try:
        return robjects.r(rds_expression)
    except (RRuntimeError, ProcessError) as e:
        raise ProcessError(
            f"{type(e).__name__}: Data file must be a RDS file or "
            "a Rdata file containing an object of the given name"
        )
def process_complex(input):
    """Handler for ComplexInputs

    Returns the input's stream when its identifier contains "csv", the
    result of ``url_handler`` when the instance carries a non-None ``_url``
    attribute, or the input's file when that file exists.

    Raises:
        ProcessError: if none of the above applies.
    """
    # vars() is used on purpose: only attributes stored on the instance
    # itself are inspected.
    attributes = vars(input)

    if "csv" in attributes["identifier"]:
        return input.stream
    if attributes.get("_url") is not None:
        return url_handler(workdir, input.url)
    if os.path.isfile(input.file):
        return input.file
    raise ProcessError("This input is not supported")