def test_to_absolute_path_with_absolute_input_and_nonrelative_parent(tmp_path):
    """An absolute path that lies outside the given parent must be rejected."""
    parent_dir = tmp_path / "parent_dir"
    outside_path = tmp_path / "nonexistent_file.txt"
    with pytest.raises(ValueError) as excinfo:
        to_absolute_path(str(outside_path), parent=parent_dir)
    assert "is not a subpath of parent" in str(excinfo.value)
def _setup_work_dir(self, cfg_dir: str = None):
    """Resolve the working directory, creating a timestamped default if none given."""
    if not cfg_dir:
        # Must exist before setting up default config
        now_utc = datetime.datetime.now(datetime.timezone.utc)
        run_name = f"pcrglobwb_{now_utc.strftime('%Y%m%d_%H%M%S')}"
        self.work_dir = to_absolute_path(run_name, parent=CFG["output_dir"])
    else:
        self.work_dir = to_absolute_path(cfg_dir)
    self.work_dir.mkdir(parents=True, exist_ok=True)
def __init__(
    self,
    start_time: str,
    end_time: str,
    directory: str,
    shape: Optional[str] = None,
):
    """Store the simulation period and resolve forcing paths to absolute ones."""
    self.start_time = start_time
    self.end_time = end_time
    self.directory = to_absolute_path(directory)
    if shape is None:
        self.shape = None
    else:
        self.shape = to_absolute_path(shape)
def _check_forcing(self, forcing):
    """Check forcing argument and get path, start and end time of forcing data.

    Args:
        forcing: expected to be a MarrmotForcing object.

    Raises:
        TypeError: if forcing is not a MarrmotForcing.
    """
    if isinstance(forcing, MarrmotForcing):
        forcing_dir = to_absolute_path(forcing.directory)
        self.forcing_file = str(forcing_dir / forcing.forcing_file)
        # convert date_strings to datetime objects
        self.forcing_start_time = get_time(forcing.start_time)
        self.forcing_end_time = get_time(forcing.end_time)
    else:
        # Fixed message: the original concatenated "a " + " Marrmot…",
        # producing a double space in the user-facing error.
        raise TypeError(f"Unknown forcing type: {forcing}. Please supply a "
                        "MarrmotForcing object.")
    # parse start/end time from the MATLAB forcing file
    forcing_data = sio.loadmat(self.forcing_file, mat_dtype=True)
    if "parameters" in forcing_data:
        self._parameters = forcing_data["parameters"][0]
    if "store_ini" in forcing_data:
        self.store_ini = forcing_data["store_ini"][0]
    if "solver" in forcing_data:
        # MATLAB structs load as nested object arrays, hence the [0][0][0] indexing.
        forcing_solver = forcing_data["solver"]
        self.solver.name = forcing_solver["name"][0][0][0]
        self.solver.resnorm_tolerance = forcing_solver[
            "resnorm_tolerance"][0][0][0]
        self.solver.resnorm_maxiter = forcing_solver["resnorm_maxiter"][0][
            0][0]
def _setup_working_directory(self, cfg_dir: str = None):
    """Create the run directory and copy the parameter set and forcing into it."""
    if cfg_dir:
        self.work_dir = to_absolute_path(cfg_dir)
    else:
        stamp = datetime.datetime.now(
            datetime.timezone.utc).strftime("%Y%m%d_%H%M%S")
        self.work_dir = to_absolute_path(f"wflow_{stamp}",
                                         parent=CFG["output_dir"])
    # Make sure parents exist
    self.work_dir.parent.mkdir(parents=True, exist_ok=True)

    assert self.parameter_set
    shutil.copytree(src=self.parameter_set.directory, dst=self.work_dir)
    if self.forcing:
        forcing_path = to_absolute_path(self.forcing.netcdfinput,
                                        parent=self.forcing.directory)
        shutil.copy(src=forcing_path, dst=self.work_dir)
def _export_config(self) -> PathLike:
    """Write the current configuration into the work dir and return its path."""
    self.config.set("globalOptions", "outputDir", str(self.work_dir))
    exported = to_absolute_path("pcrglobwb_ewatercycle.ini",
                                parent=self.work_dir)
    with exported.open("w") as handle:
        self.config.write(handle)
    self.cfg_file = exported
    return self.cfg_file
def load_from_file(self, filename: Union[os.PathLike, str]) -> None:
    """Load user configuration from the given file.

    Args:
        filename: Path to a configuration file.

    Raises:
        FileNotFoundError: If the file does not exist. Fixed the message:
            the original f-string had no placeholder and an unbalanced
            backtick, so the offending path was never shown.
    """
    path = to_absolute_path(str(filename))
    if not path.exists():
        raise FileNotFoundError(f"Cannot find: `{filename}`")
    # Reset to defaults before layering the user's settings on top.
    self.clear()
    self.update(CFG_DEFAULT)
    self.update(Config._load_user_config(path))
def read_config_file(config_file: Union[os.PathLike, str]) -> dict:
    """Read config user file and store settings in a dictionary."""
    config_path = to_absolute_path(str(config_file))
    if not config_path.exists():
        raise IOError(f"Config file `{config_path}` does not exist.")
    yaml = YAML(typ="safe")
    with open(config_path, "r") as file:
        cfg = yaml.load(file)
    return cfg
def __init__(
    self,
    name: str,
    directory: str,
    config: str,
    doi="N/A",
    target_model="generic",
    supported_model_versions: Optional[Set[str]] = None,
):
    """Resolve paths against the configured parameter-set dir and store metadata."""
    parameterset_dir = CFG.get("parameterset_dir")
    self.name = name
    self.directory = to_absolute_path(directory,
                                      parent=parameterset_dir,
                                      must_be_in_parent=False)
    self.config = to_absolute_path(config,
                                   parent=parameterset_dir,
                                   must_be_in_parent=False)
    self.doi = doi
    self.target_model = target_model
    if supported_model_versions is None:
        self.supported_model_versions = set()
    else:
        self.supported_model_versions = supported_model_versions
def _check_forcing(self, forcing):
    """Check forcing argument and get path, start/end time of forcing data."""
    # TODO check if mask has same grid as forcing files,
    # if not warn users to run reindex_forcings
    if not isinstance(forcing, LisfloodForcing):
        raise TypeError(f"Unknown forcing type: {forcing}. "
                        "Please supply a LisfloodForcing object.")
    self.forcing = forcing
    self.forcing_dir = to_absolute_path(forcing.directory)
    # convert date_strings to datetime objects
    self._start = get_time(forcing.start_time)
    self._end = get_time(forcing.end_time)
def _setup_default_config(self):
    """Build the default PCR-GLOBWB config from the parameter set and forcing.

    Reads the parameter set's template config, points it at the input
    directory, and (when forcing is available) fills in the simulation
    period and the meteo input files.
    """
    config_file = self.parameter_set.config
    input_dir = self.parameter_set.directory

    cfg = CaseConfigParser()
    cfg.read(config_file)
    cfg.set("globalOptions", "inputDir", str(input_dir))
    if self.forcing:
        cfg.set(
            "globalOptions",
            "startTime",
            get_time(self.forcing.start_time).strftime("%Y-%m-%d"),
        )
        # Bug fix: endTime was previously derived from forcing.start_time,
        # which collapsed every run to a zero-length simulation period.
        cfg.set(
            "globalOptions",
            "endTime",
            get_time(self.forcing.end_time).strftime("%Y-%m-%d"),
        )
        cfg.set(
            "meteoOptions",
            "temperatureNC",
            str(
                to_absolute_path(
                    self.forcing.temperatureNC,
                    parent=self.forcing.directory,
                )),
        )
        cfg.set(
            "meteoOptions",
            "precipitationNC",
            str(
                to_absolute_path(
                    self.forcing.precipitationNC,
                    parent=self.forcing.directory,
                )),
        )
    self.config = cfg
def load(directory: str):
    """Load previously generated or imported forcing data.

    Args:
        directory: forcing data directory; must contain
            `ewatercycle_forcing.yaml` file

    Returns:
        Forcing object
    """
    yaml = YAML()
    source = to_absolute_path(directory)
    # TODO give nicer error
    yaml.register_class(DefaultForcing)
    for klass in FORCING_CLASSES.values():
        yaml.register_class(klass)
    forcing_info = yaml.load(source / FORCING_YAML)
    # Set directory in yaml string to parent of yaml file
    # Because in DefaultForcing.save the directory was removed
    forcing_info.directory = source
    if forcing_info.shape:
        forcing_info.shape = to_absolute_path(forcing_info.shape,
                                              parent=source)
    return forcing_info
def _generate_cfg_dir(cfg_dir: Path = None) -> Path:
    """Make sure there is a working directory.

    Args:
        cfg_dir: If cfg dir is None or does not exist then create
            sub-directory in CFG['output_dir']
    """
    if cfg_dir is None:
        # TODO this timestamp is not safe for parallel processing
        stamp = datetime.datetime.now(
            datetime.timezone.utc).strftime("%Y%m%d_%H%M%S")
        cfg_dir = to_absolute_path(f"marrmot_{stamp}",
                                   parent=Path(CFG["output_dir"]))
    cfg_dir.mkdir(parents=True, exist_ok=True)
    return cfg_dir
def _generate_workdir(cfg_dir: Path = None) -> Path:
    """Create or make sure workdir exists.

    Args:
        cfg_dir: If cfg dir is None then create sub-directory
            in CFG['output_dir']

    Returns:
        absolute path of workdir
    """
    if cfg_dir is None:
        # TODO this timestamp is not safe for parallel processing
        stamp = datetime.datetime.now(
            datetime.timezone.utc).strftime("%Y%m%d_%H%M%S")
        cfg_dir = to_absolute_path(f"lisflood_{stamp}",
                                   parent=Path(CFG["output_dir"]))
    cfg_dir.mkdir(parents=True, exist_ok=True)
    return cfg_dir
def setup(self, cfg_dir: str = None, **kwargs) -> Tuple[str, str]:  # type: ignore
    """Start the model inside a container and return a valid config file.

    Args:
        cfg_dir: a run directory given by user or created for user.
        **kwargs (optional, dict): see :py:attr:`~parameters` for all
            configurable model parameters.

    Returns:
        Path to config file and working directory

    Raises:
        ValueError: when the container could not be spawned within the
            allotted timeout.
    """
    self._setup_working_directory(cfg_dir)
    cfg = self.config
    # Override the simulation period only when explicitly requested.
    if "start_time" in kwargs:
        cfg.set("run", "starttime", _iso_to_wflow(kwargs["start_time"]))
    if "end_time" in kwargs:
        cfg.set("run", "endtime", _iso_to_wflow(kwargs["end_time"]))

    # Write the (possibly updated) configuration into the working directory.
    updated_cfg_file = to_absolute_path("wflow_ewatercycle.ini",
                                        parent=self.work_dir)
    with updated_cfg_file.open("w") as filename:
        cfg.write(filename)

    try:
        self._start_container()
    except FutureTimeoutError as exc:
        # Container start-up can exceed the grpc4bmi timeout; surface a
        # hint on how to pre-pull the image instead of a bare timeout.
        # https://github.com/eWaterCycle/grpc4bmi/issues/95
        # https://github.com/eWaterCycle/grpc4bmi/issues/100
        raise ValueError(
            "Couldn't spawn container within allocated time limit "
            "(300 seconds). You may try pulling the docker image with"
            f" `docker pull {self.docker_image}` or call `singularity "
            f"build {self._singularity_image(CFG['singularity_dir'])} "
            f"docker://{self.docker_image}` if you're using singularity,"
            " and then try again.") from exc

    return (
        str(updated_cfg_file),
        str(self.work_dir),
    )
def _check_forcing(self, forcing):
    """Check forcing argument and get path, start and end time of forcing data.

    Also loads parameters, initial stores and solver settings from the
    MATLAB forcing file when present, warning (not failing) on length
    mismatches.

    Raises:
        TypeError: if forcing is not a MarrmotForcing.
    """
    if isinstance(forcing, MarrmotForcing):
        forcing_dir = to_absolute_path(forcing.directory)
        self.forcing_file = str(forcing_dir / forcing.forcing_file)
        # convert date_strings to datetime objects
        self.forcing_start_time = get_time(forcing.start_time)
        self.forcing_end_time = get_time(forcing.end_time)
    else:
        raise TypeError(f"Unknown forcing type: {forcing}. "
                        "Please supply a MarrmotForcing object.")
    # parse start/end time
    forcing_data = sio.loadmat(self.forcing_file, mat_dtype=True)
    if "parameters" in forcing_data:
        if len(forcing_data["parameters"]) == len(self._parameters):
            self._parameters = forcing_data["parameters"]
        else:
            message = ("The length of parameters in forcing "
                       f"{self.forcing_file} does not match "
                       "the length of M14 parameters that is seven.")
            logger.warning("%s", message)
    if "store_ini" in forcing_data:
        if len(forcing_data["store_ini"]) == len(self.store_ini):
            self.store_ini = forcing_data["store_ini"]
        else:
            # Fixed typo in warning message: "iniatial" -> "initial".
            message = ("The length of initial stores in forcing "
                       f"{self.forcing_file} does not match "
                       "the length of M14 initial stores that is two.")
            logger.warning("%s", message)
    if "solver" in forcing_data:
        # MATLAB structs load as nested object arrays, hence the [0][0][0] indexing.
        forcing_solver = forcing_data["solver"]
        self.solver.name = forcing_solver["name"][0][0][0]
        self.solver.resnorm_tolerance = forcing_solver[
            "resnorm_tolerance"][0][0][0]
        self.solver.resnorm_maxiter = forcing_solver["resnorm_maxiter"][0][
            0][0]
def test_to_absolute_path_with_relative_input_and_relative_parent():
    """A relative path against a relative parent resolves under the cwd."""
    result = to_absolute_path("nonexistent_file.txt", parent=Path("."))
    assert result == Path.cwd() / "nonexistent_file.txt"
def test_to_absolute_path_with_relative_input_and_parent(tmp_path):
    """A relative path is anchored under the given parent directory."""
    result = to_absolute_path("nonexistent_file.txt", parent=tmp_path)
    assert result == tmp_path / "nonexistent_file.txt"
def test_to_absolute_path_with_absolute_input_and_parent(tmp_path):
    """An absolute path already inside the parent is returned unchanged."""
    absolute_input = tmp_path / "nonexistent_file.txt"
    result = to_absolute_path(str(absolute_input), parent=tmp_path)
    assert result == absolute_input
def test_to_absolute_path_must_exist():
    """must_exist=True raises for a path that is not on disk."""
    with pytest.raises(FileNotFoundError):
        to_absolute_path("~/nonexistent_file.txt", must_exist=True)
def test_to_absolute_path():
    """A ~-prefixed path expands to the user's home directory."""
    result = to_absolute_path("~/nonexistent_file.txt")
    assert result == Path.home() / "nonexistent_file.txt"
def setup(  # type: ignore
    self,
    maximum_soil_moisture_storage: float = None,
    threshold_flow_generation_evap_change: float = None,
    leakage_saturated_zone_flow_coefficient: float = None,
    zero_deficit_base_flow_speed: float = None,
    baseflow_coefficient: float = None,
    gamma_distribution_chi_parameter: float = None,
    gamma_distribution_phi_parameter: float = None,
    initial_upper_zone_storage: float = None,
    initial_saturated_zone_storage: float = None,
    start_time: str = None,
    end_time: str = None,
    solver: Solver = None,
    cfg_dir: str = None,
) -> Tuple[str, str]:
    """Configure model run.

    1. Creates config file and config directory based on the forcing
       variables and time range
    2. Start bmi container and store as :py:attr:`bmi`

    Args:
        maximum_soil_moisture_storage: in mm. Range is specfied in `model
            parameter range file
            <https://github.com/wknoben/MARRMoT/blob/master/MARRMoT/Models/Parameter%20range%20files/m_01_collie1_1p_1s_parameter_ranges.m>`_.
        threshold_flow_generation_evap_change.
        leakage_saturated_zone_flow_coefficient: in mm/d.
        zero_deficit_base_flow_speed: in mm/d.
        baseflow_coefficient: in mm-1.
        gamma_distribution_chi_parameter.
        gamma_distribution_phi_parameter.
        initial_upper_zone_storage: in mm.
        initial_saturated_zone_storage: in mm.
        start_time: Start time of model in UTC and ISO format string
            e.g. 'YYYY-MM-DDTHH:MM:SSZ'. If not given then forcing start
            time is used.
        end_time: End time of model in UTC and ISO format string
            e.g. 'YYYY-MM-DDTHH:MM:SSZ'. If not given then forcing end
            time is used.
        solver: Solver settings
        cfg_dir: a run directory given by user or created for user.

    Returns:
        Path to config file and path to config directory

    Raises:
        ValueError: when CFG names an unknown container engine.
    """
    # vars() here captures this function's arguments by name, so the
    # M14 parameter values can be picked out positionally below.
    arguments = vars()
    arguments_subset = {key: arguments[key] for key in M14_PARAMS}
    # Only overwrite parameters the caller actually supplied.
    for index, key in enumerate(M14_PARAMS):
        if arguments_subset[key] is not None:
            self._parameters[index] = arguments_subset[key]
    if initial_upper_zone_storage:
        self.store_ini[0] = initial_upper_zone_storage
    if initial_saturated_zone_storage:
        self.store_ini[1] = initial_saturated_zone_storage
    if solver:
        self.solver = solver

    # Resolve (or create) the run directory, then write the MARRMoT config.
    cfg_dir_as_path = None
    if cfg_dir:
        cfg_dir_as_path = to_absolute_path(cfg_dir)
    cfg_dir_as_path = _generate_cfg_dir(cfg_dir_as_path)
    config_file = self._create_marrmot_config(cfg_dir_as_path, start_time,
                                              end_time)

    # Spawn the model container with the run directory mounted as work dir.
    if CFG["container_engine"].lower() == "singularity":
        message = f"The singularity image {self.singularity_image} does not exist."
        assert self.singularity_image.exists(), message
        self.bmi = BmiClientSingularity(
            image=str(self.singularity_image),
            work_dir=str(cfg_dir_as_path),
            timeout=300,
        )
    elif CFG["container_engine"].lower() == "docker":
        self.bmi = BmiClientDocker(
            image=self.docker_image,
            image_port=55555,
            work_dir=str(cfg_dir_as_path),
            timeout=300,
        )
    else:
        raise ValueError(
            f"Unknown container technology in CFG: {CFG['container_engine']}"
        )
    return str(config_file), str(cfg_dir_as_path)
def get_grdc_data( station_id: str, start_time: str, end_time: str, parameter: str = "Q", data_home: str = None, column: str = "streamflow", ) -> Tuple[pd.core.frame.DataFrame, MetaDataType]: """Get river discharge data from Global Runoff Data Centre (GRDC). Requires the GRDC daily data files in a local directory. The GRDC daily data files can be ordered at https://www.bafg.de/GRDC/EN/02_srvcs/21_tmsrs/riverdischarge_node.html Args: station_id: The station id to get. The station id can be found in the catalogues at https://www.bafg.de/GRDC/EN/02_srvcs/21_tmsrs/212_prjctlgs/project_catalogue_node.html start_time: Start time of model in UTC and ISO format string e.g. 'YYYY-MM-DDTHH:MM:SSZ'. end_time: End time of model in UTC and ISO format string e.g. 'YYYY-MM-DDTHH:MM:SSZ'. parameter: optional. The parameter code to get, e.g. ('Q') discharge, cubic meters per second. data_home : optional. The directory where the daily grdc data is located. If left out will use the grdc_location in the eWaterCycle configuration file. column: optional. Name of column in dataframe. Default: "streamflow". Returns: grdc data in a dataframe and metadata. Examples: .. 
code-block:: python from ewatercycle.observation.grdc import get_grdc_data df, meta = get_grdc_data('6335020', '2000-01-01T00:00Z', '2001-01-01T00:00Z') df.describe() streamflow count 4382.000000 mean 2328.992469 std 1190.181058 min 881.000000 25% 1550.000000 50% 2000.000000 75% 2730.000000 max 11300.000000 meta {'grdc_file_name': '/home/myusername/git/eWaterCycle/ewatercycle/6335020_Q_Day.Cmd.txt', 'id_from_grdc': 6335020, 'file_generation_date': '2019-03-27', 'river_name': 'RHINE RIVER', 'station_name': 'REES', 'country_code': 'DE', 'grdc_latitude_in_arc_degree': 51.756918, 'grdc_longitude_in_arc_degree': 6.395395, 'grdc_catchment_area_in_km2': 159300.0, 'altitude_masl': 8.0, 'dataSetContent': 'MEAN DAILY DISCHARGE (Q)', 'units': 'm³/s', 'time_series': '1814-11 - 2016-12', 'no_of_years': 203, 'last_update': '2018-05-24', 'nrMeasurements': 'NA', 'UserStartTime': '2000-01-01T00:00Z', 'UserEndTime': '2001-01-01T00:00Z', 'nrMissingData': 0} """ # noqa: E501 if data_home: data_path = to_absolute_path(data_home) elif CFG["grdc_location"]: data_path = to_absolute_path(CFG["grdc_location"]) else: raise ValueError( "Provide the grdc path using `data_home` argument" "or using `grdc_location` in ewatercycle configuration file.") if not data_path.exists(): raise ValueError(f"The grdc directory {data_path} does not exist!") # Read the raw data raw_file = data_path / f"{station_id}_{parameter}_Day.Cmd.txt" if not raw_file.exists(): raise ValueError(f"The grdc file {raw_file} does not exist!") # Convert the raw data to an xarray metadata, df = _grdc_read( raw_file, start=get_time(start_time).date(), end=get_time(end_time).date(), column=column, ) # Add start/end_time to metadata metadata["UserStartTime"] = start_time metadata["UserEndTime"] = end_time # Add number of missing data to metadata metadata["nrMissingData"] = _count_missing_data(df, column) # Shpw info about data _log_metadata(metadata) return df, metadata
def _create_lisflood_config(
    self,
    cfg_dir: Path,
    start_time_iso: str = None,
    end_time_iso: str = None,
    IrrigationEfficiency: str = None,  # noqa: N803
    MaskMap: str = None,
) -> Path:
    """Create lisflood config file.

    Clamps the requested simulation period to the forcing time range,
    rewrites the matching ``textvar`` entries of the XML settings template,
    and writes the result to ``cfg_dir / "lisflood_setting.xml"``.

    Raises:
        ValueError: when a requested start/end time falls outside the
            forcing time range.
    """
    assert self.parameter_set is not None
    assert self.forcing is not None
    # overwrite dates if given, but only within the forcing time range
    if start_time_iso is not None:
        start_time = get_time(start_time_iso)
        if self._start <= start_time <= self._end:
            self._start = start_time
        else:
            raise ValueError("start_time outside forcing time range")
    if end_time_iso is not None:
        end_time = get_time(end_time_iso)
        if self._start <= end_time <= self._end:
            self._end = end_time
        else:
            raise ValueError("end_time outside forcing time range")

    # Values are applied to any textvar whose name contains the key.
    settings = {
        "CalendarDayStart": self._start.strftime("%d/%m/%Y 00:00"),
        "StepStart": "1",
        "StepEnd": str((self._end - self._start).days),
        "PathRoot": str(self.parameter_set.directory),
        "PathMeteo": str(self.forcing_dir),
        "PathOut": str(cfg_dir),
    }

    if IrrigationEfficiency is not None:
        settings["IrrigationEfficiency"] = IrrigationEfficiency
    if MaskMap is not None:
        mask_map = to_absolute_path(MaskMap)
        # Lisflood expects the map path without its file extension.
        settings["MaskMap"] = str(mask_map.with_suffix(""))

    for textvar in self.cfg.config.iter("textvar"):
        textvar_name = textvar.attrib["name"]

        # general settings
        for key, value in settings.items():
            if key in textvar_name:
                textvar.set("value", value)

        # input for lisflood
        if "PrefixPrecipitation" in textvar_name:
            textvar.set("value", Path(self.forcing.PrefixPrecipitation).stem)
        if "PrefixTavg" in textvar_name:
            textvar.set("value", Path(self.forcing.PrefixTavg).stem)

        # maps_prefixes dictionary contains lisvap filenames in lisflood config
        maps_prefixes = {
            "E0Maps": {
                "name": "PrefixE0",
                "value": Path(self.forcing.PrefixE0).stem,
            },
            "ES0Maps": {
                "name": "PrefixES0",
                "value": Path(self.forcing.PrefixES0).stem,
            },
            "ET0Maps": {
                "name": "PrefixET0",
                "value": Path(self.forcing.PrefixET0).stem,
            },
        }
        # output of lisvap
        for map_var, prefix in maps_prefixes.items():
            if prefix["name"] in textvar_name:
                textvar.set("value", prefix["value"])
            if map_var in textvar_name:
                textvar.set("value", f"$(PathMeteo)/$({prefix['name']})")

    # Write to new setting file
    lisflood_file = cfg_dir / "lisflood_setting.xml"
    self.cfg.save(str(lisflood_file))
    return lisflood_file
def setup(  # type: ignore
    self,
    maximum_soil_moisture_storage: float = None,
    initial_soil_moisture_storage: float = None,
    start_time: str = None,
    end_time: str = None,
    solver: Solver = None,
    cfg_dir: str = None,
) -> Tuple[str, str]:
    """Configure model run.

    1. Creates config file and config directory based on the forcing
       variables and time range
    2. Start bmi container and store as :py:attr:`bmi`

    Args:
        maximum_soil_moisture_storage: in mm. Range is specfied in `model
            parameter range file
            <https://github.com/wknoben/MARRMoT/blob/master/MARRMoT/Models/Parameter%20range%20files/m_01_collie1_1p_1s_parameter_ranges.m>`_.
        initial_soil_moisture_storage: in mm.
        start_time: Start time of model in UTC and ISO format string
            e.g. 'YYYY-MM-DDTHH:MM:SSZ'. If not given then forcing start
            time is used.
        end_time: End time of model in UTC and ISO format string
            e.g. 'YYYY-MM-DDTHH:MM:SSZ'. If not given then forcing end
            time is used.
        solver: Solver settings
        cfg_dir: a run directory given by user or created for user.

    Returns:
        Path to config file and path to config directory

    Raises:
        ValueError: when CFG names an unknown container engine.
    """
    # Only overwrite model state the caller actually supplied.
    if maximum_soil_moisture_storage:
        self._parameters = [maximum_soil_moisture_storage]
    if initial_soil_moisture_storage:
        self.store_ini = [initial_soil_moisture_storage]
    if solver:
        self.solver = solver

    # Resolve (or create) the run directory, then write the MARRMoT config.
    cfg_dir_as_path = None
    if cfg_dir:
        cfg_dir_as_path = to_absolute_path(cfg_dir)
    cfg_dir_as_path = _generate_cfg_dir(cfg_dir_as_path)
    config_file = self._create_marrmot_config(cfg_dir_as_path, start_time,
                                              end_time)

    # Spawn the model container with the run directory mounted as work dir.
    if CFG["container_engine"].lower() == "singularity":
        message = f"The singularity image {self.singularity_image} does not exist."
        assert self.singularity_image.exists(), message
        self.bmi = BmiClientSingularity(
            image=str(self.singularity_image),
            work_dir=str(cfg_dir_as_path),
            timeout=300,
        )
    elif CFG["container_engine"].lower() == "docker":
        self.bmi = BmiClientDocker(
            image=self.docker_image,
            image_port=55555,
            work_dir=str(cfg_dir_as_path),
            timeout=300,
        )
    else:
        raise ValueError(
            f"Unknown container technology in CFG: {CFG['container_engine']}"
        )
    return str(config_file), str(cfg_dir_as_path)
def setup(  # type: ignore
    self,
    IrrigationEfficiency: str = None,  # noqa: N803
    start_time: str = None,
    end_time: str = None,
    MaskMap: str = None,
    cfg_dir: str = None,
) -> Tuple[str, str]:
    """Configure model run.

    1. Creates config file and config directory based on the forcing
       variables and time range.
    2. Start bmi container and store as :py:attr:`bmi`

    Args:
        IrrigationEfficiency: Field application irrigation efficiency.
            max 1, ~0.90 drip irrigation, ~0.75 sprinkling
        start_time: Start time of model in UTC and ISO format string
            e.g. 'YYYY-MM-DDTHH:MM:SSZ'. If not given then forcing start
            time is used.
        end_time: End time of model in UTC and ISO format string
            e.g. 'YYYY-MM-DDTHH:MM:SSZ'. If not given then forcing end
            time is used.
        MaskMap: Mask map to use instead of one supplied in parameter set.
            Path to a NetCDF or pcraster file with same dimensions as
            parameter set map files and a boolean variable.
        cfg_dir: a run directory given by user or created for user.

    Returns:
        Path to config file and path to config directory

    Raises:
        ValueError: when CFG names an unknown container engine.
    """
    # TODO forcing can be a part of parameter_set
    cfg_dir_as_path = None
    if cfg_dir:
        cfg_dir_as_path = to_absolute_path(cfg_dir)
    cfg_dir_as_path = _generate_workdir(cfg_dir_as_path)
    config_file = self._create_lisflood_config(
        cfg_dir_as_path,
        start_time,
        end_time,
        IrrigationEfficiency,
        MaskMap,
    )

    assert self.parameter_set is not None
    # Directories the container needs mounted read access to.
    input_dirs = [str(self.parameter_set.directory), str(self.forcing_dir)]
    if MaskMap is not None:
        mask_map = to_absolute_path(MaskMap)
        try:
            mask_map.relative_to(self.parameter_set.directory)
        except ValueError:
            # If not relative add dir
            input_dirs.append(str(mask_map.parent))

    if CFG["container_engine"].lower() == "singularity":
        image = get_singularity_image(self.version, CFG["singularity_dir"])
        self.bmi = BmiClientSingularity(
            image=str(image),
            input_dirs=input_dirs,
            work_dir=str(cfg_dir_as_path),
            timeout=300,
        )
    elif CFG["container_engine"].lower() == "docker":
        image = get_docker_image(self.version)
        self.bmi = BmiClientDocker(
            image=image,
            image_port=55555,
            input_dirs=input_dirs,
            work_dir=str(cfg_dir_as_path),
            timeout=300,
        )
    else:
        raise ValueError(
            f"Unknown container technology in CFG: {CFG['container_engine']}"
        )
    return str(config_file), str(cfg_dir_as_path)