def test_path(data_path, error):
    """Check read_data path handling.

    When ``error`` is None the path must load cleanly; otherwise
    ``read_data`` must raise exactly that error class.
    """
    if error is not None:
        with pytest.raises(error):
            read_data(data_path)
        return
    read_data(data_path)
def test_shape(data_path, design_value_name, keys, expected):
    """Check that read_data loads a variety of test data properly.

    If ``expected`` is a tuple it is the expected shape of the design
    value variable; otherwise it is an exception class that loading the
    file must raise.
    """
    # Fix: removed leftover debug statement `print("EXPECTED", expected)`.
    if isinstance(expected, tuple):
        ds = read_data(data_path, keys)
        assert ds[design_value_name].shape == expected
    else:
        with pytest.raises(expected):
            read_data(data_path)
import pytest import pandas as pd import numpy as np from nptyping import NDArray from typing import Any from climpyrical.rkrig import check_df, krigit_north, rkrig_py, rkrig_r from climpyrical.data import read_data from pkg_resources import resource_filename df = pd.DataFrame({"x": np.ones(5), "y": np.ones(5), "z": np.ones(5)}) ds = read_data(resource_filename("climpyrical", "tests/data/canada_mask_rp.nc")) df_ = pd.read_csv(resource_filename("climpyrical", "tests/data/sl50_short.csv")) @pytest.mark.parametrize( "df, keys, error", [ (df, ["x", "y", "z"], None), (df, ["x", "y", "z", "x1"], KeyError), ], ) def test_check_df(df, keys, error): if error is None: check_df(df, keys) else: with pytest.raises(error):
def downscale_and_fill(in_path, out_path, fill_glaciers, log_level):
    """Takes a CanRCM4 model at the native resolution and downscales
    from 50 km to 5 km and fills in missing land values using external
    masks.

    Args:
        in_path, out_path (strings): directories of NetCDF4 file input
            and output. Must give filename, too, with extension .nc.
            Overwrites files with same name in same directory.
        fill_glaciers (bool): whether to fill spurious glacier points
            with preprocessed mask. Default is True.
        log_level (str): Default INFO
    Returns:
        Creates a NetCDF4 file at out_path at target resolution
    """
    logging.basicConfig(level=log_level)

    ds = read_data(in_path)
    # Dataset is expected to carry exactly one data variable.
    (dv,) = ds.data_vars
    unit = ds[dv].attrs["units"]
    rlon, rlat = np.meshgrid(ds.rlon, ds.rlat)
    mean = ds[dv].values

    accepted_units = ["kPa", "Pa", "degC", "mm", "unitless", "%"]
    logging.info(f"Detect units: {unit}")
    if unit not in accepted_units:
        warnings.warn(
            f"{unit} not recognized from list of accepted units: {accepted_units}"
        )
    if unit == "degC":
        kelvin = 273.15  # K
        # Fix: rejoined log string that was broken across lines.
        logging.info("Temperature field detected. Converting to Kelvin.")
        mean += kelvin
        ds[dv].attrs["units"] = "K"
    # if other units need converting in the future, use pint

    path_mask = resource_filename(
        "climpyrical", "data/mask/land_mask_CanRCM4_sftlf.nc"
    )
    path_glacier_mask = resource_filename(
        "climpyrical", "data/mask/glacier_mask.nc"
    )

    logging.info("Load and regrid file to target resolution")
    mask = read_data(path_mask)
    mask = regrid_ensemble(mask, "sftlf", 10, copy=True)
    mask = mask["sftlf"] >= 1.0

    # Fix: "reoslution" -> "resolution" in log message.
    logging.info("Load original resolution mask for reference")
    mask_og = read_data(path_mask)["sftlf"].values != 0.0
    glaciermask = read_data(path_glacier_mask)["mask"].values != 0.0

    # Fix: missing space between adjacent string literals and
    # "fill_galciers" -> "fill_glaciers" typo in log message.
    logging.info(
        "Insert NaN values into glacier points to fill "
        "and interpolate if fill_glaciers is set"
    )
    if fill_glaciers:
        logging.info("Filling spurious glacier points.")
        # NaN out glacier cells, then fill them by linear interpolation
        # from the remaining (non-NaN) cells.
        mean[glaciermask] = np.nan
        nanmask = ~np.isnan(mean)
        points = np.stack([rlon[nanmask], rlat[nanmask]]).T
        target_values = mean[nanmask]
        target_points = np.stack([rlon[glaciermask], rlat[glaciermask]]).T
        mean[glaciermask] = interpolate_dataset(
            points, target_values, target_points, "linear"
        )

    ds = gen_dataset(dv, mean, ds.rlat, ds.rlon, ds.lat, ds.lon, unit)

    logging.info("Remove water cells at original resolution")
    ds[dv].values[~mask_og] = np.nan
    nanmask = ~np.isnan(ds[dv].values)

    logging.info("Copying and downscaling dataset 10x")
    ds10 = regrid_ensemble(ds, dv, 10, copy=True)
    ds10[dv].values[~mask] = np.nan
    nrlon, nrlat = np.meshgrid(ds10.rlon, ds10.rlat)
    nanmask10 = ~np.isnan(ds10[dv].values)

    logging.info("Interpolating full remaining grid")
    points = np.stack([rlon[nanmask], rlat[nanmask]]).T
    target_points = np.stack([nrlon[nanmask10], nrlat[nanmask10]]).T
    values = ds[dv].values[nanmask]
    ds10[dv].values[nanmask10] = interpolate_dataset(
        points, values, target_points, "linear"
    )

    logging.info("Add northern domain to model")
    ds10 = extend_north(ds10, dv, 210, fill_val=np.nan)
    nanmask10 = ~np.isnan(ds10[dv].values)

    # NOTE(review): leading "/" in the resource path is inconsistent with
    # the other resource_filename calls ("tests/data/...") — confirm.
    canada_mask_path = resource_filename(
        "climpyrical", "/tests/data/canada_mask_rp.nc"
    )
    with read_data(canada_mask_path) as ds_canada:
        # NOTE(review): the extend_north result is immediately overwritten
        # by the next assignment — confirm whether the extended mask was
        # meant to be used here.
        ca_mask = extend_north(ds_canada, "mask", 210, fill_val=np.nan)
        ca_mask = ds_canada["mask"].values

    # select NaN values within new mask
    ca_mask_or = ~np.logical_or(~ca_mask, nanmask10)

    logging.info("Fill remaining missing points using closest neighbour.")
    nrlon, nrlat = np.meshgrid(ds10.rlon.values, ds10.rlat.values)
    temp_field = ds10[dv].values
    points = np.stack([nrlon[nanmask10], nrlat[nanmask10]]).T
    target_points = np.stack([nrlon[ca_mask_or], nrlat[ca_mask_or]]).T
    target_values = ds10[dv].values[nanmask10]
    temp_field[~ca_mask] = np.nan
    temp_field[ca_mask_or] = interpolate_dataset(
        points, target_values, target_points, "nearest"
    )

    logging.info("Remove the processed northern region.")
    uaa_mask_path = resource_filename(
        "climpyrical", "tests/data/canada_mask_north_rp.nc"
    )
    uaa_mask = read_data(uaa_mask_path)["mask"]
    temp_field[uaa_mask] = np.nan

    ds_processed = gen_dataset(
        dv, temp_field, ds10.rlat, ds10.rlon, ds10.lat, ds10.lon, unit
    )

    logging.info("Dataset generated and writing to file.")
    ds_processed.to_netcdf(out_path, "w")
    logging.info("Completed!")
(np.ones((10, 10)), "int", TypeError), (np.ones((10, 10, 10)), 4, ValueError), ], ) def test_check_ndims(data, n, error): if error is None: check_ndims(data, n) else: with pytest.raises(error): check_ndims(data, n) # load example ensemble dataset for testing # dv = "Rain-RL50" dv = "snw" ds = read_data(resource_filename("climpyrical", "tests/data/example2.nc")) ds_regridded_proper = read_data( resource_filename("climpyrical", "tests/data/snw_target_res.nc")) @pytest.mark.parametrize( "ds,dv,n,keys,copy", [ (ds, dv, 3, ["rlon", "rlat", "lon", "lat"], True), (ds, dv, 3, ["rlon", "rlat", "lon", "lat"], False), ], ) def test_regrid_ensemble(ds, dv, n, keys, copy): ndim = np.ndim(ds[dv].values) nds = regrid_ensemble(ds, dv, n, keys, copy)
import sys from climpyrical.data import read_data from climpyrical.gridding import rot2reg import warnings warnings.filterwarnings("ignore") """ quick usage of climpyrical.rot2reg usage: python rot2reg input.nc output.nc """ IN_PATH = sys.argv[1] OUT_PATH = sys.argv[2] ds = read_data(IN_PATH) lonlat_proj = { "proj": "longlat", "ellps": "WGS84", "datum": "WGS84", "no_defs": True, } rotated_proj = { "proj": "ob_tran", "o_proj": "longlat", "lon_0": -97, "o_lat_p": 42.5, "a": 6378137, "to_meter": 0.0174532925199,
def add_model_values(
    model_path=None,
    ds=None,
    stations_path=None,
    df=None,
    model_dv="model_values",
    log_level="INFO",
):
    """Locates the model value that's spatially closest to a station.

    Exactly one of ``model_path`` / ``ds`` must be supplied, and at
    least one of ``stations_path`` / ``df``.

    Args:
        model_path (str): directory of NetCDF4 model file, with .nc
            extension. Mutually exclusive with ``ds``.
        ds (xarray.Dataset): already-loaded model dataset. Mutually
            exclusive with ``model_path``.
        stations_path (str): station file (.csv or Excel). If given, it
            is loaded and takes the place of ``df``.
        df (pandas.DataFrame): already-loaded station table.
        model_dv (str): name of the column the matched model values are
            written to. Default "model_values".
        log_level (str): Default INFO
    Returns:
        pandas.DataFrame: copy of the station table with matched-index
        columns ``irlat``/``irlon`` and the ``model_dv`` column added.
    Raises:
        ValueError: if neither or both of model_path/ds are given, if
            neither stations_path nor df is given, or if a matched model
            value is NaN.
        KeyError: if the station table lacks lat/lon columns.
    """
    logging.basicConfig(level=log_level)

    # Fix: added missing spaces between adjacent string literals in the
    # error messages below (they previously rendered as e.g.
    # "at leastmodel path").
    if model_path is None and ds is None:
        raise ValueError(
            "Please provide at least " "model path or xarray.Dataset object"
        )
    if model_path is not None and ds is not None:
        raise ValueError(
            "Provided both model path "
            "and xarray.Dataset. "
            "Please only provide one or the other."
        )
    if ds is None and model_path is not None:
        ds = read_data(model_path)

    # Dataset is expected to carry exactly one data variable.
    (dv,) = ds.data_vars
    unit = ds[dv].attrs["units"]
    rlon, rlat = np.meshgrid(ds.rlon, ds.rlat)

    accepted_units = ["kPa", "Pa", "degC", "mm", "unitless", "%"]
    logging.info(f"Detect units: {unit}")
    if unit not in accepted_units:
        warnings.warn(
            f"{unit} not recognized from list of accepted units: {accepted_units}"
        )
    # NOTE(review): degC -> Kelvin conversion was commented out in the
    # original; values are matched in their native units — confirm.

    if stations_path is not None:
        if stations_path.endswith(".csv"):
            df = pd.read_csv(stations_path)
        else:
            df = pd.read_excel(stations_path)
    if stations_path is None and df is None:
        raise ValueError("Must provide either stations_path or pandas.Dataframe")

    # Normalize the many column-name spellings seen in station files to
    # the canonical names used below (replaces a 10-branch if chain).
    rename_map = {
        "longitude": "lon",
        "Lon": "lon",
        "long": "lon",
        "latitude": "lat",
        "Lat": "lat",
        "name": "station_name",
        "Name": "station_name",
        "prov": "province",
        "elev": "elev (m)",
        "elevation (m)": "elev (m)",
    }
    df = df.rename(
        columns={k: v for k, v in rename_map.items() if k in df.columns}
    )

    keys = ["lat", "lon"]
    if any(key not in df.columns for key in keys):
        raise KeyError(f"Dataframe must contain {keys}")

    rkeys = ["rlat", "rlon"]
    if any(key not in df.columns for key in rkeys):
        # Fix: added missing space between adjacent string literals.
        logging.info(
            "rlat or rlon not detected in input file. "
            "converting assumes WGS84 coords to rotated pole"
        )
        nx, ny = transform_coords(df.lon.values, df.lat.values)
        df = df.assign(rlat=ny, rlon=nx)

    logging.info("Matching coordinates now")
    ix, iy = find_element_wise_nearest_pos(
        ds.rlon.values, ds.rlat.values, df.rlon.values, df.rlat.values
    )

    # Fix: added missing separator between adjacent string literals.
    logging.info(
        "Locating corresponding model values. "
        "Interpolating to nearest if matched model value is NaN"
    )
    model_vals = find_nearest_index_value(
        ds.rlon.values, ds.rlat.values, ix, iy, ds[dv].values
    )
    if np.any(np.isnan(model_vals)):
        raise ValueError("NaN detected as matching output. Critical error.")

    df_new = df.assign(irlat=iy, irlon=ix)
    df_new[model_dv] = model_vals

    return df_new