def test_path(data_path, error):
    """Check read_data path handling.

    When ``error`` is None the path must load cleanly; otherwise
    ``read_data`` must raise exactly that error class.
    """
    if error is not None:
        with pytest.raises(error):
            read_data(data_path)
        return
    read_data(data_path)
def test_shape(data_path, design_value_name, keys, expected):
    """Check that read_data loads a variety of test data properly.

    If ``expected`` is a tuple it is the expected shape of the design
    value variable; otherwise it is an exception class that loading the
    file must raise.
    """
    # Fix: removed leftover debug statement `print("EXPECTED", expected)`.
    if isinstance(expected, tuple):
        ds = read_data(data_path, keys)
        assert ds[design_value_name].shape == expected
    else:
        with pytest.raises(expected):
            read_data(data_path)
import pytest import pandas as pd import numpy as np from nptyping import NDArray from typing import Any from climpyrical.rkrig import check_df, krigit_north, rkrig_py, rkrig_r from climpyrical.data import read_data from pkg_resources import resource_filename df = pd.DataFrame({"x": np.ones(5), "y": np.ones(5), "z": np.ones(5)}) ds = read_data(resource_filename("climpyrical", "tests/data/canada_mask_rp.nc")) df_ = pd.read_csv(resource_filename("climpyrical", "tests/data/sl50_short.csv")) @pytest.mark.parametrize( "df, keys, error", [ (df, ["x", "y", "z"], None), (df, ["x", "y", "z", "x1"], KeyError), ], ) def test_check_df(df, keys, error): if error is None: check_df(df, keys) else: with pytest.raises(error):
def downscale_and_fill(in_path, out_path, fill_glaciers, log_level):
    """Takes a CanRCM4 model at the native resolution and downscales
    from 50 km to 5 km and fills in missing land values using external
    masks.

    Args:
        in_path, out_path (strings): directories of NetCDF4 file input
            and output. Must give filename, too, with extension .nc.
            Overwrites files with same name in same directory.
        fill_glaciers (bool): whether to fill spurious glacier points
            with preprocessed mask. Default is True.
        log_level (str): Default INFO
    Returns:
        Creates a NetCDF4 file at out_path at target resolution
    """
    logging.basicConfig(level=log_level)

    ds = read_data(in_path)
    # Dataset is expected to carry exactly one data variable.
    (dv,) = ds.data_vars
    unit = ds[dv].attrs["units"]
    rlon, rlat = np.meshgrid(ds.rlon, ds.rlat)
    mean = ds[dv].values

    accepted_units = ["kPa", "Pa", "degC", "mm", "unitless", "%"]
    logging.info(f"Detect units: {unit}")
    if unit not in accepted_units:
        warnings.warn(
            f"{unit} not recognized from list of accepted units: {accepted_units}"
        )
    if unit == "degC":
        kelvin = 273.15  # K
        # Fix: rejoined log string that was broken across lines.
        logging.info("Temperature field detected. Converting to Kelvin.")
        mean += kelvin
        ds[dv].attrs["units"] = "K"
    # if other units need converting in the future, use pint

    path_mask = resource_filename(
        "climpyrical", "data/mask/land_mask_CanRCM4_sftlf.nc"
    )
    path_glacier_mask = resource_filename(
        "climpyrical", "data/mask/glacier_mask.nc"
    )

    logging.info("Load and regrid file to target resolution")
    mask = read_data(path_mask)
    mask = regrid_ensemble(mask, "sftlf", 10, copy=True)
    mask = mask["sftlf"] >= 1.0

    # Fix: "reoslution" -> "resolution" in log message.
    logging.info("Load original resolution mask for reference")
    mask_og = read_data(path_mask)["sftlf"].values != 0.0
    glaciermask = read_data(path_glacier_mask)["mask"].values != 0.0

    # Fix: missing space between adjacent string literals and
    # "fill_galciers" -> "fill_glaciers" typo in log message.
    logging.info(
        "Insert NaN values into glacier points to fill "
        "and interpolate if fill_glaciers is set"
    )
    if fill_glaciers:
        logging.info("Filling spurious glacier points.")
        # NaN out glacier cells, then fill them by linear interpolation
        # from the remaining (non-NaN) cells.
        mean[glaciermask] = np.nan
        nanmask = ~np.isnan(mean)
        points = np.stack([rlon[nanmask], rlat[nanmask]]).T
        target_values = mean[nanmask]
        target_points = np.stack([rlon[glaciermask], rlat[glaciermask]]).T
        mean[glaciermask] = interpolate_dataset(
            points, target_values, target_points, "linear"
        )

    ds = gen_dataset(dv, mean, ds.rlat, ds.rlon, ds.lat, ds.lon, unit)

    logging.info("Remove water cells at original resolution")
    ds[dv].values[~mask_og] = np.nan
    nanmask = ~np.isnan(ds[dv].values)

    logging.info("Copying and downscaling dataset 10x")
    ds10 = regrid_ensemble(ds, dv, 10, copy=True)
    ds10[dv].values[~mask] = np.nan
    nrlon, nrlat = np.meshgrid(ds10.rlon, ds10.rlat)
    nanmask10 = ~np.isnan(ds10[dv].values)

    logging.info("Interpolating full remaining grid")
    points = np.stack([rlon[nanmask], rlat[nanmask]]).T
    target_points = np.stack([nrlon[nanmask10], nrlat[nanmask10]]).T
    values = ds[dv].values[nanmask]
    ds10[dv].values[nanmask10] = interpolate_dataset(
        points, values, target_points, "linear"
    )

    logging.info("Add northern domain to model")
    ds10 = extend_north(ds10, dv, 210, fill_val=np.nan)
    nanmask10 = ~np.isnan(ds10[dv].values)

    # NOTE(review): leading "/" in the resource path is inconsistent with
    # the other resource_filename calls ("tests/data/...") — confirm.
    canada_mask_path = resource_filename(
        "climpyrical", "/tests/data/canada_mask_rp.nc"
    )
    with read_data(canada_mask_path) as ds_canada:
        # NOTE(review): the extend_north result is immediately overwritten
        # by the next assignment — confirm whether the extended mask was
        # meant to be used here.
        ca_mask = extend_north(ds_canada, "mask", 210, fill_val=np.nan)
        ca_mask = ds_canada["mask"].values

    # select NaN values within new mask
    ca_mask_or = ~np.logical_or(~ca_mask, nanmask10)

    logging.info("Fill remaining missing points using closest neighbour.")
    nrlon, nrlat = np.meshgrid(ds10.rlon.values, ds10.rlat.values)
    temp_field = ds10[dv].values
    points = np.stack([nrlon[nanmask10], nrlat[nanmask10]]).T
    target_points = np.stack([nrlon[ca_mask_or], nrlat[ca_mask_or]]).T
    target_values = ds10[dv].values[nanmask10]
    temp_field[~ca_mask] = np.nan
    temp_field[ca_mask_or] = interpolate_dataset(
        points, target_values, target_points, "nearest"
    )

    logging.info("Remove the processed northern region.")
    uaa_mask_path = resource_filename(
        "climpyrical", "tests/data/canada_mask_north_rp.nc"
    )
    uaa_mask = read_data(uaa_mask_path)["mask"]
    temp_field[uaa_mask] = np.nan

    ds_processed = gen_dataset(
        dv, temp_field, ds10.rlat, ds10.rlon, ds10.lat, ds10.lon, unit
    )

    logging.info("Dataset generated and writing to file.")
    ds_processed.to_netcdf(out_path, "w")
    logging.info("Completed!")
(np.ones((10, 10)), "int", TypeError), (np.ones((10, 10, 10)), 4, ValueError), ], ) def test_check_ndims(data, n, error): if error is None: check_ndims(data, n) else: with pytest.raises(error): check_ndims(data, n) # load example ensemble dataset for testing # dv = "Rain-RL50" dv = "snw" ds = read_data(resource_filename("climpyrical", "tests/data/example2.nc")) ds_regridded_proper = read_data( resource_filename("climpyrical", "tests/data/snw_target_res.nc")) @pytest.mark.parametrize( "ds,dv,n,keys,copy", [ (ds, dv, 3, ["rlon", "rlat", "lon", "lat"], True), (ds, dv, 3, ["rlon", "rlat", "lon", "lat"], False), ], ) def test_regrid_ensemble(ds, dv, n, keys, copy): ndim = np.ndim(ds[dv].values) nds = regrid_ensemble(ds, dv, n, keys, copy)
import sys from climpyrical.data import read_data from climpyrical.gridding import rot2reg import warnings warnings.filterwarnings("ignore") """ quick usage of climpyrical.rot2reg usage: python rot2reg input.nc output.nc """ IN_PATH = sys.argv[1] OUT_PATH = sys.argv[2] ds = read_data(IN_PATH) lonlat_proj = { "proj": "longlat", "ellps": "WGS84", "datum": "WGS84", "no_defs": True, } rotated_proj = { "proj": "ob_tran", "o_proj": "longlat", "lon_0": -97, "o_lat_p": 42.5, "a": 6378137, "to_meter": 0.0174532925199,
def add_model_values(
    model_path=None,
    ds=None,
    stations_path=None,
    df=None,
    model_dv="model_values",
    log_level="INFO",
):
    """Locates the model value that's spatially closest to a station.

    Exactly one of ``model_path`` / ``ds`` must be supplied, and at
    least one of ``stations_path`` / ``df``.

    Args:
        model_path (str): directory of NetCDF4 model file, with .nc
            extension. Mutually exclusive with ``ds``.
        ds (xarray.Dataset): already-loaded model dataset. Mutually
            exclusive with ``model_path``.
        stations_path (str): station file (.csv or Excel). If given, it
            is loaded and takes the place of ``df``.
        df (pandas.DataFrame): already-loaded station table.
        model_dv (str): name of the column the matched model values are
            written to. Default "model_values".
        log_level (str): Default INFO
    Returns:
        pandas.DataFrame: copy of the station table with matched-index
        columns ``irlat``/``irlon`` and the ``model_dv`` column added.
    Raises:
        ValueError: if neither or both of model_path/ds are given, if
            neither stations_path nor df is given, or if a matched model
            value is NaN.
        KeyError: if the station table lacks lat/lon columns.
    """
    logging.basicConfig(level=log_level)

    # Fix: added missing spaces between adjacent string literals in the
    # error messages below (they previously rendered as e.g.
    # "at leastmodel path").
    if model_path is None and ds is None:
        raise ValueError(
            "Please provide at least " "model path or xarray.Dataset object"
        )
    if model_path is not None and ds is not None:
        raise ValueError(
            "Provided both model path "
            "and xarray.Dataset. "
            "Please only provide one or the other."
        )
    if ds is None and model_path is not None:
        ds = read_data(model_path)

    # Dataset is expected to carry exactly one data variable.
    (dv,) = ds.data_vars
    unit = ds[dv].attrs["units"]
    rlon, rlat = np.meshgrid(ds.rlon, ds.rlat)

    accepted_units = ["kPa", "Pa", "degC", "mm", "unitless", "%"]
    logging.info(f"Detect units: {unit}")
    if unit not in accepted_units:
        warnings.warn(
            f"{unit} not recognized from list of accepted units: {accepted_units}"
        )
    # NOTE(review): degC -> Kelvin conversion was commented out in the
    # original; values are matched in their native units — confirm.

    if stations_path is not None:
        if stations_path.endswith(".csv"):
            df = pd.read_csv(stations_path)
        else:
            df = pd.read_excel(stations_path)
    if stations_path is None and df is None:
        raise ValueError("Must provide either stations_path or pandas.Dataframe")

    # Normalize the many column-name spellings seen in station files to
    # the canonical names used below (replaces a 10-branch if chain).
    rename_map = {
        "longitude": "lon",
        "Lon": "lon",
        "long": "lon",
        "latitude": "lat",
        "Lat": "lat",
        "name": "station_name",
        "Name": "station_name",
        "prov": "province",
        "elev": "elev (m)",
        "elevation (m)": "elev (m)",
    }
    df = df.rename(
        columns={k: v for k, v in rename_map.items() if k in df.columns}
    )

    keys = ["lat", "lon"]
    if any(key not in df.columns for key in keys):
        raise KeyError(f"Dataframe must contain {keys}")

    rkeys = ["rlat", "rlon"]
    if any(key not in df.columns for key in rkeys):
        # Fix: added missing space between adjacent string literals.
        logging.info(
            "rlat or rlon not detected in input file. "
            "converting assumes WGS84 coords to rotated pole"
        )
        nx, ny = transform_coords(df.lon.values, df.lat.values)
        df = df.assign(rlat=ny, rlon=nx)

    logging.info("Matching coordinates now")
    ix, iy = find_element_wise_nearest_pos(
        ds.rlon.values, ds.rlat.values, df.rlon.values, df.rlat.values
    )

    # Fix: added missing separator between adjacent string literals.
    logging.info(
        "Locating corresponding model values. "
        "Interpolating to nearest if matched model value is NaN"
    )
    model_vals = find_nearest_index_value(
        ds.rlon.values, ds.rlat.values, ix, iy, ds[dv].values
    )
    if np.any(np.isnan(model_vals)):
        raise ValueError("NaN detected as matching output. Critical error.")

    df_new = df.assign(irlat=iy, irlon=ix)
    df_new[model_dv] = model_vals

    return df_new