def test_prefix_attrs():
    """Check that prefixing then un-prefixing attributes restores the original mapping."""
    prefix = "original_"
    original = {"units": "mm/s", "name": "pr"}

    prefixed = fmt.prefix_attrs(original, ["units"], prefix)
    assert "original_units" in prefixed

    restored = fmt.unprefix_attrs(prefixed, ["units"], prefix)
    assert restored == original

    # Check that the "naked" units will be overwritten.
    prefixed["units"] = ""
    restored = fmt.unprefix_attrs(prefixed, ["units"], prefix)
    assert restored == original
def parametric_quantile(p: xr.DataArray, q: Union[float, Sequence]) -> xr.DataArray:
    """Return the value corresponding to the given distribution parameters and quantile.

    Parameters
    ----------
    p : xr.DataArray
        Distribution parameters returned by the `fit` function.
        The array should have dimension `dparams` storing the distribution parameters,
        and attribute `scipy_dist`, storing the name of the distribution.
    q : Union[float, Sequence]
        Quantile to compute, which must be between `0` and `1`, inclusive.

    Returns
    -------
    xarray.DataArray
        An array of parametric quantiles estimated from the distribution parameters.
        The `dparams` dimension of `p` is replaced by a `quantile` dimension whose
        coordinate holds the requested quantiles.

    Raises
    ------
    KeyError
        If `p` has no `scipy_dist` attribute.

    Notes
    -----
    When all quantiles are above 0.5, the `isf` method is used instead of `ppf` because accuracy is sometimes better.
    """
    q = np.atleast_1d(q)

    # Get the distribution
    dist = p.attrs["scipy_dist"]
    dc = get_dist(dist)

    # The closures capture `q` and `dc` so that only the parameter vector has to be
    # passed along the `dparams` axis (this keeps the call compatible with dask).
    if np.all(q > 0.5):

        def func(x):
            return dc.isf(1 - q, *x)

    else:

        def func(x):
            return dc.ppf(q, *x)

    # Use the dask implementation when the underlying data is a dask array.
    duck = dask.array if isinstance(p.data, dask.array.Array) else np
    data = duck.apply_along_axis(func, p.get_axis_num("dparams"), p)

    # Create coordinate for the return periods
    coords = dict(p.coords.items())
    # `dparams` may be a bare dimension without an attached coordinate; do not fail in that case.
    coords.pop("dparams", None)
    coords["quantile"] = q

    # Create dimensions: `quantile` takes the place of `dparams`, preserving the order.
    dims = [d if d != "dparams" else "quantile" for d in p.dims]

    out = xr.DataArray(data=data, coords=coords, dims=dims)
    # Must be set before building `attrs`, because `merge_attributes` reads from `out`.
    out.attrs = unprefix_attrs(p.attrs, ["units", "standard_name"], "original_")

    attrs = dict(
        long_name=f"{dist} quantiles",
        description=f"Quantiles estimated by the {dist} distribution",
        cell_methods=merge_attributes("dparams: ppf", out, new_line=" "),
        xclim_history=update_history(
            "Compute parametric quantiles from distribution parameters",
            new_name="parametric_quantile",
            parameters=p,
        ),
    )
    out.attrs.update(attrs)
    return out
def parametric_cdf(p: xr.DataArray, v: Union[float, Sequence]) -> xr.DataArray:
    """Evaluate the cumulative distribution function of a fitted distribution at given values.

    Parameters
    ----------
    p : xr.DataArray
        Distribution parameters returned by the `fit` function.
        The array should have dimension `dparams` storing the distribution parameters,
        and attribute `scipy_dist`, storing the name of the distribution.
    v : Union[float, Sequence]
        Value(s) at which the CDF is evaluated.

    Returns
    -------
    xarray.DataArray
        An array of parametric CDF values estimated from the distribution parameters.
        The `dparams` dimension of `p` is replaced by a `cdf` dimension whose
        coordinate holds the values given in `v`.

    Notes
    -----
    The `units` and `standard_name` attributes of the output are obtained by passing
    the attributes of `p` through `unprefix_attrs` with the `original_` prefix.
    """
    values = np.atleast_1d(v)

    # Resolve the distribution object from the name stored on the parameters.
    dist_name = p.attrs["scipy_dist"]
    distribution = get_dist(dist_name)

    # Closure over `values` and `distribution`: only the parameter vector is passed in.
    def func(params):
        return distribution.cdf(values, *params)

    evaluated = xr.apply_ufunc(
        func,
        p,
        input_core_dims=[["dparams"]],
        output_core_dims=[["cdf"]],
        vectorize=True,
        dask="parallelized",
        output_dtypes=[float],
        keep_attrs=True,
        dask_gufunc_kwargs={"output_sizes": {"cdf": len(values)}},
    )

    # Attach the evaluation points as the `cdf` coordinate and put the new
    # dimension where `dparams` was, so the original dimension order is kept.
    out_dims = ["cdf" if name == "dparams" else name for name in p.dims]
    out = evaluated.assign_coords(cdf=values).transpose(*out_dims)

    out.attrs = unprefix_attrs(p.attrs, ["units", "standard_name"], "original_")
    out.attrs.update(
        {
            "long_name": f"{dist_name} cdf",
            "description": f"CDF estimated by the {dist_name} distribution",
            "cell_methods": "dparams: cdf",
            "history": update_history(
                "Compute parametric cdf from distribution parameters",
                new_name="parametric_cdf",
                parameters=p,
            ),
        }
    )
    return out
def parametric_quantile(p: xr.DataArray, q: Union[float, Sequence]) -> xr.DataArray:
    """Return the value corresponding to the given distribution parameters and quantile.

    Parameters
    ----------
    p : xr.DataArray
        Distribution parameters returned by the `fit` function.
        The array should have dimension `dparams` storing the distribution parameters,
        and attribute `scipy_dist`, storing the name of the distribution.
    q : Union[float, Sequence]
        Quantile to compute, which must be between `0` and `1`, inclusive.

    Returns
    -------
    xarray.DataArray
        An array of parametric quantiles estimated from the distribution parameters.
        The `dparams` dimension of `p` is replaced by a `quantile` dimension whose
        coordinate holds the requested quantiles.

    Raises
    ------
    KeyError
        If `p` has no `scipy_dist` attribute.

    Notes
    -----
    When all quantiles are above 0.5, the `isf` method is used instead of `ppf` because accuracy is sometimes better.
    """
    q = np.atleast_1d(q)

    # Get the distribution
    dist = p.attrs["scipy_dist"]
    dc = get_dist(dist)

    # The closures capture `q` and `dc` so that only the parameter vector has to be
    # passed to the function applied over the `dparams` core dimension.
    if np.all(q > 0.5):

        def func(x):
            return dc.isf(1 - q, *x)

    else:

        def func(x):
            return dc.ppf(q, *x)

    data = xr.apply_ufunc(
        func,
        p,
        input_core_dims=[["dparams"]],
        output_core_dims=[["quantile"]],
        vectorize=True,
        dask="parallelized",
        output_dtypes=[float],
        keep_attrs=True,
        dask_gufunc_kwargs={"output_sizes": {"quantile": len(q)}},
    )

    # Assign quantile coordinates and transpose to preserve original dimension order
    dims = [d if d != "dparams" else "quantile" for d in p.dims]
    out = data.assign_coords(quantile=q).transpose(*dims)
    out.attrs = unprefix_attrs(p.attrs, ["units", "standard_name"], "original_")

    attrs = dict(
        long_name=f"{dist} quantiles",
        description=f"Quantiles estimated by the {dist} distribution",
        cell_methods="dparams: ppf",
        history=update_history(
            "Compute parametric quantiles from distribution parameters",
            new_name="parametric_quantile",
            parameters=p,
        ),
    )
    out.attrs.update(attrs)
    return out