def test_prefix_attrs():
    """Check that prefixing then unprefixing attributes gives back the input mapping."""
    original = {"units": "mm/s", "name": "pr"}

    prefixed = fmt.prefix_attrs(original, ["units"], "original_")
    assert "original_units" in prefixed

    restored = fmt.unprefix_attrs(prefixed, ["units"], "original_")
    assert restored == original

    # A "naked" units entry already present must be overwritten by the
    # value stored under the prefixed key.
    prefixed["units"] = ""
    restored = fmt.unprefix_attrs(prefixed, ["units"], "original_")
    assert restored == original
def fit(
    da: xr.DataArray,
    dist: str = "norm",
    method: str = "ML",
    dim: str = "time",
    **fitkwargs,
) -> xr.DataArray:
    """Fit an array to a univariate distribution along a given dimension.

    Parameters
    ----------
    da : xr.DataArray
        Series to be fitted along dimension `dim`.
    dist : str
        Name of the univariate distribution, such as beta, expon, genextreme, gamma, gumbel_r, lognorm, norm
        (see scipy.stats for full list). If the PWM method is used, only the following distributions are
        currently supported: 'expon', 'gamma', 'genextreme', 'genpareto', 'gumbel_r', 'pearson3', 'weibull_min'.
    method : {"ML", "PWM"}
        Fitting method, either maximum likelihood (ML) or probability weighted moments (PWM), also called L-Moments.
        The PWM method is usually more robust to outliers.
    dim : str
        The dimension along which the distribution is fitted (default: "time").
    **fitkwargs
        Other arguments forwarded to `_fitfunc_1d`, intended for :py:func:`_fitstart` and
        the distribution's `fit`.

    Returns
    -------
    xr.DataArray
        An array of fitted distribution parameters, where `dim` is replaced by a "dparams" dimension.

    Raises
    ------
    ValueError
        If `method` is not one of "ML" or "PWM".

    Notes
    -----
    Coordinates for which all values are NaNs will be dropped before fitting the distribution. If the array
    still contains NaNs, the distribution parameters will be returned as NaNs.
    """
    method_name = {"ML": "maximum likelihood", "PWM": "probability weighted moments"}

    # Fail early with a clear message instead of an unbound-variable error further down.
    if method not in method_name:
        raise ValueError(
            f"Unknown fitting method {method!r}; expected one of {sorted(method_name)}."
        )

    # Get the distribution. The scipy distribution is always needed for the parameter names;
    # the L-moments distribution is only looked up for the PWM method.
    dc = get_dist(dist)
    fit_dist = dc if method == "ML" else get_lm3_dist(dist)

    shape_params = [] if dc.shapes is None else dc.shapes.split(",")
    dist_params = shape_params + ["loc", "scale"]

    # xarray.apply_ufunc does not yet support multiple outputs with dask parallelism.
    duck = dask.array if isinstance(da.data, dask.array.Array) else np
    data = duck.apply_along_axis(
        _fitfunc_1d,
        da.get_axis_num(dim),
        da,
        dist=fit_dist,
        nparams=len(dist_params),
        method=method,
        **fitkwargs,
    )

    # Coordinates for the distribution parameters: drop the fitted dimension's
    # coordinate (if it has one) and add the parameter names.
    coords = dict(da.coords.items())
    coords.pop(dim, None)
    coords["dparams"] = dist_params

    # Dimensions for the distribution parameters
    dims = [d if d != dim else "dparams" for d in da.dims]

    out = xr.DataArray(data=data, coords=coords, dims=dims)
    out.attrs = prefix_attrs(
        da.attrs, ["standard_name", "long_name", "units", "description"], "original_"
    )
    attrs = dict(
        long_name=f"{dist} parameters",
        description=f"Parameters of the {dist} distribution",
        method=method,
        estimator=method_name[method].capitalize(),
        scipy_dist=dist,
        units="",
        xclim_history=update_history(
            f"Estimate distribution parameters by {method_name[method]} method along dimension {dim}.",
            new_name="fit",
            data=da,
        ),
    )
    out.attrs.update(attrs)
    return out
def fit(
    da: xr.DataArray,
    dist: str = "norm",
    method: str = "ML",
    dim: str = "time",
    **fitkwargs,
) -> xr.DataArray:
    """Fit an array to a univariate distribution along a given dimension.

    Parameters
    ----------
    da : xr.DataArray
        Series to be fitted along dimension `dim`.
    dist : str
        Name of the univariate distribution, such as beta, expon, genextreme, gamma, gumbel_r, lognorm, norm
        (see scipy.stats for full list). If the PWM method is used, only the following distributions are
        currently supported: 'expon', 'gamma', 'genextreme', 'genpareto', 'gumbel_r', 'pearson3', 'weibull_min'.
    method : {"ML", "PWM"}
        Fitting method, either maximum likelihood (ML) or probability weighted moments (PWM), also called L-Moments.
        The PWM method is usually more robust to outliers.
    dim : str
        The dimension along which the distribution is fitted (default: "time").
    **fitkwargs
        Other arguments forwarded to `_fitfunc_1d`, intended for :py:func:`_fitstart` and
        the distribution's `fit`.

    Returns
    -------
    xr.DataArray
        An array of fitted distribution parameters, where `dim` is replaced by a "dparams" dimension.

    Raises
    ------
    ValueError
        If `method` is not one of "ML" or "PWM".

    Notes
    -----
    Coordinates for which all values are NaNs will be dropped before fitting the distribution. If the array
    still contains NaNs, the distribution parameters will be returned as NaNs.
    """
    method_name = {
        "ML": "maximum likelihood",
        "PWM": "probability weighted moments",
    }

    # Fail early with a clear message instead of an unbound-variable error further down.
    if method not in method_name:
        raise ValueError(
            f"Unknown fitting method {method!r}; expected one of {sorted(method_name)}."
        )

    # Get the distribution. The scipy distribution is always needed for the parameter names;
    # the L-moments distribution is only looked up for the PWM method.
    dc = get_dist(dist)
    fit_dist = dc if method == "ML" else get_lm3_dist(dist)

    shape_params = [] if dc.shapes is None else dc.shapes.split(",")
    dist_params = shape_params + ["loc", "scale"]

    # Apply the 1D fit over `dim`; the output gains a new "dparams" core dimension
    # whose size must be declared up front for dask-backed inputs.
    data = xr.apply_ufunc(
        _fitfunc_1d,
        da,
        input_core_dims=[[dim]],
        output_core_dims=[["dparams"]],
        vectorize=True,
        dask="parallelized",
        output_dtypes=[float],
        keep_attrs=True,
        kwargs=dict(
            dist=fit_dist,
            nparams=len(dist_params),
            method=method,
            **fitkwargs,
        ),
        dask_gufunc_kwargs={"output_sizes": {"dparams": len(dist_params)}},
    )

    # Add coordinates for the distribution parameters and transpose to original shape (with dim -> dparams)
    dims = [d if d != dim else "dparams" for d in da.dims]
    out = data.assign_coords(dparams=dist_params).transpose(*dims)

    out.attrs = prefix_attrs(
        da.attrs, ["standard_name", "long_name", "units", "description"], "original_"
    )
    attrs = dict(
        long_name=f"{dist} parameters",
        description=f"Parameters of the {dist} distribution",
        method=method,
        estimator=method_name[method].capitalize(),
        scipy_dist=dist,
        units="",
        history=update_history(
            f"Estimate distribution parameters by {method_name[method]} method along dimension {dim}.",
            new_name="fit",
            data=da,
        ),
    )
    out.attrs.update(attrs)
    return out
def fit(da: xr.DataArray, dist: str = "norm", method: str = "ML") -> xr.DataArray:
    """Fit an array to a univariate distribution along the time dimension.

    Parameters
    ----------
    da : xr.DataArray
        Time series to be fitted along the time dimension.
    dist : str
        Name of the univariate distribution, such as beta, expon, genextreme, gamma, gumbel_r, lognorm, norm
        (see scipy.stats for full list). If the PWM method is used, only the following distributions are
        currently supported: 'expon', 'gamma', 'genextreme', 'genpareto', 'gumbel_r', 'pearson3', 'weibull_min'.
    method : {"ML", "PWM"}
        Fitting method, either maximum likelihood (ML) or probability weighted moments (PWM), also called L-Moments.
        The PWM method is usually more robust to outliers.

    Returns
    -------
    xr.DataArray
        An array of fitted distribution parameters, where "time" is replaced by a "dparams" dimension.

    Raises
    ------
    ValueError
        If `method` is not one of "ML" or "PWM".

    Notes
    -----
    NaN values are removed from each series before fitting. If fewer than two valid values remain, or if any
    fitted parameter is NaN, all distribution parameters for that series are returned as NaNs.
    """
    method_name = {"ML": "maximum likelihood", "PWM": "probability weighted moments"}

    # Fail early with a clear message instead of an unbound-variable error inside `fitfunc`.
    if method not in method_name:
        raise ValueError(
            f"Unknown fitting method {method!r}; expected one of {sorted(method_name)}."
        )

    # Get the distribution
    dc = get_dist(dist)
    if method == "PWM":
        lm3dc = get_lm3_dist(dist)

    shape_params = [] if dc.shapes is None else dc.shapes.split(",")
    dist_params = shape_params + ["loc", "scale"]
    n_params = len(dist_params)

    # Fit the parameters.
    # This would also be the place to impose constraints on the series minimum length if needed.
    def fitfunc(arr):
        """Fit distribution parameters to a 1D series, ignoring invalid values."""
        x = np.ma.masked_invalid(arr).compressed()

        # Return NaNs if fewer than two valid values remain.
        if len(x) <= 1:
            return [np.nan] * n_params

        # Estimate parameters
        if method == "ML":
            args, kwargs = _fit_start(x, dist)
            params = dc.fit(x, *args, **kwargs)
        else:  # method == "PWM", guaranteed by the validation above
            params = list(lm3dc.lmom_fit(x).values())

        # Fill with NaNs if one of the parameters is NaN. A fresh list is returned
        # because `params` may be a tuple (no item assignment), and slice-assigning
        # a scalar to a list is not allowed either.
        if np.isnan(params).any():
            return [np.nan] * n_params

        return params

    # xarray.apply_ufunc does not yet support multiple outputs with dask parallelism.
    duck = dask.array if isinstance(da.data, dask.array.Array) else np
    data = duck.apply_along_axis(fitfunc, da.get_axis_num("time"), da)

    # Coordinates for the distribution parameters. The "time" dimension may have
    # no coordinate attached, so its removal must not fail.
    coords = dict(da.coords.items())
    coords.pop("time", None)
    coords["dparams"] = dist_params

    # Dimensions for the distribution parameters
    dims = [d if d != "time" else "dparams" for d in da.dims]

    out = xr.DataArray(data=data, coords=coords, dims=dims)
    out.attrs = prefix_attrs(
        da.attrs, ["standard_name", "long_name", "units", "description"], "original_"
    )
    attrs = dict(
        long_name=f"{dist} parameters",
        description=f"Parameters of the {dist} distribution",
        method=method,
        estimator=method_name[method].capitalize(),
        scipy_dist=dist,
        units="",
        xclim_history=update_history(
            f"Estimate distribution parameters by {method_name[method]} method.",
            new_name="fit",
            data=da,
        ),
    )
    out.attrs.update(attrs)
    return out