def forecast( R, V, timesteps, n_cascade_levels=6, R_thr=None, extrap_method="semilagrangian", decomp_method="fft", bandpass_filter_method="gaussian", ar_order=2, conditional=False, probmatching_method="cdf", num_workers=1, fft_method="numpy", domain="spatial", extrap_kwargs=None, filter_kwargs=None, measure_time=False, ): """Generate a nowcast by using the Spectral Prognosis (S-PROG) method. Parameters ---------- R: array-like Array of shape (ar_order+1,m,n) containing the input precipitation fields ordered by timestamp from oldest to newest. The time steps between the inputs are assumed to be regular. V: array-like Array of shape (2,m,n) containing the x- and y-components of the advection field. The velocities are assumed to represent one time step between the inputs. All values are required to be finite. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements of the list are required to be in ascending order. n_cascade_levels: int, optional The number of cascade levels to use. R_thr: float The threshold value for minimum observable precipitation intensity. extrap_method: str, optional Name of the extrapolation method to use. See the documentation of pysteps.extrapolation.interface. decomp_method: {'fft'}, optional Name of the cascade decomposition method to use. See the documentation of pysteps.cascade.interface. bandpass_filter_method: {'gaussian', 'uniform'}, optional Name of the bandpass filter method to use with the cascade decomposition. See the documentation of pysteps.cascade.interface. ar_order: int, optional The order of the autoregressive model to use. Must be >= 1. conditional: bool, optional If set to True, compute the statistics of the precipitation field conditionally by excluding pixels where the values are below the threshold R_thr. probmatching_method: {'cdf','mean',None}, optional Method for matching the conditional statistics of the forecast field (areas with precipitation intensity above the threshold R_thr) with those of the most recently observed one. 'cdf'=map the forecast CDF to the observed one, 'mean'=adjust only the mean value, None=no matching applied. num_workers: int, optional The number of workers to use for parallel computation. Applicable if dask is enabled or pyFFTW is used for computing the FFT. When num_workers>1, it is advisable to disable OpenMP by setting the environment variable OMP_NUM_THREADS to 1. This avoids slowdown caused by too many simultaneous threads. fft_method: str, optional A string defining the FFT method to use (see utils.fft.get_method). Defaults to 'numpy' for compatibility reasons. If pyFFTW is installed, the recommended method is 'pyfftw'. domain: {"spatial", "spectral"} If "spatial", all computations are done in the spatial domain (the classical S-PROG model). If "spectral", the AR(2) models are applied directly in the spectral domain to reduce memory footprint and improve performance :cite:`PCH2019a`. extrap_kwargs: dict, optional Optional dictionary containing keyword arguments for the extrapolation method. See the documentation of pysteps.extrapolation. filter_kwargs: dict, optional Optional dictionary containing keyword arguments for the filter method. See the documentation of pysteps.cascade.bandpass_filters.py. measure_time: bool If set to True, measure, print and return the computation time. Returns ------- out: ndarray A three-dimensional array of shape (num_timesteps,m,n) containing a time series of forecast precipitation fields. The time series starts from t0+timestep, where timestep is taken from the input precipitation fields R. If measure_time is True, the return value is a three-element tuple containing the nowcast array, the initialization time of the nowcast generator and the time used in the main loop (seconds). See also -------- pysteps.extrapolation.interface, pysteps.cascade.interface References ---------- :cite:`Seed2003`, :cite:`PCH2019a` """ _check_inputs(R, V, timesteps, ar_order) if extrap_kwargs is None: extrap_kwargs = dict() if filter_kwargs is None: filter_kwargs = dict() if np.any(~np.isfinite(V)): raise ValueError("V contains non-finite values") print("Computing S-PROG nowcast:") print("-------------------------") print("") print("Inputs:") print("-------") print("input dimensions: %dx%d" % (R.shape[1], R.shape[2])) print("") print("Methods:") print("--------") print("extrapolation: %s" % extrap_method) print("bandpass filter: %s" % bandpass_filter_method) print("decomposition: %s" % decomp_method) print("conditional statistics: %s" % ("yes" if conditional else "no")) print("probability matching: %s" % probmatching_method) print("FFT method: %s" % fft_method) print("domain: %s" % domain) print("") print("Parameters:") print("-----------") if isinstance(timesteps, int): print("number of time steps: %d" % timesteps) else: print("time steps: %s" % timesteps) print("parallel threads: %d" % num_workers) print("number of cascade levels: %d" % n_cascade_levels) print("order of the AR(p) model: %d" % ar_order) print("precip. intensity threshold: %g" % R_thr) if measure_time: starttime_init = time.time() fft = utils.get_method(fft_method, shape=R.shape[1:], n_threads=num_workers) M, N = R.shape[1:] # initialize the band-pass filter filter_method = cascade.get_method(bandpass_filter_method) filter = filter_method((M, N), n_cascade_levels, **filter_kwargs) decomp_method, recomp_method = cascade.get_method(decomp_method) extrapolator_method = extrapolation.get_method(extrap_method) R = R[-(ar_order + 1) :, :, :].copy() R_min = np.nanmin(R) # determine the domain mask from non-finite values domain_mask = np.logical_or.reduce( [~np.isfinite(R[i, :]) for i in range(R.shape[0])] ) # determine the precipitation threshold mask if conditional: MASK_thr = np.logical_and.reduce( [R[i, :, :] >= R_thr for i in range(R.shape[0])] ) else: MASK_thr = None # initialize the extrapolator x_values, y_values = np.meshgrid(np.arange(R.shape[2]), np.arange(R.shape[1])) xy_coords = np.stack([x_values, y_values]) extrap_kwargs = extrap_kwargs.copy() extrap_kwargs["xy_coords"] = xy_coords extrap_kwargs["allow_nonfinite_values"] = True # advect the previous precipitation fields to the same position with the # most recent one (i.e. transform them into the Lagrangian coordinates) res = list() def f(R, i): return extrapolator_method(R[i, :], V, ar_order - i, "min", **extrap_kwargs)[-1] for i in range(ar_order): if not DASK_IMPORTED: R[i, :, :] = f(R, i) else: res.append(dask.delayed(f)(R, i)) if DASK_IMPORTED: num_workers_ = len(res) if num_workers > len(res) else num_workers R = np.stack(list(dask.compute(*res, num_workers=num_workers_)) + [R[-1, :, :]]) # replace non-finite values with the minimum value R = R.copy() for i in range(R.shape[0]): R[i, ~np.isfinite(R[i, :])] = np.nanmin(R[i, :]) # compute the cascade decompositions of the input precipitation fields R_d = [] for i in range(ar_order + 1): R_ = decomp_method( R[i, :, :], filter, mask=MASK_thr, fft_method=fft, output_domain=domain, normalize=True, compute_stats=True, compact_output=True, ) R_d.append(R_) # rearrange the cascade levels into a four-dimensional array of shape # (n_cascade_levels,ar_order+1,m,n) for the autoregressive model R_c = nowcast_utils.stack_cascades( R_d, n_cascade_levels, convert_to_full_arrays=True ) # compute lag-l temporal autocorrelation coefficients for each cascade level GAMMA = np.empty((n_cascade_levels, ar_order)) for i in range(n_cascade_levels): if domain == "spatial": GAMMA[i, :] = correlation.temporal_autocorrelation(R_c[i], mask=MASK_thr) else: GAMMA[i, :] = correlation.temporal_autocorrelation( R_c[i], domain="spectral", x_shape=R.shape[1:] ) R_c = nowcast_utils.stack_cascades( R_d, n_cascade_levels, convert_to_full_arrays=False ) R_d = R_d[-1] nowcast_utils.print_corrcoefs(GAMMA) if ar_order == 2: # adjust the lag-2 correlation coefficient to ensure that the AR(p) # process is stationary for i in range(n_cascade_levels): GAMMA[i, 1] = autoregression.adjust_lag2_corrcoef2(GAMMA[i, 0], GAMMA[i, 1]) # estimate the parameters of the AR(p) model from the autocorrelation # coefficients PHI = np.empty((n_cascade_levels, ar_order + 1)) for i in range(n_cascade_levels): PHI[i, :] = autoregression.estimate_ar_params_yw(GAMMA[i, :]) nowcast_utils.print_ar_params(PHI) # discard all except the p-1 last cascades because they are not needed for # the AR(p) model R_c = [R_c[i][-ar_order:] for i in range(n_cascade_levels)] if probmatching_method == "mean": mu_0 = np.mean(R[-1, :, :][R[-1, :, :] >= R_thr]) # compute precipitation mask and wet area ratio MASK_p = R[-1, :, :] >= R_thr war = 1.0 * np.sum(MASK_p) / (R.shape[1] * R.shape[2]) if measure_time: init_time = time.time() - starttime_init R = R[-1, :, :] print("Starting nowcast computation.") if measure_time: starttime_mainloop = time.time() R_f = [] if isinstance(timesteps, int): timesteps = range(timesteps + 1) timestep_type = "int" else: original_timesteps = [0] + list(timesteps) timesteps = nowcast_utils.binned_timesteps(original_timesteps) timestep_type = "list" R_f_prev = R extrap_kwargs["return_displacement"] = True D = None t_prev = 0.0 # iterate each time step for t, subtimestep_idx in enumerate(timesteps): if timestep_type == "list": subtimesteps = [original_timesteps[t_] for t_ in subtimestep_idx] else: subtimesteps = [t] if (timestep_type == "list" and subtimesteps) or ( timestep_type == "int" and t > 0 ): is_nowcast_time_step = True else: is_nowcast_time_step = False if is_nowcast_time_step: print( "Computing nowcast for time step %d... " % t, end="", flush=True, ) if measure_time: starttime = time.time() for i in range(n_cascade_levels): R_c[i] = autoregression.iterate_ar_model(R_c[i], PHI[i, :]) R_d["cascade_levels"] = [R_c[i][-1, :] for i in range(n_cascade_levels)] if domain == "spatial": R_d["cascade_levels"] = np.stack(R_d["cascade_levels"]) R_f_new = recomp_method(R_d) if domain == "spectral": R_f_new = fft.irfft2(R_f_new) MASK = _compute_sprog_mask(R_f_new, war) R_f_new[~MASK] = R_min if probmatching_method == "cdf": # adjust the CDF of the forecast to match the most recently # observed precipitation field R_f_new = probmatching.nonparam_match_empirical_cdf(R_f_new, R) elif probmatching_method == "mean": mu_fct = np.mean(R_f_new[MASK]) R_f_new[MASK] = R_f_new[MASK] - mu_fct + mu_0 R_f_new[domain_mask] = np.nan # advect the recomposed precipitation field to obtain the forecast for # the current time step (or subtimesteps if non-integer time steps are # given) for t_sub in subtimesteps: if t_sub > 0: t_diff_prev_int = t_sub - int(t_sub) if t_diff_prev_int > 0.0: R_f_ip = ( 1.0 - t_diff_prev_int ) * R_f_prev + t_diff_prev_int * R_f_new else: R_f_ip = R_f_prev t_diff_prev = t_sub - t_prev extrap_kwargs["displacement_prev"] = D R_f_ep, D = extrapolator_method( R_f_ip, V, [t_diff_prev], **extrap_kwargs, ) R_f.append(R_f_ep[0]) t_prev = t_sub # advect the forecast field by one time step if no subtimesteps in the # current interval were found if not subtimesteps: t_diff_prev = t + 1 - t_prev extrap_kwargs["displacement_prev"] = D _, D = extrapolator_method( None, V, [t_diff_prev], **extrap_kwargs, ) t_prev = t + 1 R_f_prev = R_f_new if is_nowcast_time_step: if measure_time: print("%.2f seconds." % (time.time() - starttime)) else: print("done.") if measure_time: mainloop_time = time.time() - starttime_mainloop R_f = np.stack(R_f) if measure_time: return R_f, init_time, mainloop_time else: return R_f
def forecast( R, V, timesteps, n_ens_members=24, n_cascade_levels=6, R_thr=None, kmperpixel=None, timestep=None, extrap_method="semilagrangian", decomp_method="fft", bandpass_filter_method="gaussian", noise_method="nonparametric", noise_stddev_adj=None, ar_order=2, vel_pert_method="bps", conditional=False, probmatching_method="cdf", mask_method="incremental", callback=None, return_output=True, seed=None, num_workers=1, fft_method="numpy", domain="spatial", extrap_kwargs=None, filter_kwargs=None, noise_kwargs=None, vel_pert_kwargs=None, mask_kwargs=None, measure_time=False, ): """Generate a nowcast ensemble by using the Short-Term Ensemble Prediction System (STEPS) method. Parameters ---------- R: array-like Array of shape (ar_order+1,m,n) containing the input precipitation fields ordered by timestamp from oldest to newest. The time steps between the inputs are assumed to be regular. V: array-like Array of shape (2,m,n) containing the x- and y-components of the advection field. The velocities are assumed to represent one time step between the inputs. All values are required to be finite. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements of the list are required to be in ascending order. n_ens_members: int, optional The number of ensemble members to generate. n_cascade_levels: int, optional The number of cascade levels to use. R_thr: float, optional Specifies the threshold value for minimum observable precipitation intensity. Required if mask_method is not None or conditional is True. kmperpixel: float, optional Spatial resolution of the input data (kilometers/pixel). Required if vel_pert_method is not None or mask_method is 'incremental'. timestep: float, optional Time step of the motion vectors (minutes). Required if vel_pert_method is not None or mask_method is 'incremental'. extrap_method: str, optional Name of the extrapolation method to use. See the documentation of pysteps.extrapolation.interface. decomp_method: {'fft'}, optional Name of the cascade decomposition method to use. See the documentation of pysteps.cascade.interface. bandpass_filter_method: {'gaussian', 'uniform'}, optional Name of the bandpass filter method to use with the cascade decomposition. See the documentation of pysteps.cascade.interface. noise_method: {'parametric','nonparametric','ssft','nested',None}, optional Name of the noise generator to use for perturbating the precipitation field. See the documentation of pysteps.noise.interface. If set to None, no noise is generated. noise_stddev_adj: {'auto','fixed',None}, optional Optional adjustment for the standard deviations of the noise fields added to each cascade level. This is done to compensate incorrect std. dev. estimates of casace levels due to presence of no-rain areas. 'auto'=use the method implemented in pysteps.noise.utils.compute_noise_stddev_adjs. 'fixed'= use the formula given in :cite:`BPS2006` (eq. 6), None=disable noise std. dev adjustment. ar_order: int, optional The order of the autoregressive model to use. Must be >= 1. vel_pert_method: {'bps',None}, optional Name of the noise generator to use for perturbing the advection field. See the documentation of pysteps.noise.interface. If set to None, the advection field is not perturbed. conditional: bool, optional If set to True, compute the statistics of the precipitation field conditionally by excluding pixels where the values are below the threshold R_thr. mask_method: {'obs','sprog','incremental',None}, optional The method to use for masking no precipitation areas in the forecast field. The masked pixels are set to the minimum value of the observations. 'obs' = apply R_thr to the most recently observed precipitation intensity field, 'sprog' = use the smoothed forecast field from S-PROG, where the AR(p) model has been applied, 'incremental' = iteratively buffer the mask with a certain rate (currently it is 1 km/min), None=no masking. probmatching_method: {'cdf','mean',None}, optional Method for matching the statistics of the forecast field with those of the most recently observed one. 'cdf'=map the forecast CDF to the observed one, 'mean'=adjust only the conditional mean value of the forecast field in precipitation areas, None=no matching applied. Using 'mean' requires that mask_method is not None. callback: function, optional Optional function that is called after computation of each time step of the nowcast. The function takes one argument: a three-dimensional array of shape (n_ens_members,h,w), where h and w are the height and width of the input field R, respectively. This can be used, for instance, writing the outputs into files. return_output: bool, optional Set to False to disable returning the outputs as numpy arrays. This can save memory if the intermediate results are written to output files using the callback function. seed: int, optional Optional seed number for the random generators. num_workers: int, optional The number of workers to use for parallel computation. Applicable if dask is enabled or pyFFTW is used for computing the FFT. When num_workers>1, it is advisable to disable OpenMP by setting the environment variable OMP_NUM_THREADS to 1. This avoids slowdown caused by too many simultaneous threads. fft_method: str, optional A string defining the FFT method to use (see utils.fft.get_method). Defaults to 'numpy' for compatibility reasons. If pyFFTW is installed, the recommended method is 'pyfftw'. domain: {"spatial", "spectral"} If "spatial", all computations are done in the spatial domain (the classical STEPS model). If "spectral", the AR(2) models and stochastic perturbations are applied directly in the spectral domain to reduce memory footprint and improve performance :cite:`PCH2019b`. extrap_kwargs: dict, optional Optional dictionary containing keyword arguments for the extrapolation method. See the documentation of pysteps.extrapolation. filter_kwargs: dict, optional Optional dictionary containing keyword arguments for the filter method. See the documentation of pysteps.cascade.bandpass_filters.py. noise_kwargs: dict, optional Optional dictionary containing keyword arguments for the initializer of the noise generator. See the documentation of pysteps.noise.fftgenerators. vel_pert_kwargs: dict, optional Optional dictionary containing keyword arguments 'p_par' and 'p_perp' for the initializer of the velocity perturbator. The choice of the optimal parameters depends on the domain and the used optical flow method. Default parameters from :cite:`BPS2006`: p_par = [10.88, 0.23, -7.68] p_perp = [5.76, 0.31, -2.72] Parameters fitted to the data (optical flow/domain): darts/fmi: p_par = [13.71259667, 0.15658963, -16.24368207] p_perp = [8.26550355, 0.17820458, -9.54107834] darts/mch: p_par = [24.27562298, 0.11297186, -27.30087471] p_perp = [-7.80797846e+01, -3.38641048e-02, 7.56715304e+01] darts/fmi+mch: p_par = [16.55447057, 0.14160448, -19.24613059] p_perp = [14.75343395, 0.11785398, -16.26151612] lucaskanade/fmi: p_par = [2.20837526, 0.33887032, -2.48995355] p_perp = [2.21722634, 0.32359621, -2.57402761] lucaskanade/mch: p_par = [2.56338484, 0.3330941, -2.99714349] p_perp = [1.31204508, 0.3578426, -1.02499891] lucaskanade/fmi+mch: p_par = [2.31970635, 0.33734287, -2.64972861] p_perp = [1.90769947, 0.33446594, -2.06603662] vet/fmi: p_par = [0.25337388, 0.67542291, 11.04895538] p_perp = [0.02432118, 0.99613295, 7.40146505] vet/mch: p_par = [0.5075159, 0.53895212, 7.90331791] p_perp = [0.68025501, 0.41761289, 4.73793581] vet/fmi+mch: p_par = [0.29495222, 0.62429207, 8.6804131 ] p_perp = [0.23127377, 0.59010281, 5.98180004] fmi=Finland, mch=Switzerland, fmi+mch=both pooled into the same data set The above parameters have been fitten by using run_vel_pert_analysis.py and fit_vel_pert_params.py located in the scripts directory. See pysteps.noise.motion for additional documentation. mask_kwargs: dict Optional dictionary containing mask keyword arguments 'mask_f' and 'mask_rim', the factor defining the the mask increment and the rim size, respectively. The mask increment is defined as mask_f*timestep/kmperpixel. measure_time: bool If set to True, measure, print and return the computation time. Returns ------- out: ndarray If return_output is True, a four-dimensional array of shape (n_ens_members,num_timesteps,m,n) containing a time series of forecast precipitation fields for each ensemble member. Otherwise, a None value is returned. The time series starts from t0+timestep, where timestep is taken from the input precipitation fields R. If measure_time is True, the return value is a three-element tuple containing the nowcast array, the initialization time of the nowcast generator and the time used in the main loop (seconds). See also -------- pysteps.extrapolation.interface, pysteps.cascade.interface, pysteps.noise.interface, pysteps.noise.utils.compute_noise_stddev_adjs References ---------- :cite:`Seed2003`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`PCH2019b` """ _check_inputs(R, V, timesteps, ar_order) if extrap_kwargs is None: extrap_kwargs = dict() if filter_kwargs is None: filter_kwargs = dict() if noise_kwargs is None: noise_kwargs = dict() if vel_pert_kwargs is None: vel_pert_kwargs = dict() if mask_kwargs is None: mask_kwargs = dict() if np.any(~np.isfinite(V)): raise ValueError("V contains non-finite values") if mask_method not in ["obs", "sprog", "incremental", None]: raise ValueError( "unknown mask method %s: must be 'obs', 'sprog' or 'incremental' or None" % mask_method ) if conditional and R_thr is None: raise ValueError("conditional=True but R_thr is not set") if mask_method is not None and R_thr is None: raise ValueError("mask_method!=None but R_thr=None") if noise_stddev_adj not in ["auto", "fixed", None]: raise ValueError( "unknown noise_std_dev_adj method %s: must be 'auto', 'fixed', or None" % noise_stddev_adj ) if kmperpixel is None: if vel_pert_method is not None: raise ValueError("vel_pert_method is set but kmperpixel=None") if mask_method == "incremental": raise ValueError("mask_method='incremental' but kmperpixel=None") if timestep is None: if vel_pert_method is not None: raise ValueError("vel_pert_method is set but timestep=None") if mask_method == "incremental": raise ValueError("mask_method='incremental' but timestep=None") print("Computing STEPS nowcast:") print("------------------------") print("") print("Inputs:") print("-------") print("input dimensions: %dx%d" % (R.shape[1], R.shape[2])) if kmperpixel is not None: print("km/pixel: %g" % kmperpixel) if timestep is not None: print("time step: %d minutes" % timestep) print("") print("Methods:") print("--------") print("extrapolation: %s" % extrap_method) print("bandpass filter: %s" % bandpass_filter_method) print("decomposition: %s" % decomp_method) print("noise generator: %s" % noise_method) print("noise adjustment: %s" % ("yes" if noise_stddev_adj else "no")) print("velocity perturbator: %s" % vel_pert_method) print("conditional statistics: %s" % ("yes" if conditional else "no")) print("precip. mask method: %s" % mask_method) print("probability matching: %s" % probmatching_method) print("FFT method: %s" % fft_method) print("domain: %s" % domain) print("") print("Parameters:") print("-----------") if isinstance(timesteps, int): print("number of time steps: %d" % timesteps) else: print("time steps: %s" % timesteps) print("ensemble size: %d" % n_ens_members) print("parallel threads: %d" % num_workers) print("number of cascade levels: %d" % n_cascade_levels) print("order of the AR(p) model: %d" % ar_order) if vel_pert_method == "bps": vp_par = vel_pert_kwargs.get("p_par", noise.motion.get_default_params_bps_par()) vp_perp = vel_pert_kwargs.get( "p_perp", noise.motion.get_default_params_bps_perp() ) print( "velocity perturbations, parallel: %g,%g,%g" % (vp_par[0], vp_par[1], vp_par[2]) ) print( "velocity perturbations, perpendicular: %g,%g,%g" % (vp_perp[0], vp_perp[1], vp_perp[2]) ) if conditional or mask_method is not None: print("precip. intensity threshold: %g" % R_thr) num_ensemble_workers = n_ens_members if num_workers > n_ens_members else num_workers if measure_time: starttime_init = time.time() fft = utils.get_method(fft_method, shape=R.shape[1:], n_threads=num_workers) M, N = R.shape[1:] # initialize the band-pass filter filter_method = cascade.get_method(bandpass_filter_method) filter = filter_method((M, N), n_cascade_levels, **filter_kwargs) decomp_method, recomp_method = cascade.get_method(decomp_method) extrapolator_method = extrapolation.get_method(extrap_method) x_values, y_values = np.meshgrid(np.arange(R.shape[2]), np.arange(R.shape[1])) xy_coords = np.stack([x_values, y_values]) R = R[-(ar_order + 1) :, :, :].copy() # determine the domain mask from non-finite values domain_mask = np.logical_or.reduce( [~np.isfinite(R[i, :]) for i in range(R.shape[0])] ) # determine the precipitation threshold mask if conditional: MASK_thr = np.logical_and.reduce( [R[i, :, :] >= R_thr for i in range(R.shape[0])] ) else: MASK_thr = None # advect the previous precipitation fields to the same position with the # most recent one (i.e. transform them into the Lagrangian coordinates) extrap_kwargs = extrap_kwargs.copy() extrap_kwargs["xy_coords"] = xy_coords extrap_kwargs["allow_nonfinite_values"] = True res = list() def f(R, i): return extrapolator_method(R[i, :, :], V, ar_order - i, "min", **extrap_kwargs)[ -1 ] for i in range(ar_order): if not DASK_IMPORTED: R[i, :, :] = f(R, i) else: res.append(dask.delayed(f)(R, i)) if DASK_IMPORTED: num_workers_ = len(res) if num_workers > len(res) else num_workers R = np.stack(list(dask.compute(*res, num_workers=num_workers_)) + [R[-1, :, :]]) # replace non-finite values with the minimum value R = R.copy() for i in range(R.shape[0]): R[i, ~np.isfinite(R[i, :])] = np.nanmin(R[i, :]) if noise_method is not None: # get methods for perturbations init_noise, generate_noise = noise.get_method(noise_method) # initialize the perturbation generator for the precipitation field pp = init_noise(R, fft_method=fft, **noise_kwargs) if noise_stddev_adj == "auto": print("Computing noise adjustment coefficients... ", end="", flush=True) if measure_time: starttime = time.time() R_min = np.min(R) noise_std_coeffs = noise.utils.compute_noise_stddev_adjs( R[-1, :, :], R_thr, R_min, filter, decomp_method, pp, generate_noise, 20, conditional=True, num_workers=num_workers, ) if measure_time: print("%.2f seconds." % (time.time() - starttime)) else: print("done.") elif noise_stddev_adj == "fixed": f = lambda k: 1.0 / (0.75 + 0.09 * k) noise_std_coeffs = [f(k) for k in range(1, n_cascade_levels + 1)] else: noise_std_coeffs = np.ones(n_cascade_levels) if noise_stddev_adj is not None: print("noise std. dev. coeffs: %s" % str(noise_std_coeffs)) # compute the cascade decompositions of the input precipitation fields R_d = [] for i in range(ar_order + 1): R_ = decomp_method( R[i, :, :], filter, mask=MASK_thr, fft_method=fft, output_domain=domain, normalize=True, compute_stats=True, compact_output=True, ) R_d.append(R_) # normalize the cascades and rearrange them into a four-dimensional array # of shape (n_cascade_levels,ar_order+1,m,n) for the autoregressive model R_c = nowcast_utils.stack_cascades(R_d, n_cascade_levels) R_d = R_d[-1] R_d = [R_d.copy() for j in range(n_ens_members)] # compute lag-l temporal autocorrelation coefficients for each cascade level GAMMA = np.empty((n_cascade_levels, ar_order)) for i in range(n_cascade_levels): GAMMA[i, :] = correlation.temporal_autocorrelation(R_c[i], mask=MASK_thr) nowcast_utils.print_corrcoefs(GAMMA) if ar_order == 2: # adjust the lag-2 correlation coefficient to ensure that the AR(p) # process is stationary for i in range(n_cascade_levels): GAMMA[i, 1] = autoregression.adjust_lag2_corrcoef2(GAMMA[i, 0], GAMMA[i, 1]) # estimate the parameters of the AR(p) model from the autocorrelation # coefficients PHI = np.empty((n_cascade_levels, ar_order + 1)) for i in range(n_cascade_levels): PHI[i, :] = autoregression.estimate_ar_params_yw(GAMMA[i, :]) nowcast_utils.print_ar_params(PHI) # discard all except the p-1 last cascades because they are not needed for # the AR(p) model R_c = [R_c[i][-ar_order:] for i in range(n_cascade_levels)] # stack the cascades into a list containing all ensemble members R_c = [ [R_c[j].copy() for j in range(n_cascade_levels)] for i in range(n_ens_members) ] # initialize the random generators if noise_method is not None: randgen_prec = [] randgen_motion = [] np.random.seed(seed) for j in range(n_ens_members): rs = np.random.RandomState(seed) randgen_prec.append(rs) seed = rs.randint(0, high=1e9) rs = np.random.RandomState(seed) randgen_motion.append(rs) seed = rs.randint(0, high=1e9) if vel_pert_method is not None: init_vel_noise, generate_vel_noise = noise.get_method(vel_pert_method) # initialize the perturbation generators for the motion field vps = [] for j in range(n_ens_members): kwargs = { "randstate": randgen_motion[j], "p_par": vp_par, "p_perp": vp_perp, } vp_ = init_vel_noise(V, 1.0 / kmperpixel, timestep, **kwargs) vps.append(vp_) D = [None for j in range(n_ens_members)] R_f = [[] for j in range(n_ens_members)] if probmatching_method == "mean": mu_0 = np.mean(R[-1, :, :][R[-1, :, :] >= R_thr]) R_m = None if mask_method is not None: MASK_prec = R[-1, :, :] >= R_thr if mask_method == "obs": pass elif mask_method == "sprog": # compute the wet area ratio and the precipitation mask war = 1.0 * np.sum(MASK_prec) / (R.shape[1] * R.shape[2]) R_m = [R_c[0][i].copy() for i in range(n_cascade_levels)] R_m_d = R_d[0].copy() elif mask_method == "incremental": # get mask parameters mask_rim = mask_kwargs.get("mask_rim", 10) mask_f = mask_kwargs.get("mask_f", 1.0) # initialize the structuring element struct = scipy.ndimage.generate_binary_structure(2, 1) # iterate it to expand it nxn n = mask_f * timestep / kmperpixel struct = scipy.ndimage.iterate_structure(struct, int((n - 1) / 2.0)) # initialize precip mask for each member MASK_prec = _compute_incremental_mask(MASK_prec, struct, mask_rim) MASK_prec = [MASK_prec.copy() for j in range(n_ens_members)] if noise_method is None and R_m is None: R_m = [R_c[0][i].copy() for i in range(n_cascade_levels)] fft_objs = [] for i in range(n_ens_members): fft_objs.append(utils.get_method(fft_method, shape=R.shape[1:])) if measure_time: init_time = time.time() - starttime_init R = R[-1, :, :] print("Starting nowcast computation.") if measure_time: starttime_mainloop = time.time() if isinstance(timesteps, int): timesteps = range(timesteps + 1) timestep_type = "int" else: original_timesteps = [0] + list(timesteps) timesteps = nowcast_utils.binned_timesteps(original_timesteps) timestep_type = "list" extrap_kwargs["return_displacement"] = True R_f_prev = [R for i in range(n_ens_members)] t_prev = [0.0 for j in range(n_ens_members)] t_total = [0.0 for j in range(n_ens_members)] # iterate each time step for t, subtimestep_idx in enumerate(timesteps): if timestep_type == "list": subtimesteps = [original_timesteps[t_] for t_ in subtimestep_idx] else: subtimesteps = [t] if (timestep_type == "list" and subtimesteps) or ( timestep_type == "int" and t > 0 ): is_nowcast_time_step = True else: is_nowcast_time_step = False if is_nowcast_time_step: print( "Computing nowcast for time step %d... " % t, end="", flush=True, ) if measure_time: starttime = time.time() if noise_method is None or mask_method == "sprog": for i in range(n_cascade_levels): # use a separate AR(p) model for the non-perturbed forecast, # from which the mask is obtained R_m[i] = autoregression.iterate_ar_model(R_m[i], PHI[i, :]) R_m_d["cascade_levels"] = [R_m[i][-1] for i in range(n_cascade_levels)] if domain == "spatial": R_m_d["cascade_levels"] = np.stack(R_m_d["cascade_levels"]) R_m_ = recomp_method(R_m_d) if domain == "spectral": R_m_ = fft.irfft2(R_m_) if mask_method == "sprog": MASK_prec = _compute_sprog_mask(R_m_, war) # the nowcast iteration for each ensemble member def worker(j): if noise_method is not None: # generate noise field EPS = generate_noise( pp, randstate=randgen_prec[j], fft_method=fft_objs[j], domain=domain ) # decompose the noise field into a cascade EPS = decomp_method( EPS, filter, fft_method=fft_objs[j], input_domain=domain, output_domain=domain, compute_stats=True, normalize=True, compact_output=True, ) else: EPS = None # iterate the AR(p) model for each cascade level for i in range(n_cascade_levels): # normalize the noise cascade if EPS is not None: EPS_ = EPS["cascade_levels"][i] EPS_ *= noise_std_coeffs[i] else: EPS_ = None # apply AR(p) process to cascade level if EPS is not None or vel_pert_method is not None: R_c[j][i] = autoregression.iterate_ar_model( R_c[j][i], PHI[i, :], eps=EPS_ ) else: # use the deterministic AR(p) model computed above if # perturbations are disabled R_c[j][i] = R_m[i] EPS = None EPS_ = None # compute the recomposed precipitation field(s) from the cascades # obtained from the AR(p) model(s) R_d[j]["cascade_levels"] = [ R_c[j][i][-1, :] for i in range(n_cascade_levels) ] if domain == "spatial": R_d[j]["cascade_levels"] = np.stack(R_d[j]["cascade_levels"]) R_f_new = recomp_method(R_d[j]) if domain == "spectral": R_f_new = fft_objs[j].irfft2(R_f_new) if mask_method is not None: # apply the precipitation mask to prevent generation of new # precipitation into areas where it was not originally # observed R_cmin = R_f_new.min() if mask_method == "incremental": R_f_new = R_cmin + (R_f_new - R_cmin) * MASK_prec[j] MASK_prec_ = R_f_new > R_cmin else: MASK_prec_ = MASK_prec # Set to min value outside of mask R_f_new[~MASK_prec_] = R_cmin if probmatching_method == "cdf": # adjust the CDF of the forecast to match the most recently # observed precipitation field R_f_new = probmatching.nonparam_match_empirical_cdf(R_f_new, R) elif probmatching_method == "mean": MASK = R_f_new >= R_thr mu_fct = np.mean(R_f_new[MASK]) R_f_new[MASK] = R_f_new[MASK] - mu_fct + mu_0 if mask_method == "incremental": MASK_prec[j] = _compute_incremental_mask( R_f_new >= R_thr, struct, mask_rim ) R_f_new[domain_mask] = np.nan R_f_out = [] extrap_kwargs_ = extrap_kwargs.copy() V_pert = V # advect the recomposed precipitation field to obtain the forecast for # the current time step (or subtimesteps if non-integer time steps are # given) for t_sub in subtimesteps: if t_sub > 0: t_diff_prev_int = t_sub - int(t_sub) if t_diff_prev_int > 0.0: R_f_ip = (1.0 - t_diff_prev_int) * R_f_prev[ j ] + t_diff_prev_int * R_f_new else: R_f_ip = R_f_prev[j] t_diff_prev = t_sub - t_prev[j] t_total[j] += t_diff_prev # compute the perturbed motion field if vel_pert_method is not None: V_pert = V + generate_vel_noise(vps[j], t_total[j] * timestep) extrap_kwargs_["displacement_prev"] = D[j] R_f_ep, D[j] = extrapolator_method( R_f_ip, V_pert, [t_diff_prev], **extrap_kwargs_, ) R_f_out.append(R_f_ep[0]) t_prev[j] = t_sub # advect the forecast field by one time step if no subtimesteps in the # current interval were found if not subtimesteps: t_diff_prev = t + 1 - t_prev[j] t_total[j] += t_diff_prev # compute the perturbed motion field if vel_pert_method is not None: V_pert = V + generate_vel_noise(vps[j], t_total[j] * timestep) extrap_kwargs_["displacement_prev"] = D[j] _, D[j] = extrapolator_method( None, V_pert, [t_diff_prev], **extrap_kwargs_, ) t_prev[j] = t + 1 R_f_prev[j] = R_f_new return R_f_out res = [] for j in range(n_ens_members): if not DASK_IMPORTED or n_ens_members == 1: res.append(worker(j)) else: res.append(dask.delayed(worker)(j)) R_f_ = ( dask.compute(*res, num_workers=num_ensemble_workers) if DASK_IMPORTED and n_ens_members > 1 else res ) res = None if is_nowcast_time_step: if measure_time: print("%.2f seconds." % (time.time() - starttime)) else: print("done.") if callback is not None: R_f_stacked = np.stack(R_f_) if R_f_stacked.shape[1] > 0: callback(R_f_stacked.squeeze()) if return_output: for j in range(n_ens_members): R_f[j].extend(R_f_[j]) R_f_ = None if measure_time: mainloop_time = time.time() - starttime_mainloop if return_output: outarr = np.stack([np.stack(R_f[j]) for j in range(n_ens_members)]) if measure_time: return outarr, init_time, mainloop_time else: return outarr else: return None
def forecast( vil, velocity, timesteps, rainrate=None, n_cascade_levels=8, extrap_method="semilagrangian", ar_order=2, ar_window_radius=50, r_vil_window_radius=3, fft_method="numpy", apply_rainrate_mask=True, num_workers=1, extrap_kwargs=None, filter_kwargs=None, measure_time=False, ): """ Generate a nowcast by using the autoregressive nowcasting using VIL (ANVIL) method. ANVIL is built on top of an extrapolation-based nowcast. The key features are: 1) Growth and decay: implemented by using a cascade decomposition and a multiscale autoregressive integrated ARI(p,1) model. Instead of the original time series, the ARI model is applied to the differenced one corresponding to time derivatives. 2) Originally designed for using integrated liquid (VIL) as the input data. In this case, the rain rate (R) is obtained from VIL via an empirical relation. This implementation is more general so that the input can be any two-dimensional precipitation field. 3) The parameters of the ARI model and the R(VIL) relation are allowed to be spatially variable. The estimation is done using a moving window. Parameters ---------- vil: array_like Array of shape (ar_order+2,m,n) containing the input fields ordered by timestamp from oldest to newest. The inputs are expected to contain VIL or rain rate. The time steps between the inputs are assumed to be regular. velocity: array_like Array of shape (2,m,n) containing the x- and y-components of the advection field. The velocities are assumed to represent one time step between the inputs. All values are required to be finite. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements of the list are required to be in ascending order. rainrate: array_like Array of shape (m,n) containing the most recently observed rain rate field. If set to None, no R(VIL) conversion is done and the outputs are in the same units as the inputs. n_cascade_levels: int, optional The number of cascade levels to use. extrap_method: str, optional Name of the extrapolation method to use. See the documentation of pysteps.extrapolation.interface. ar_order: int, optional The order of the autoregressive model to use. The recommended values are 1 or 2. Using a higher-order model is strongly discouraged because the stationarity of the AR process cannot be guaranteed. ar_window_radius: int, optional The radius of the window to use for determining the parameters of the autoregressive model. Set to None to disable localization. r_vil_window_radius: int, optional The radius of the window to use for determining the R(VIL) relation. Applicable if rainrate is not None. fft_method: str, optional A string defining the FFT method to use (see utils.fft.get_method). Defaults to 'numpy' for compatibility reasons. If pyFFTW is installed, the recommended method is 'pyfftw'. apply_rainrate_mask: bool Apply mask to prevent producing precipitation to areas where it was not originally observed. Defaults to True. Disabling this may improve some verification metrics but increases the number of false alarms. Applicable if rainrate is None. num_workers: int, optional The number of workers to use for parallel computation. Applicable if dask is installed or pyFFTW is used for computing the FFT. When num_workers>1, it is advisable to disable OpenMP by setting the environment variable OMP_NUM_THREADS to 1. This avoids slowdown caused by too many simultaneous threads. extrap_kwargs: dict, optional Optional dictionary containing keyword arguments for the extrapolation method. See the documentation of pysteps.extrapolation. filter_kwargs: dict, optional Optional dictionary containing keyword arguments for the filter method. See the documentation of pysteps.cascade.bandpass_filters.py. measure_time: bool, optional If True, measure, print and return the computation time. Returns ------- out: ndarray A three-dimensional array of shape (num_timesteps,m,n) containing a time series of forecast precipitation fields. The time series starts from t0+timestep, where timestep is taken from the input VIL/rain rate fields. If measure_time is True, the return value is a three-element tuple containing the nowcast array, the initialization time of the nowcast generator and the time used in the main loop (seconds). References ---------- :cite:`PCLH2020` """ _check_inputs(vil, rainrate, velocity, timesteps, ar_order) if extrap_kwargs is None: extrap_kwargs = dict() else: extrap_kwargs = extrap_kwargs.copy() if filter_kwargs is None: filter_kwargs = dict() print("Computing ANVIL nowcast:") print("------------------------") print("") print("Inputs:") print("-------") print("input dimensions: %dx%d" % (vil.shape[1], vil.shape[2])) print("") print("Methods:") print("--------") print("extrapolation: %s" % extrap_method) print("FFT: %s" % fft_method) print("") print("Parameters:") print("-----------") if isinstance(timesteps, int): print("number of time steps: %d" % timesteps) else: print("time steps: %s" % timesteps) print("parallel threads: %d" % num_workers) print("number of cascade levels: %d" % n_cascade_levels) print("order of the ARI(p,1) model: %d" % ar_order) if type(ar_window_radius) == int: print("ARI(p,1) window radius: %d" % ar_window_radius) else: print("ARI(p,1) window radius: none") print("R(VIL) window radius: %d" % r_vil_window_radius) if measure_time: starttime_init = time.time() m, n = vil.shape[1:] vil = vil.copy() if rainrate is None and apply_rainrate_mask: rainrate_mask = vil[-1, :] < 0.1 if rainrate is not None: # determine the coefficients fields of the relation R=a*VIL+b by # localized linear regression r_vil_a, r_vil_b = _r_vil_regression(vil[-1, :], rainrate, r_vil_window_radius) # transform the input fields to Lagrangian coordinates by extrapolation extrapolator = extrapolation.get_method(extrap_method) res = list() def worker(vil, i): return ( i, extrapolator( vil[i, :], velocity, vil.shape[0] - 1 - i, allow_nonfinite_values=True, **extrap_kwargs, )[-1], ) for i in range(vil.shape[0] - 1): if not DASK_IMPORTED or num_workers == 1: vil[i, :, :] = worker(vil, i)[1] else: res.append(dask.delayed(worker)(vil, i)) if DASK_IMPORTED and num_workers > 1: num_workers_ = len(res) if num_workers > len(res) else num_workers vil_e = dask.compute(*res, num_workers=num_workers_) for i in range(len(vil_e)): vil[vil_e[i][0], :] = vil_e[i][1] # compute the final mask as the intersection of the masks of the advected # fields mask = np.isfinite(vil[0, :]) for i in range(1, vil.shape[0]): mask = np.logical_and(mask, np.isfinite(vil[i, :])) if rainrate is None and apply_rainrate_mask: rainrate_mask = np.logical_and(rainrate_mask, mask) # apply cascade decomposition to the advected input fields bp_filter_method = cascade.get_method("gaussian") bp_filter = bp_filter_method((m, n), n_cascade_levels, **filter_kwargs) fft = utils.get_method(fft_method, shape=vil.shape[1:], n_threads=num_workers) decomp_method, recomp_method = cascade.get_method("fft") vil_dec = np.empty((n_cascade_levels, vil.shape[0], m, n)) for i in range(vil.shape[0]): vil_ = vil[i, :].copy() vil_[~np.isfinite(vil_)] = 0.0 vil_dec_i = decomp_method(vil_, bp_filter, fft_method=fft) for j in range(n_cascade_levels): vil_dec[j, i, :] = vil_dec_i["cascade_levels"][j, :] # compute time-lagged correlation coefficients for the cascade levels of # the advected and differenced input fields gamma = np.empty((n_cascade_levels, ar_order, m, n)) for i in range(n_cascade_levels): vil_diff = np.diff(vil_dec[i, :], axis=0) vil_diff[~np.isfinite(vil_diff)] = 0.0 for j in range(ar_order): gamma[i, j, :] = _moving_window_corrcoef( vil_diff[-1, :], vil_diff[-(j + 2), :], ar_window_radius ) if ar_order == 2: # if the order of the ARI model is 2, adjust the correlation coefficients # so that the resulting process is stationary for i in range(n_cascade_levels): gamma[i, 1, :] = autoregression.adjust_lag2_corrcoef2( gamma[i, 0, :], gamma[i, 1, :] ) # estimate the parameters of the ARI models phi = [] for i in range(n_cascade_levels): if ar_order > 2: phi_ = autoregression.estimate_ar_params_yw_localized(gamma[i, :], d=1) elif ar_order == 2: phi_ = _estimate_ar2_params(gamma[i, :]) else: phi_ = _estimate_ar1_params(gamma[i, :]) phi.append(phi_) vil_dec = vil_dec[:, -(ar_order + 1) :, :] if measure_time: init_time = time.time() - starttime_init print("Starting nowcast computation.") if measure_time: starttime_mainloop = time.time() r_f = [] if isinstance(timesteps, int): timesteps = range(timesteps + 1) timestep_type = "int" else: original_timesteps = [0] + list(timesteps) timesteps = nowcast_utils.binned_timesteps(original_timesteps) timestep_type = "list" if rainrate is not None: r_f_prev = r_vil_a * vil[-1, :] + r_vil_b else: r_f_prev = vil[-1, :] extrap_kwargs["return_displacement"] = True dp = None t_prev = 0.0 for t, subtimestep_idx in enumerate(timesteps): if timestep_type == "list": subtimesteps = [original_timesteps[t_] for t_ in subtimestep_idx] else: subtimesteps = [t] if (timestep_type == "list" and subtimesteps) or ( timestep_type == "int" and t > 0 ): is_nowcast_time_step = True else: is_nowcast_time_step = False if is_nowcast_time_step: print( "Computing nowcast for time step %d... " % t, end="", flush=True, ) if measure_time: starttime = time.time() # iterate the ARI models for each cascade level for i in range(n_cascade_levels): vil_dec[i, :] = autoregression.iterate_ar_model(vil_dec[i, :], phi[i]) # recompose the cascade to obtain the forecast field vil_dec_dict = {} vil_dec_dict["cascade_levels"] = vil_dec[:, -1, :] vil_dec_dict["domain"] = "spatial" vil_dec_dict["normalized"] = False vil_f = recomp_method(vil_dec_dict) vil_f[~mask] = np.nan if rainrate is not None: # convert VIL to rain rate r_f_new = r_vil_a * vil_f + r_vil_b else: r_f_new = vil_f if apply_rainrate_mask: r_f_new[rainrate_mask] = 0.0 r_f_new[r_f_new < 0.0] = 0.0 # advect the recomposed field to obtain the forecast for the current # time step (or subtimesteps if non-integer time steps are given) for t_sub in subtimesteps: if t_sub > 0: t_diff_prev_int = t_sub - int(t_sub) if t_diff_prev_int > 0.0: r_f_ip = ( 1.0 - t_diff_prev_int ) * r_f_prev + t_diff_prev_int * r_f_new else: r_f_ip = r_f_prev t_diff_prev = t_sub - t_prev extrap_kwargs["displacement_prev"] = dp r_f_ep, dp = extrapolator( r_f_ip, velocity, [t_diff_prev], allow_nonfinite_values=True, **extrap_kwargs, ) r_f.append(r_f_ep[0]) t_prev = t_sub # advect the forecast field by one time step if no subtimesteps in the # current interval were found if not subtimesteps: t_diff_prev = t + 1 - t_prev extrap_kwargs["displacement_prev"] = dp _, dp = extrapolator( None, velocity, [t_diff_prev], allow_nonfinite_values=True, **extrap_kwargs, ) t_prev = t + 1 r_f_prev = r_f_new if is_nowcast_time_step: if measure_time: print("%.2f seconds." % (time.time() - starttime)) else: print("done.") if measure_time: mainloop_time = time.time() - starttime_mainloop if measure_time: return np.stack(r_f), init_time, mainloop_time else: return np.stack(r_f)